Maintain a smoothed version of the dirty page count for use in the
throttle bandwidth calculations. default_backing_dev_info.avg_dirty
holds the smoothed global dirty pages.

The calculation favors smoothness over accuracy: it makes no sense to
try to track a heavily fluctuating value "accurately", and its users do
not rely on it being accurate.

CC: larry
Signed-off-by: Wu Fengguang
---
 include/linux/backing-dev.h |    2 +
 mm/backing-dev.c            |    3 +
 mm/page-writeback.c         |   66 ++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+)

--- linux-next.orig/mm/page-writeback.c	2011-04-13 17:18:12.000000000 +0800
+++ linux-next/mm/page-writeback.c	2011-04-13 17:18:12.000000000 +0800
@@ -471,6 +471,64 @@ unsigned long bdi_dirty_limit(struct bac
 	return bdi_dirty;
 }
 
+static void bdi_update_dirty_smooth(struct backing_dev_info *bdi,
+				    unsigned long dirty)
+{
+	unsigned long avg = bdi->avg_dirty;
+	unsigned long old = bdi->old_dirty;
+
+	if (unlikely(!avg)) {
+		avg = dirty;
+		goto update;
+	}
+
+	/*
+	 * dirty pages are departing upwards, follow up
+	 */
+	if (avg < old && old <= dirty) {
+		avg += (old - avg) >> 2;
+		goto update;
+	}
+
+	/*
+	 * dirty pages are departing downwards, follow down
+	 */
+	if (avg > old && old >= dirty) {
+		avg -= (avg - old) >> 2;
+		goto update;
+	}
+
+	/*
+	 * This can filter out half of the unnecessary updates when bdi_dirty
+	 * is fluctuating around the balance point, and is most effective on
+	 * XFS, whose pattern is
+	 *
+	 *	[ASCII figure: [.] dirty zig-zags around the flat [-] avg
+	 *	 line, with the oscillating tail marked "(fluctuated)"]
+	 *
+	 * @avg will remain flat at the cost of being biased towards high. In
+	 * practice the error tends to be much smaller: thanks to more coarse
+	 * grained fluctuations, @avg becomes the real average number for the
+	 * last two rising lines of @dirty.
+	 */
+	goto out;
+
+update:
+	bdi->avg_dirty = avg;
+out:
+	bdi->old_dirty = dirty;
+}
+
 static void __bdi_update_write_bandwidth(struct backing_dev_info *bdi,
 					 unsigned long elapsed,
 					 unsigned long written)
@@ -535,6 +593,14 @@ void bdi_update_bandwidth(struct backing
 	if (elapsed <= HZ / 5)
 		goto unlock;
 
+	if (thresh &&
+	    now - default_backing_dev_info.bw_time_stamp >= HZ / 5) {
+		bdi_update_dirty_smooth(&default_backing_dev_info, dirty);
+		default_backing_dev_info.bw_time_stamp = now;
+	}
+	if (thresh) {
+		bdi_update_dirty_smooth(bdi, bdi_dirty);
+	}
 	__bdi_update_write_bandwidth(bdi, elapsed, written);
 
 snapshot:
--- linux-next.orig/include/linux/backing-dev.h	2011-04-13 17:18:12.000000000 +0800
+++ linux-next/include/linux/backing-dev.h	2011-04-13 17:18:12.000000000 +0800
@@ -77,6 +77,8 @@ struct backing_dev_info {
 	unsigned long written_stamp;
 	unsigned long write_bandwidth;
 	unsigned long avg_write_bandwidth;
+	unsigned long avg_dirty;
+	unsigned long old_dirty;
 
 	struct prop_local_percpu completions;
 	int dirty_exceeded;
--- linux-next.orig/mm/backing-dev.c	2011-04-13 17:18:12.000000000 +0800
+++ linux-next/mm/backing-dev.c	2011-04-13 17:18:12.000000000 +0800
@@ -669,6 +669,9 @@ int bdi_init(struct backing_dev_info *bd
 	bdi->write_bandwidth = INIT_BW;
 	bdi->avg_write_bandwidth = INIT_BW;
 
+	bdi->avg_dirty = 0;
+	bdi->old_dirty = 0;
+
 	err = prop_local_init_percpu(&bdi->completions);
 
 	if (err) {
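
For readers who want to try the filter outside the kernel, here is a
minimal userspace sketch of the same follow-up/follow-down rule. The
names used here (smooth_state, smooth_update, the sample trace) are
invented for illustration and are not part of the patch; only the
update rule mirrors bdi_update_dirty_smooth().

/*
 * Standalone sketch of the asymmetric smoothing filter above.
 * All names are hypothetical; only the update rule follows the patch.
 */
#include <stdio.h>

struct smooth_state {
	unsigned long avg;	/* smoothed value, like bdi->avg_dirty */
	unsigned long old;	/* previous sample, like bdi->old_dirty */
};

static void smooth_update(struct smooth_state *s, unsigned long dirty)
{
	unsigned long avg = s->avg;
	unsigned long old = s->old;

	if (!avg)
		avg = dirty;			/* first sample seeds the average */
	else if (avg < old && old <= dirty)
		avg += (old - avg) >> 2;	/* rising trend: follow up by 1/4 of the gap */
	else if (avg > old && old >= dirty)
		avg -= (avg - old) >> 2;	/* falling trend: follow down by 1/4 of the gap */
	/* otherwise dirty is oscillating around avg: keep avg flat */

	s->avg = avg;
	s->old = dirty;
}

int main(void)
{
	/* made-up trace: a rising ramp followed by small oscillation */
	unsigned long samples[] = { 100, 200, 300, 400, 380, 420, 390, 410 };
	struct smooth_state s = { 0, 0 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		smooth_update(&s, samples[i]);
		printf("dirty=%lu avg=%lu\n", samples[i], s.avg);
	}
	return 0;
}

With this trace, avg climbs after the ramp (closing a quarter of the
gap to the previous sample on each update) and then stays flat while
dirty merely oscillates around it, which is the behaviour the code
comment in the patch describes.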