From 244e37f1f3978ff182b5e33b77b327e4f48bb438 Mon Sep 17 00:00:00 2001
From: Minchan Kim
Date: Mon, 30 May 2011 02:23:49 +0900
Subject: [PATCH] Revert "writeback: do not sleep on the congestion queue if
 there are no congested BDIs or if significant congestion is not being
 encountered in the current zone"

This reverts commit 0e093d99763eb4cea09f8ca4f1d01f34e121d10b.

Conflicts:

	mm/vmscan.c

Signed-off-by: Minchan Kim
---
 include/linux/backing-dev.h      |    2 +-
 include/linux/mmzone.h           |    8 -----
 include/trace/events/writeback.h |    7 ----
 mm/backing-dev.c                 |   61 +-------------------------------
 mm/page_alloc.c                  |    4 +-
 mm/vmscan.c                      |   41 ++-----------------------
 6 files changed, 9 insertions(+), 114 deletions(-)

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 4ce34fa..8b0ae8b 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -286,7 +286,7 @@ enum {
 void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
 void set_bdi_congested(struct backing_dev_info *bdi, int sync);
 long congestion_wait(int sync, long timeout);
-long wait_iff_congested(struct zone *zone, int sync, long timeout);
+
 
 static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
 {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 02ecb01..e1b16aa 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -424,9 +424,6 @@ struct zone {
 typedef enum {
 	ZONE_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
 	ZONE_OOM_LOCKED,		/* zone is in OOM killer zonelist */
-	ZONE_CONGESTED,			/* zone has many dirty pages backed by
-					 * a congested BDI
-					 */
 } zone_flags_t;
 
 static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -444,11 +441,6 @@ static inline void zone_clear_flag(struct zone *zone, zone_flags_t flag)
 	clear_bit(flag, &zone->flags);
 }
 
-static inline int zone_is_reclaim_congested(const struct zone *zone)
-{
-	return test_bit(ZONE_CONGESTED, &zone->flags);
-}
-
 static inline int zone_is_reclaim_locked(const struct zone *zone)
 {
 	return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 4e249b9..fc2b3a0 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -180,13 +180,6 @@ DEFINE_EVENT(writeback_congest_waited_template, writeback_congestion_wait,
 	TP_ARGS(usec_timeout, usec_delayed)
 );
 
-DEFINE_EVENT(writeback_congest_waited_template, writeback_wait_iff_congested,
-
-	TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed),
-
-	TP_ARGS(usec_timeout, usec_delayed)
-);
-
 #endif /* _TRACE_WRITEBACK_H */
 
 /* This part must be outside protection */
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8e4ed88..c9e59de 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -729,7 +729,6 @@ static wait_queue_head_t congestion_wqh[2] = {
 		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
 		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
 	};
-static atomic_t nr_bdi_congested[2];
 
 void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
 {
@@ -737,8 +736,7 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
 	wait_queue_head_t *wqh = &congestion_wqh[sync];
 
 	bit = sync ? BDI_sync_congested : BDI_async_congested;
-	if (test_and_clear_bit(bit, &bdi->state))
-		atomic_dec(&nr_bdi_congested[sync]);
+	clear_bit(bit, &bdi->state);
 	smp_mb__after_clear_bit();
 	if (waitqueue_active(wqh))
 		wake_up(wqh);
@@ -750,8 +748,7 @@ void set_bdi_congested(struct backing_dev_info *bdi, int sync)
 	enum bdi_state bit;
 
 	bit = sync ? BDI_sync_congested : BDI_async_congested;
-	if (!test_and_set_bit(bit, &bdi->state))
-		atomic_inc(&nr_bdi_congested[sync]);
+	set_bit(bit, &bdi->state);
 }
 EXPORT_SYMBOL(set_bdi_congested);
 
@@ -782,57 +779,3 @@ long congestion_wait(int sync, long timeout)
 }
 EXPORT_SYMBOL(congestion_wait);
 
-/**
- * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a zone to complete writes
- * @zone: A zone to check if it is heavily congested
- * @sync: SYNC or ASYNC IO
- * @timeout: timeout in jiffies
- *
- * In the event of a congested backing_dev (any backing_dev) and the given
- * @zone has experienced recent congestion, this waits for up to @timeout
- * jiffies for either a BDI to exit congestion of the given @sync queue
- * or a write to complete.
- *
- * In the absense of zone congestion, cond_resched() is called to yield
- * the processor if necessary but otherwise does not sleep.
- *
- * The return value is 0 if the sleep is for the full timeout. Otherwise,
- * it is the number of jiffies that were still remaining when the function
- * returned. return_value == timeout implies the function did not sleep.
- */
-long wait_iff_congested(struct zone *zone, int sync, long timeout)
-{
-	long ret;
-	unsigned long start = jiffies;
-	DEFINE_WAIT(wait);
-	wait_queue_head_t *wqh = &congestion_wqh[sync];
-
-	/*
-	 * If there is no congestion, or heavy congestion is not being
-	 * encountered in the current zone, yield if necessary instead
-	 * of sleeping on the congestion queue
-	 */
-	if (atomic_read(&nr_bdi_congested[sync]) == 0 ||
-			!zone_is_reclaim_congested(zone)) {
-		cond_resched();
-
-		/* In case we scheduled, work out time remaining */
-		ret = timeout - (jiffies - start);
-		if (ret < 0)
-			ret = 0;
-
-		goto out;
-	}
-
-	/* Sleep until uncongested or a write happens */
-	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
-	ret = io_schedule_timeout(timeout);
-	finish_wait(wqh, &wait);
-
-out:
-	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
-					jiffies_to_usecs(jiffies - start));
-
-	return ret;
-}
-EXPORT_SYMBOL(wait_iff_congested);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2828037..71e9842 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1929,7 +1929,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
 			preferred_zone, migratetype);
 
 		if (!page && gfp_mask & __GFP_NOFAIL)
-			wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
+			congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (!page && (gfp_mask & __GFP_NOFAIL));
 
 	return page;
@@ -2137,7 +2137,7 @@ rebalance:
 	pages_reclaimed += did_some_progress;
 	if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
 		/* Wait for some write requests to complete then retry */
-		wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
+		congestion_wait(BLK_RW_ASYNC, HZ/50);
 		goto rebalance;
 	} else {
 		/*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0665520..59de427 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -703,14 +703,11 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-					struct zone *zone,
 					struct scan_control *sc)
 {
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(free_pages);
 	int pgactivate = 0;
-	unsigned long nr_dirty = 0;
-	unsigned long nr_congested = 0;
 	unsigned long nr_reclaimed = 0;
 
 	cond_resched();
@@ -730,7 +727,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep;
 
 		VM_BUG_ON(PageActive(page));
-		VM_BUG_ON(page_zone(page) != zone);
 
 		sc->nr_scanned++;
 
@@ -808,8 +804,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		}
 
 		if (PageDirty(page)) {
-			nr_dirty++;
-
 			if (references == PAGEREF_RECLAIM_CLEAN)
 				goto keep_locked;
 			if (!may_enter_fs)
@@ -820,7 +814,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			/* Page is dirty, try to write it out here */
 			switch (pageout(page, mapping, sc)) {
 			case PAGE_KEEP:
-				nr_congested++;
 				goto keep_locked;
 			case PAGE_ACTIVATE:
 				goto activate_locked;
@@ -931,15 +924,6 @@ keep_lumpy:
 		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
 
-	/*
-	 * Tag a zone as congested if all the dirty pages encountered were
-	 * backed by a congested BDI. In this case, reclaimers should just
-	 * back off and wait for congestion to clear because further reclaim
-	 * will encounter the same problem
-	 */
-	if (nr_dirty == nr_congested && nr_dirty != 0)
-		zone_set_flag(zone, ZONE_CONGESTED);
-
 	free_page_list(&free_pages);
 
 	list_splice(&ret_pages, page_list);
@@ -1426,12 +1410,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	nr_reclaimed = shrink_page_list(&page_list, zone, sc);
+	nr_reclaimed = shrink_page_list(&page_list, sc);
 
 	/* Check if we should syncronously wait for writeback */
 	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
 		set_reclaim_mode(priority, sc, true);
-		nr_reclaimed += shrink_page_list(&page_list, zone, sc);
+		nr_reclaimed += shrink_page_list(&page_list, sc);
 	}
 
 	local_irq_disable();
@@ -2085,14 +2069,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 		/* Take a nap, wait for some writeback to complete */
 		if (!sc->hibernation_mode && sc->nr_scanned &&
-		    priority < DEF_PRIORITY - 2) {
-			struct zone *preferred_zone;
-
-			first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
-						&cpuset_current_mems_allowed,
-						&preferred_zone);
-			wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
-		}
+		    priority < DEF_PRIORITY - 2)
+			congestion_wait(BLK_RW_ASYNC, HZ/10);
 	}
 
 out:
@@ -2455,14 +2433,6 @@ loop_again:
 						min_wmark_pages(zone), end_zone, 0))
 					has_under_min_watermark_zone = 1;
 			} else {
-				/*
-				 * If a zone reaches its high watermark,
-				 * consider it to be no longer congested. It's
-				 * possible there are dirty pages backed by
-				 * congested BDIs but as pressure is relieved,
-				 * spectulatively avoid congestion waits
-				 */
-				zone_clear_flag(zone, ZONE_CONGESTED);
 				if (i <= *classzone_idx)
 					balanced += zone->present_pages;
 			}
@@ -2546,9 +2516,6 @@ out:
 				order = sc.order = 0;
 				goto loop_again;
 			}
-
-			/* If balanced, clear the congested flag */
-			zone_clear_flag(zone, ZONE_CONGESTED);
 		}
 	}
 
-- 
1.7.0.4
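
Note (commentary only, not part of the patch to be applied): the behavioural
difference being reverted can be summarized in a small userspace C sketch.
Everything below is a hypothetical model, not kernel code; names such as
bdi_congested_count and zone_congested stand in for nr_bdi_congested[] and
the ZONE_CONGESTED zone flag that this revert removes.

#include <stdbool.h>
#include <sched.h>
#include <unistd.h>

/* Hypothetical stand-ins for the kernel state consulted by the removed
 * wait_iff_congested(): nr_bdi_congested[sync] and the ZONE_CONGESTED flag. */
static int  bdi_congested_count;
static bool zone_congested;

/* After this revert: congestion_wait() style. The reclaimer always sleeps
 * for the full timeout, whether or not any BDI is actually congested. */
static void backoff_always(long timeout_ms)
{
	usleep(timeout_ms * 1000);	/* models sleeping on congestion_wqh */
}

/* Before this revert: wait_iff_congested() style. Sleep only when some BDI
 * is congested AND this zone saw recent congestion; otherwise just yield. */
static void backoff_conditional(long timeout_ms)
{
	if (bdi_congested_count == 0 || !zone_congested) {
		sched_yield();		/* models cond_resched() */
		return;
	}
	usleep(timeout_ms * 1000);
}

int main(void)
{
	backoff_always(20);		/* always stalls ~20ms (HZ/50 at HZ=1000) */
	backoff_conditional(20);	/* only yields: nothing is congested here */
	return 0;
}

In short, the revert trades the conditional back-off for unconditional
congestion_wait() stalls in direct reclaim and the allocator retry loop.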