Only do the congestion wait when we actually encountered congestion.

Signed-off-by: Peter Zijlstra
---
 include/linux/swap.h |    1 +
 mm/page_io.c         |    9 +++++++++
 mm/vmscan.c          |   25 ++++++++++++++++++++-----
 3 files changed, 30 insertions(+), 5 deletions(-)

Index: linux-2.6-mm/mm/vmscan.c
===================================================================
--- linux-2.6-mm.orig/mm/vmscan.c	2007-04-05 16:29:49.000000000 +0200
+++ linux-2.6-mm/mm/vmscan.c	2007-04-05 16:35:36.000000000 +0200
@@ -70,6 +70,8 @@ struct scan_control {
 	int all_unreclaimable;
 
 	int order;
+
+	int encountered_congestion;
 };
 
 /*
@@ -315,7 +317,8 @@ typedef enum {
  * pageout is called by shrink_page_list() for each dirty page.
  * Calls ->writepage().
  */
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+static pageout_t pageout(struct page *page, struct address_space *mapping,
+			 struct scan_control *sc)
 {
 	/*
 	 * If the page is dirty, only perform writeback if that write
@@ -357,6 +360,7 @@ static pageout_t pageout(struct page *pa
 
 	if (clear_page_dirty_for_io(page)) {
 		int res;
+		struct backing_dev_info *bdi;
 		struct writeback_control wbc = {
 			.sync_mode = WB_SYNC_NONE,
 			.nr_to_write = SWAP_CLUSTER_MAX,
@@ -366,6 +370,14 @@ static pageout_t pageout(struct page *pa
 			.for_reclaim = 1,
 		};
 
+		if (mapping == &swapper_space)
+			bdi = swap_bdi(page);
+		else
+			bdi = mapping->backing_dev_info;
+
+		if (bdi_congested(bdi, WRITE))
+			sc->encountered_congestion = 1;
+
 		SetPageReclaim(page);
 		res = mapping->a_ops->writepage(page, &wbc);
 		if (res < 0)
@@ -533,7 +545,7 @@ static unsigned long shrink_page_list(st
 				goto keep_locked;
 
 			/* Page is dirty, try to write it out here */
-			switch(pageout(page, mapping)) {
+			switch(pageout(page, mapping, sc)) {
 			case PAGE_KEEP:
 				goto keep_locked;
 			case PAGE_ACTIVATE:
@@ -1141,6 +1153,7 @@ unsigned long try_to_free_pages(struct z
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		sc.nr_scanned = 0;
+		sc.encountered_congestion = 0;
 		if (!priority)
 			disable_swap_token();
 		nr_reclaimed += shrink_zones(priority, zones, &sc);
@@ -1169,7 +1182,7 @@ unsigned long try_to_free_pages(struct z
 		}
 
 		/* Take a nap, wait for some writeback to complete */
-		if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
+		if (sc.encountered_congestion)
 			congestion_wait(WRITE, HZ/10);
 	}
 	/* top priority shrink_caches still had more to do? don't OOM, then */
@@ -1250,6 +1263,7 @@ loop_again:
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
 
+		sc.encountered_congestion = 0;
 		/* The swap token gets in the way of swapout... */
 		if (!priority)
 			disable_swap_token();
@@ -1337,7 +1351,7 @@ loop_again:
 		 * OK, kswapd is getting into trouble.  Take a nap, then take
 		 * another pass across the zones.
 		 */
-		if (total_scanned && priority < DEF_PRIORITY - 2)
+		if (sc.encountered_congestion)
 			congestion_wait(WRITE, HZ/10);
 
 		/*
@@ -1580,6 +1594,7 @@ unsigned long shrink_all_memory(unsigned
 			unsigned long nr_to_scan = nr_pages - ret;
 
 			sc.nr_scanned = 0;
+			sc.encountered_congestion = 0;
 			ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
 			if (ret >= nr_pages)
 				goto out;
@@ -1591,7 +1606,7 @@ unsigned long shrink_all_memory(unsigned
 			if (ret >= nr_pages)
 				goto out;
 
-			if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
+			if (sc.encountered_congestion)
 				congestion_wait(WRITE, HZ / 10);
 		}
 	}
Index: linux-2.6-mm/include/linux/swap.h
===================================================================
--- linux-2.6-mm.orig/include/linux/swap.h	2007-04-05 16:24:02.000000000 +0200
+++ linux-2.6-mm/include/linux/swap.h	2007-04-05 16:35:36.000000000 +0200
@@ -220,6 +220,7 @@ extern void swap_unplug_io_fn(struct bac
 
 #ifdef CONFIG_SWAP
 /* linux/mm/page_io.c */
+extern struct backing_dev_info *swap_bdi(struct page *);
 extern int swap_readpage(struct file *, struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
 extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err);
Index: linux-2.6-mm/mm/page_io.c
===================================================================
--- linux-2.6-mm.orig/mm/page_io.c	2007-04-05 16:24:02.000000000 +0200
+++ linux-2.6-mm/mm/page_io.c	2007-04-05 16:36:26.000000000 +0200
@@ -19,6 +19,15 @@
 #include
 #include
 
+struct backing_dev_info *swap_bdi(struct page *page)
+{
+	struct swap_info_struct *sis;
+	swp_entry_t entry = { .val = page_private(page), };
+
+	sis = get_swap_info_struct(swp_type(entry));
+	return blk_get_backing_dev_info(sis->bdev);
+}
+
 static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
 				struct page *page, bio_end_io_t end_io)
 {
--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/