Use the read-ahead code to provide hints to page reclaim.

This patch has the potential to solve the "streaming IO trashes my
desktop" problem: it tries to aggressively reclaim pages that were
brought in by a strong sequential read pattern and have already been
consumed, thereby limiting the damage to the current resident set.
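For comparison: an application that knows it is streaming can already
approximate this drop-behind behaviour from userspace with
posix_fadvise(). The sketch below is purely illustrative (the chunk
size and the command-line file argument are arbitrary choices); the
point of the patch is to get the same effect automatically, without
every streaming application having to issue such hints:

/*
 * Userspace approximation of drop-behind: stream through a file and
 * tell the kernel that consumed ranges will not be needed again, so
 * reclaim takes those pages first.
 */
#define _XOPEN_SOURCE 600	/* for posix_fadvise() */
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

#define CHUNK (1 << 20)		/* 1 MiB per read; arbitrary */

int main(int argc, char **argv)
{
	char *buf = malloc(CHUNK);
	off_t done = 0;
	ssize_t n;
	int fd;

	if (argc < 2 || !buf)
		return 1;

	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	while ((n = read(fd, buf, CHUNK)) > 0) {
		/* the pages backing [done, done + n) have been consumed */
		posix_fadvise(fd, done, n, POSIX_FADV_DONTNEED);
		done += n;
	}

	close(fd);
	free(buf);
	return 0;
}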
Signed-off-by: Peter Zijlstra
---
 include/linux/swap.h |    1 +
 mm/readahead.c       |   39 ++++++++++++++++++++++++++++++++++++++-
 mm/swap.c            |   51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 90 insertions(+), 1 deletion(-)

Index: linux-2.6/mm/swap.c
===================================================================
--- linux-2.6.orig/mm/swap.c
+++ linux-2.6/mm/swap.c
@@ -30,6 +30,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/backing-dev.h>
+#include <linux/rmap.h>
 
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
@@ -176,6 +177,7 @@ EXPORT_SYMBOL(mark_page_accessed);
  */
 static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
 static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
+static DEFINE_PER_CPU(struct pagevec, lru_demote_pvecs) = { 0, };
 
 void fastcall lru_cache_add(struct page *page)
 {
@@ -197,6 +199,37 @@ void fastcall lru_cache_add_active(struc
 	put_cpu_var(lru_add_active_pvecs);
 }
 
+static void __pagevec_lru_demote(struct pagevec *pvec)
+{
+	int i;
+	struct zone *zone = NULL;
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		struct zone *pagezone = page_zone(page);
+
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
+		if (PageLRU(page)) {
+			page_referenced(page, 0);
+			if (PageActive(page)) {
+				ClearPageActive(page);
+				__dec_zone_state(zone, NR_ACTIVE);
+				__inc_zone_state(zone, NR_INACTIVE);
+			}
+			list_move_tail(&page->lru, &zone->inactive_list);
+		}
+	}
+	if (zone)
+		spin_unlock_irq(&zone->lru_lock);
+	release_pages(pvec->pages, pvec->nr, pvec->cold);
+	pagevec_reinit(pvec);
+}
+
 static void __lru_add_drain(int cpu)
 {
 	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
@@ -207,6 +240,9 @@ static void __lru_add_drain(int cpu)
 	pvec = &per_cpu(lru_add_active_pvecs, cpu);
 	if (pagevec_count(pvec))
 		__pagevec_lru_add_active(pvec);
+	pvec = &per_cpu(lru_demote_pvecs, cpu);
+	if (pagevec_count(pvec))
+		__pagevec_lru_demote(pvec);
 }
 
 void lru_add_drain(void)
@@ -403,6 +439,21 @@ void __pagevec_lru_add_active(struct pag
 }
 
 /*
+ * Function used to forcefully demote a page to the tail of the inactive
+ * list.
+ */
+void fastcall lru_demote(struct page *page)
+{
+	if (likely(get_page_unless_zero(page))) {
+		struct pagevec *pvec = &get_cpu_var(lru_demote_pvecs);
+
+		if (!pagevec_add(pvec, page))
+			__pagevec_lru_demote(pvec);
+		put_cpu_var(lru_demote_pvecs);
+	}
+}
+
+/*
  * Try to drop buffers from the pages in a pagevec
  */
 void pagevec_strip(struct pagevec *pvec)
Index: linux-2.6/include/linux/swap.h
===================================================================
--- linux-2.6.orig/include/linux/swap.h
+++ linux-2.6/include/linux/swap.h
@@ -180,6 +180,7 @@ extern unsigned int nr_free_pagecache_pa
 /* linux/mm/swap.c */
 extern void FASTCALL(lru_cache_add(struct page *));
 extern void FASTCALL(lru_cache_add_active(struct page *));
+extern void FASTCALL(lru_demote(struct page *));
 extern void FASTCALL(activate_page(struct page *));
 extern void FASTCALL(mark_page_accessed(struct page *));
 extern void lru_add_drain(void);
Index: linux-2.6/mm/readahead.c
===================================================================
--- linux-2.6.orig/mm/readahead.c
+++ linux-2.6/mm/readahead.c
@@ -15,6 +15,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
+#include <linux/swap.h>
 
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
@@ -448,13 +449,19 @@ EXPORT_SYMBOL_GPL(page_cache_sync_readah
  * page_cache_async_ondemand() should be called when a page is used which
  * has the PG_readahead flag: this is a marker to suggest that the application
  * has used up enough of the readahead window that we should start pulling in
- * more pages. */
+ * more pages.
+ */
 void
 page_cache_async_readahead(struct address_space *mapping,
 			   struct file_ra_state *ra, struct file *filp,
 			   struct page *page, pgoff_t offset,
 			   unsigned long req_size)
 {
+	pgoff_t demote_idx = offset - min_t(pgoff_t, offset, ra->size);
+	struct page *pages[16];
+	unsigned nr_pages;
+	unsigned i;
+
 	/* no read-ahead */
 	if (!ra->ra_pages)
 		return;
@@ -473,6 +480,36 @@ page_cache_async_readahead(struct addres
 	if (bdi_read_congested(mapping->backing_dev_info))
 		return;
 
+	/*
+	 * Read-ahead use once: when the ra window is maximal this is a good
+	 * hint that there is sequential IO, which implies that the pages that
+	 * have been used thus far can be reclaimed.
+	 */
+	if (ra->size == ra->ra_pages) do {
+		nr_pages = find_get_pages(mapping,
+				demote_idx, ARRAY_SIZE(pages), pages);
+
+		for (i = 0; i < nr_pages; i++) {
+			page = pages[i];
+			demote_idx = page_index(page);
+
+			/*
+			 * The page is active. This means there are other
+			 * users. We should not take away somebody else's
+			 * pages, so do not drop behind beyond this point.
+			 */
+			if (demote_idx < offset && !PageActive(page)) {
+				lru_demote(page);
+			} else {
+				demote_idx = offset;
+				break;
+			}
+		}
+		demote_idx++;
+
+		release_pages(pages, nr_pages, 0);
+	} while (demote_idx < offset);
+
 	/* do read-ahead */
 	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
 }
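As a postscript, for anyone wanting to reproduce the problem case: a
plain sequential reader over a file larger than RAM should ramp the
readahead window up to ra_pages and so trigger the drop-behind path
above. The reader below is a minimal, illustrative sketch (the 64 KiB
buffer size is arbitrary); with the patch applied, the effect should
be visible in /proc/vmstat as nr_inactive absorbing the stream while
nr_active stays put:

/* plain streaming reader: touch each page once, never revisit */
#include <fcntl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	char buf[1 << 16];
	int fd;

	if (argc < 2)
		return 1;

	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	while (read(fd, buf, sizeof(buf)) > 0)
		;	/* consume sequentially */

	close(fd);
	return 0;
}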