diff -urN linux-2.6/include/linux/swap.h linux-2.6-drop-behind/include/linux/swap.h
--- linux-2.6/include/linux/swap.h	2007-07-21 18:26:00.000000000 -0400
+++ linux-2.6-drop-behind/include/linux/swap.h	2007-07-22 16:22:48.000000000 -0400
@@ -180,6 +180,7 @@
 /* linux/mm/swap.c */
 extern void FASTCALL(lru_cache_add(struct page *));
 extern void FASTCALL(lru_cache_add_active(struct page *));
+extern void FASTCALL(lru_demote(struct page *));
 extern void FASTCALL(activate_page(struct page *));
 extern void FASTCALL(mark_page_accessed(struct page *));
 extern void lru_add_drain(void);
diff -urN linux-2.6/kernel/sysctl.c linux-2.6-drop-behind/kernel/sysctl.c
--- linux-2.6/kernel/sysctl.c	2007-07-21 18:26:01.000000000 -0400
+++ linux-2.6-drop-behind/kernel/sysctl.c	2007-07-22 16:20:27.000000000 -0400
@@ -163,6 +163,7 @@
 
 extern int prove_locking;
 extern int lock_stat;
+extern int sysctl_dropbehind;
 
 /* The default sysctl tables: */
 
@@ -1048,6 +1049,14 @@
 		.extra1		= &zero,
 	},
 #endif
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "drop_behind",
+		.data		= &sysctl_dropbehind,
+		.maxlen		= sizeof(sysctl_dropbehind),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
diff -urN linux-2.6/mm/readahead.c linux-2.6-drop-behind/mm/readahead.c
--- linux-2.6/mm/readahead.c	2007-07-21 18:26:01.000000000 -0400
+++ linux-2.6-drop-behind/mm/readahead.c	2007-07-22 16:41:47.000000000 -0400
@@ -15,6 +15,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
+#include <linux/swap.h>
 
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
@@ -429,6 +430,8 @@
 }
 EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
 
+int sysctl_dropbehind = 1;
+
 /**
  * page_cache_async_readahead - file readahead for marked pages
  * @mapping: address_space which holds the pagecache and I/O vectors
@@ -449,6 +452,11 @@
 			   struct page *page, pgoff_t offset,
 			   unsigned long req_size)
 {
+	pgoff_t demote_idx = offset - min_t(pgoff_t, offset, ra->size);
+	struct page *pages[16];
+	unsigned nr_pages;
+	unsigned i;
+
 	/* no read-ahead */
 	if (!ra->ra_pages)
 		return;
@@ -467,6 +475,36 @@
 	if (bdi_read_congested(mapping->backing_dev_info))
 		return;
 
+	/*
+	 * Read-ahead use once: when the ra window is maximal this is a good
+	 * hint that there is sequential IO, which implies that the pages that
+	 * have been used thus far can be reclaimed.
+	 */
+	if (sysctl_dropbehind && ra->size == ra->ra_pages) do {
+		nr_pages = find_get_pages(mapping,
+				demote_idx, ARRAY_SIZE(pages), pages);
+
+		for (i = 0; i < nr_pages; i++) {
+			page = pages[i];
+			demote_idx = page_index(page);
+
+			/*
+			 * The page is active. This means there are other
+			 * users. We should not take away somebody else's
+			 * pages, so do not drop behind beyond this point.
+			 */
+			if (demote_idx < offset && !PageActive(page)) {
+				lru_demote(page);
+			} else {
+				demote_idx = offset;
+				break;
+			}
+		}
+		demote_idx++;
+
+		release_pages(pages, nr_pages, 0);
+	} while (demote_idx < offset);
+
 	/* do read-ahead */
 	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
 }
diff -urN linux-2.6/mm/swap.c linux-2.6-drop-behind/mm/swap.c
--- linux-2.6/mm/swap.c	2007-07-21 18:26:01.000000000 -0400
+++ linux-2.6-drop-behind/mm/swap.c	2007-07-22 16:26:35.000000000 -0400
@@ -30,6 +30,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/init.h>
+#include <linux/rmap.h>
 
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
@@ -176,6 +177,7 @@
  */
 static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
 static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
+static DEFINE_PER_CPU(struct pagevec, lru_demote_pvecs) = { 0, };
 
 void fastcall lru_cache_add(struct page *page)
 {
@@ -197,6 +199,37 @@
 	put_cpu_var(lru_add_active_pvecs);
 }
 
+static void __pagevec_lru_demote(struct pagevec *pvec)
+{
+	int i;
+	struct zone *zone = NULL;
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		struct zone *pagezone = page_zone(page);
+
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
+		if (PageLRU(page)) {
+			page_referenced(page, 0);
+			if (PageActive(page)) {
+				ClearPageActive(page);
+				__dec_zone_state(zone, NR_ACTIVE);
+				__inc_zone_state(zone, NR_INACTIVE);
+			}
+			list_move_tail(&page->lru, &zone->inactive_list);
+		}
+	}
+	if (zone)
+		spin_unlock_irq(&zone->lru_lock);
+	release_pages(pvec->pages, pvec->nr, pvec->cold);
+	pagevec_reinit(pvec);
+}
+
 static void __lru_add_drain(int cpu)
 {
 	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
@@ -207,6 +240,9 @@
 	pvec = &per_cpu(lru_add_active_pvecs, cpu);
 	if (pagevec_count(pvec))
 		__pagevec_lru_add_active(pvec);
+	pvec = &per_cpu(lru_demote_pvecs, cpu);
+	if (pagevec_count(pvec))
+		__pagevec_lru_demote(pvec);
 }
 
 void lru_add_drain(void)
@@ -403,6 +439,21 @@
 }
 
 /*
+ * Function used to forcefully demote a page to the tail of the inactive
+ * list.
+ */
+void fastcall lru_demote(struct page *page)
+{
+	if (likely(get_page_unless_zero(page))) {
+		struct pagevec *pvec = &get_cpu_var(lru_demote_pvecs);
+
+		if (!pagevec_add(pvec, page))
+			__pagevec_lru_demote(pvec);
+		put_cpu_var(lru_demote_pvecs);
+	}
+}
+
+/*
 * Try to drop buffers from the pages in a pagevec
 */
void pagevec_strip(struct pagevec *pvec)
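
---

A rough userspace check, not part of the patch: the file path, chunk size and
mincore() bookkeeping below are all illustrative. Reading a file sequentially
should ramp the readahead window up to ra_pages and take the drop-behind path
above; the demoted pages only actually leave the page cache under reclaim
pressure, so run this against two or three files whose total size exceeds free
memory, then re-check the first file. Its residency should collapse with
drop_behind enabled and stay high with it disabled.

	#include <stdio.h>
	#include <stdlib.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/mman.h>
	#include <sys/stat.h>

	int main(int argc, char **argv)
	{
		/* Placeholder default; pass a real large file instead. */
		const char *path = argc > 1 ? argv[1] : "/tmp/testfile";
		static char buf[1 << 20];	/* 1 MiB read chunks */
		unsigned char *vec;
		struct stat st;
		size_t resident = 0, pages, i;
		void *map;
		int fd;

		fd = open(path, O_RDONLY);
		if (fd < 0 || fstat(fd, &st) < 0) {
			perror(path);
			return 1;
		}

		/* Sequential read: looks like "use once" IO to the kernel. */
		while (read(fd, buf, sizeof(buf)) > 0)
			;

		/* Map the file and ask which pages are still resident. */
		pages = (st.st_size + getpagesize() - 1) / getpagesize();
		map = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
		vec = malloc(pages);
		if (map == MAP_FAILED || !vec ||
		    mincore(map, st.st_size, vec) < 0) {
			perror("mincore");
			return 1;
		}

		for (i = 0; i < pages; i++)
			resident += vec[i] & 1;

		printf("%s: %zu of %zu pages resident\n", path, resident, pages);
		return 0;
	}

The demotion itself is batched through a per-CPU pagevec, mirroring
lru_cache_add(): lru_demote() only takes zone->lru_lock once per 14 pages
(or once per zone change) instead of once per page.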
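Since the table entry uses proc_dointvec with mode 0644, the knob can be
flipped at runtime for A/B runs. Assuming the entry is registered in the vm
table, it shows up as /proc/sys/vm/drop_behind; a minimal helper for a test
harness (set_drop_behind() and the path are this sketch's own names, not part
of the patch):

	#include <stdio.h>

	/* Write 0 or 1 to the drop_behind sysctl; returns 0 on success.
	 * Path assumes the entry above lands in the vm sysctl table. */
	static int set_drop_behind(int on)
	{
		FILE *f = fopen("/proc/sys/vm/drop_behind", "w");

		if (!f)
			return -1;
		fprintf(f, "%d\n", !!on);
		return fclose(f);
	}

	int main(void)
	{
		return set_drop_behind(0) ? 1 : 0;	/* disable drop-behind */
	}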