Introduce function rescue_pages() to protect pages in danger of thrashing.

Signed-off-by: Wu Fengguang
Signed-off-by: Andrew Morton
---

--- linux-2.6.19-rc5-mm1.orig/mm/readahead.c
+++ linux-2.6.19-rc5-mm1/mm/readahead.c
@@ -708,6 +708,96 @@ unsigned long max_sane_readahead(unsigne
 }
 
 /*
+ * Adaptive read-ahead.
+ *
+ * Good read patterns are compact both in space and in time.  The read-ahead
+ * logic tries to grant a larger read-ahead size to better readers, under the
+ * constraint of system memory and load pressure.
+ *
+ * It employs two methods to estimate the max thrashing-safe read-ahead size:
+ *	1. state based   - the default one
+ *	2. context based - the failsafe one
+ * The integration of the two methods has the merit of being agile and robust.
+ * It also keeps the overall design clean: special cases are in general handled
+ * by the stateless method, leaving the stateful one simple and fast.
+ *
+ * To improve throughput and decrease read delay, the logic 'looks ahead'.
+ * In most read-ahead chunks, one page will be selected and tagged with
+ * PG_readahead.  Later, when the page tagged PG_readahead is read, the logic
+ * will be notified to submit the next read-ahead chunk in advance.
+ *
+ *                a read-ahead chunk
+ *  +-----------------------------------------+
+ *  |       # PG_readahead                    |
+ *  +-----------------------------------------+
+ *          ^ When this page is read, notify me for the next read-ahead.
+ *
+ */
+
+#ifdef CONFIG_ADAPTIVE_READAHEAD
+
+/*
+ * Move pages in danger (of thrashing) to the head of inactive_list.
+ * Not expected to happen frequently.
+ *
+ * @page itself will be skipped: it has been grabbed and won't go away.
+ * The following @nr_pages - 1 pages will be protected.
+ */
+static unsigned long rescue_pages(struct page *page, unsigned long nr_pages)
+{
+	int pgrescue = 0;
+	pgoff_t index = page_index(page);
+	struct address_space *mapping = page_mapping(page);
+	struct page *grabbed_page = NULL;
+	struct zone *zone;
+
+	dprintk("rescue_pages(ino=%lu, index=%lu nr=%lu)\n",
+			mapping->host->i_ino, index, nr_pages);
+
+	for (;;) {
+		zone = page_zone(page);
+		spin_lock_irq(&zone->lru_lock);
+
+		if (!PageLRU(page))
+			goto out_unlock;
+
+		while (page_mapping(page) == mapping &&
+				page_index(page) == index) {
+			struct page *the_page = page;
+			page = list_entry((page)->lru.prev, struct page, lru);
+			if (!PageActive(the_page) &&
+					!PageLocked(the_page) &&
+					page_count(the_page) == 1) {
+				list_move(&the_page->lru, &zone->inactive_list);
+				pgrescue++;
+			}
+			index++;
+			if (!--nr_pages)
+				goto out_unlock;
+		}
+
+		spin_unlock_irq(&zone->lru_lock);
+		cond_resched();
+
+		if (grabbed_page)
+			page_cache_release(grabbed_page);
+		grabbed_page = page = find_get_page(mapping, index);
+		if (!page)
+			goto out;
+	}
+
+out_unlock:
+	spin_unlock_irq(&zone->lru_lock);
+out:
+	if (grabbed_page)
+		page_cache_release(grabbed_page);
+	ra_account(NULL, RA_EVENT_READAHEAD_RESCUE, pgrescue);
+	return nr_pages;
+}
+
+#endif /* CONFIG_ADAPTIVE_READAHEAD */
+
+/*
  * Read-ahead events accounting.
  */
 #ifdef CONFIG_DEBUG_READAHEAD
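
No caller of rescue_pages() appears in this hunk; it gets wired up elsewhere
in the series.  For illustration only, here is a rough sketch of how a
read-ahead path might invoke it -- try_rescue_window() is a hypothetical
name, not part of this patch.  It relies only on the 2.6.19-era page cache
API (find_get_page()/page_cache_release()), which satisfies the "grabbed"
precondition documented above by holding a reference on the first page:

/*
 * Hypothetical caller sketch, not part of this patch.
 * rescue_pages() expects its first page to be held by the caller;
 * find_get_page() takes that reference, and we drop it afterwards.
 */
static void try_rescue_window(struct address_space *mapping,
				pgoff_t index, unsigned long nr_pages)
{
	struct page *page = find_get_page(mapping, index);

	if (!page)
		return;		/* first page already evicted */
	rescue_pages(page, nr_pages);
	page_cache_release(page);
}

Since nr_pages is decremented once per page visited, rescue_pages() returns
0 when the whole range was walked and the count of unreached pages otherwise,
so a caller could feed the return value back into its window size estimation.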