Add new sysctl entries in /proc/sys/vm: - readahead_ratio = 50 i.e. set read-ahead size to <= readahead_ratio% thrashing threshold - readahead_hit_rate = 0 i.e. read-ahead hit ratio >= 1/readahead_hit_rate is deemed ok readahead_ratio also provides a way to select read-ahead logic at runtime: condition action ========================================================================== readahead_ratio == 0 disable read-ahead readahead_ratio == 1 select the (old) stock read-ahead logic readahead_ratio >= 2 select the (new) adaptive read-ahead logic readahead_hit_rate controls the features provided by context based read-ahead: condition enabled function ========================================================================== readahead_hit_rate == 0 handle only known good cases i.e. nfsd read and seek/cache hit recovery readahead_hit_rate == 1 also detect interleaved sequential reads readahead_hit_rate >= 2 further handle sparse access patterns Signed-off-by: Wu Fengguang DESC readahead-sysctl-parameters-fix EDESC From: Andrew Morton mm/readahead.c:41: parse error before numeric constant Cc: Wu Fengguang Signed-off-by: Andrew Morton --- linux-2.6.19-rc4-mm1.orig/Documentation/sysctl/vm.txt +++ linux-2.6.19-rc4-mm1/Documentation/sysctl/vm.txt @@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/ - min_slab_ratio - panic_on_oom - swap_prefetch +- readahead_ratio +- readahead_hit_rate ============================================================== @@ -216,3 +218,47 @@ copying back pages from swap into the sw practice it can take many minutes before the vm is idle enough. The default value is 1. + +============================================================== + +readahead_ratio + +This limits the readahead size to a percentage of the thrashing threshold. +The thrashing threshold is dynamically estimated from the _history_ read +speed and system load, to deduce the _future_ readahead request size.
+ +Set it to a smaller value if you do not have enough memory for all the +concurrent readers, or if the I/O load fluctuates a lot. But if there's +plenty of memory (>>2MB per reader), a bigger value may help performance. + +readahead_ratio also selects the readahead logic: + VALUE CODE PATH + ------------------------------------------- + 0 disable readahead totally + 1 select the stock readahead logic + 2-100 select the adaptive readahead logic + +The default value is 50. Reasonable values would be [50, 100]. + +============================================================== + +readahead_hit_rate + +This is the allowed sparseness (readahead-pages:accessed-pages) of the +context based readahead. If the previous readahead had a bad hit rate, +the kernel will be reluctant to do the next readahead. + +The context based readahead logic can catch some semi-sequential patterns, +i.e. interleaved/intermixed reading. They are subtle and therefore missed by +the state based logic. However, the logic can be overzealous and may hurt the +performance of pure random reads. + +Possible values are: +0 only handle some known good cases, e.g. nfsd reads +1 detect semi-sequential read patterns, found in some PostgreSQL + applications and video streaming services +2-8 detect sparse access patterns + +The larger the value, the more capabilities are enabled, at the cost of more possible overhead. + +The default value is 0.
--- linux-2.6.19-rc4-mm1.orig/include/linux/mm.h +++ linux-2.6.19-rc4-mm1/include/linux/mm.h @@ -1064,6 +1064,17 @@ void handle_ra_miss(struct address_space struct file_ra_state *ra, pgoff_t offset); unsigned long max_sane_readahead(unsigned long nr); +#ifdef CONFIG_ADAPTIVE_READAHEAD +extern int readahead_ratio; +#else +#define readahead_ratio 1 +#endif /* CONFIG_ADAPTIVE_READAHEAD */ + +static inline int prefer_adaptive_readahead(void) +{ + return readahead_ratio > 1; +} + /* Do stack extension */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); #ifdef CONFIG_IA64 --- linux-2.6.19-rc4-mm1.orig/include/linux/sysctl.h +++ linux-2.6.19-rc4-mm1/include/linux/sysctl.h @@ -204,6 +204,8 @@ enum VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ VM_SWAP_PREFETCH=36, /* swap prefetch */ + VM_READAHEAD_RATIO=37, /* percent of read-ahead size to thrashing-threshold */ + VM_READAHEAD_HIT_RATE=38, /* one accessed page legitimizes so many read-ahead pages */ }; --- linux-2.6.19-rc4-mm1.orig/kernel/sysctl.c +++ linux-2.6.19-rc4-mm1/kernel/sysctl.c @@ -78,6 +78,11 @@ extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; extern int compat_log; +#if defined(CONFIG_ADAPTIVE_READAHEAD) +extern int readahead_ratio; +extern int readahead_hit_rate; +#endif + /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid; @@ -1034,6 +1039,28 @@ static ctl_table vm_table[] = { .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_ADAPTIVE_READAHEAD + { + .ctl_name = VM_READAHEAD_RATIO, + .procname = "readahead_ratio", + .data = &readahead_ratio, + .maxlen = sizeof(readahead_ratio), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, + { + .ctl_name = VM_READAHEAD_HIT_RATE, + .procname = "readahead_hit_rate", + .data = &readahead_hit_rate, + .maxlen = 
sizeof(readahead_hit_rate), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, +#endif { .ctl_name = 0 } }; --- linux-2.6.19-rc4-mm1.orig/mm/readahead.c +++ linux-2.6.19-rc4-mm1/mm/readahead.c @@ -27,6 +27,25 @@ #define MIN_RA_PAGES DIV_ROUND_UP(VM_MIN_READAHEAD*1024, PAGE_CACHE_SIZE) /* + * Adaptive read-ahead parameters. + */ + +/* In laptop mode, poll delayed look-ahead on every ## pages read. */ +#define LAPTOP_POLL_INTERVAL 16 + +/* Set look-ahead size to 1/# of the thrashing-threshold. */ +#define LOOKAHEAD_RATIO 8 + +#ifdef CONFIG_ADAPTIVE_READAHEAD +/* Set read-ahead size to ##% of the thrashing-threshold. */ +int readahead_ratio = 50; +EXPORT_SYMBOL_GPL(readahead_ratio); +#endif + +/* Readahead as long as cache hit ratio keeps above 1/##. */ +int readahead_hit_rate = 0; + +/* * Detailed classification of read-ahead behaviors. */ #define RA_CLASS_SHIFT 4 -- - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/