The file_ra_state.age is node specific; comparing ages between two nodes is a
bug. So add code to
	- take down the node id in file_ra_state.flags;
	- ensure that we are comparing the ages from the same node.

Signed-off-by: Fengguang Wu
---
 include/linux/fs.h |    2 +-
 mm/readahead.c     |   34 +++++++++++++++++++++++++++++-----
 2 files changed, 30 insertions(+), 6 deletions(-)

--- linux-2.6.21-rc3-mm2.orig/include/linux/fs.h
+++ linux-2.6.21-rc3-mm2/include/linux/fs.h
@@ -738,7 +738,7 @@ struct file_ra_state {
 	unsigned long mmap_hit;		/* Cache hit stat for mmap accesses */
 	unsigned long mmap_miss;	/* Cache miss stat for mmap accesses */
 
-	unsigned long flags;	/* RA_FLAG_xxx | ra_class_old | ra_class_new */
+	unsigned long flags;	/* RA_FLAG_xxx | node_id | class_old | class_new */
 	unsigned long prev_page;	/* Cache last read() position */
 	unsigned long ra_pages;		/* Maximum readahead window */
 };
--- linux-2.6.21-rc3-mm2.orig/mm/readahead.c
+++ linux-2.6.21-rc3-mm2/mm/readahead.c
@@ -52,11 +52,13 @@ EXPORT_SYMBOL_GPL(readahead_ratio);
 int readahead_hit_rate = 1;
 #endif /* CONFIG_ADAPTIVE_READAHEAD */
 
+#define RA_CLASS_SHIFT 4
+#define RA_CLASS_MASK  ((1 << RA_CLASS_SHIFT) - 1)
+#define RA_NODE_SHIFT  (2 * RA_CLASS_SHIFT)
+#define RA_NODE_MASK   ((MAX_NUMNODES-1) << RA_NODE_SHIFT)
 /*
  * Detailed classification of read-ahead behaviors.
  */
-#define RA_CLASS_SHIFT 4
-#define RA_CLASS_MASK  ((1 << RA_CLASS_SHIFT) - 1)
 enum ra_class {
 	RA_CLASS_ALL,
 	RA_CLASS_INITIAL,
@@ -802,6 +804,15 @@ static inline enum ra_class ra_class_old
 	return (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK;
 }
 
+/*
+ * The node on which ra->age was sampled.  RA_NODE_MASK is already shifted
+ * into position, so mask first and shift afterwards.
+ */
+static inline int ra_node_id(struct file_ra_state *ra)
+{
+	return (ra->flags & RA_NODE_MASK) >> RA_NODE_SHIFT;
+}
+
 static unsigned long ra_readahead_size(struct file_ra_state *ra)
 {
 	return ra->readahead_index - ra->ra_index;
@@ -864,6 +875,18 @@ static void ra_set_size(struct file_ra_s
 }
 
 /*
+ * Save the current node id and age.
+ */
+static void ra_save_node_age(struct file_ra_state *ra)
+{
+	int nid = numa_node_id();
+
+	ra->flags &= ~RA_NODE_MASK;
+	ra->flags |= nid << RA_NODE_SHIFT;
+	ra->age = nr_scanned_pages_node(nid);
+}
+
+/*
  * Submit IO for the read-ahead request in file_ra_state.
  */
 static unsigned long ra_submit(struct file_ra_state *ra,
@@ -901,7 +924,7 @@ static unsigned long ra_submit(struct fi
 	}
 
 	/* Take down the current read-ahead aging value. */
-	ra->age = nr_scanned_pages_node(numa_node_id());
+	ra_save_node_age(ra);
 
 	ra_size = ra_readahead_size(ra);
 	la_size = ra_lookahead_size(ra);
@@ -1025,9 +1048,10 @@ static unsigned long compute_thrashing_t
 	unsigned long stream_shift;
 	unsigned long ra_size;
 	uint64_t ll;
+	int nid = ra_node_id(ra);
 
-	global_size = nr_free_inactive_pages_node(numa_node_id());
-	global_shift = nr_scanned_pages_node(numa_node_id()) - ra->age;
+	global_size = nr_free_inactive_pages_node(nid);
+	global_shift = nr_scanned_pages_node(nid) - ra->age;
 	global_shift |= 1UL;
 
 	stream_shift = ra_invoke_interval(ra);
--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/