Add MPOL_F_HOME, to implement multi-stage home node binding. Suggested-by: Andrea Arcangeli Suggested-by: Rik van Riel Signed-off-by: Peter Zijlstra Cc: Paul Turner Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/uapi/linux/mempolicy.h | 1 + mm/mempolicy.c | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) Index: tip/include/uapi/linux/mempolicy.h =================================================================== --- tip.orig/include/uapi/linux/mempolicy.h +++ tip/include/uapi/linux/mempolicy.h @@ -69,6 +69,7 @@ enum mpol_rebind_step { #define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ #define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */ #define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ +#define MPOL_F_HOME (1 << 4) /* this is the home-node policy */ #endif /* _UAPI_LINUX_MEMPOLICY_H */ Index: tip/mm/mempolicy.c =================================================================== --- tip.orig/mm/mempolicy.c +++ tip/mm/mempolicy.c @@ -2190,6 +2190,7 @@ static void sp_free(struct sp_node *n) * @page - page to be checked * @vma - vm area where page mapped * @addr - virtual address where page mapped + * @multi - use multi-stage node binding * * Lookup current policy node id for vma,addr and "compare to" page's * node id. @@ -2252,6 +2253,37 @@ int mpol_misplaced(struct page *page, st default: BUG(); } + + /* + * Multi-stage node selection is used in conjunction with a periodic + * migration fault to build a temporal task<->page relation. By + * using a two-stage filter we remove short/unlikely relations. + * + * Using P(p) ~ n_p / n_t as per frequentist probability, we can + * equate a task's usage of a particular page (n_p) per total usage + * of this page (n_t) (in a given time-span) to a probability. 
+ * + * Our periodic faults will then sample this probability and getting + * the same result twice in a row, given these samples are fully + * independent, is then given by P(p)^2, provided our sample period + * is sufficiently short compared to the usage pattern. + * + * This quadratic squishes small probabilities, making it less likely + * we act on an unlikely task<->page relation. + */ + if (pol->flags & MPOL_F_HOME) { + int last_nid; + + /* + * Migrate towards the current node, depends on + * task_numa_placement() details. + */ + polnid = numa_node_id(); + last_nid = page_xchg_last_nid(page, polnid); + if (last_nid != polnid) + goto out; + } + if (curnid != polnid) ret = polnid; out: @@ -2444,7 +2476,7 @@ void __init numa_policy_init(void) preferred_node_policy[nid] = (struct mempolicy) { .refcnt = ATOMIC_INIT(1), .mode = MPOL_PREFERRED, - .flags = MPOL_F_MOF, + .flags = MPOL_F_MOF | MPOL_F_HOME, .v = { .preferred_node = nid, }, }; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/