Add another layer of fallback policy to make the home node concept useful from a memory allocation PoV. This changes the mpol order to: - vma->vm_ops->get_policy [if applicable] - vma->vm_policy [if applicable] - task->mempolicy - tsk_home_node() preferred [NEW] - default_policy Note that the tsk_home_node() policy has Migrate-on-Fault enabled to facilitate efficient on-demand memory migration. Cc: Paul Turner Cc: Lee Schermerhorn Cc: Christoph Lameter Cc: Rik van Riel Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Peter Zijlstra --- mm/mempolicy.c | 29 +++++++++++++++++++++++++++-- 1 files changed, 27 insertions(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e0dde20..3e109ae 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -117,6 +117,22 @@ static struct mempolicy default_policy = { .flags = MPOL_F_LOCAL, }; +static struct mempolicy preferred_node_policy[MAX_NUMNODES]; + +static struct mempolicy *get_task_policy(struct task_struct *p) +{ + struct mempolicy *pol = p->mempolicy; + int node; + + if (!pol) { + node = tsk_home_node(p); + if (node != -1) + pol = &preferred_node_policy[node]; + } + + return pol; +} + static const struct mempolicy_operations { int (*create)(struct mempolicy *pol, const nodemask_t *nodes); /* @@ -1519,7 +1535,7 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, struct mempolicy *get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = task->mempolicy; + struct mempolicy *pol = get_task_policy(task); if (vma) { if (vma->vm_ops && vma->vm_ops->get_policy) { @@ -1903,7 +1919,7 @@ retry_cpuset: */ struct page *alloc_pages_current(gfp_t gfp, unsigned order) { - struct mempolicy *pol = current->mempolicy; + struct mempolicy *pol = get_task_policy(current); struct page *page; unsigned int cpuset_mems_cookie; @@ -2371,6 +2387,15 @@ void __init numa_policy_init(void) sizeof(struct sp_node), 0, SLAB_PANIC, NULL); + for_each_node(nid) { + preferred_node_policy[nid] = (struct mempolicy) { + .refcnt = ATOMIC_INIT(1), + .mode = MPOL_PREFERRED, + .flags = MPOL_F_MOF, + .v = { .preferred_node = nid, }, + }; + } + /* * Set interleaving policy for system init. Interleaving is only * enabled across suitably sized nodes (default is >= 16MB), or -- 1.7.2.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/