Add another layer of fallback policy to make the home node concept useful from a memory allocation PoV. This changes the mpol order to: - vma->vm_ops->get_policy [if applicable] - vma->vm_policy [if applicable] - task->mempolicy - tsk_home_node() preferred [NEW] - default_policy Note that the tsk_home_node() policy has Migrate-on-Fault enabled to facilitate efficient on-demand memory migration. Signed-off-by: Peter Zijlstra --- mm/mempolicy.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -117,6 +117,22 @@ static struct mempolicy default_policy = .flags = MPOL_F_LOCAL, }; +static struct mempolicy preferred_node_policy[MAX_NUMNODES]; + +static struct mempolicy *get_task_policy(struct task_struct *p) +{ + struct mempolicy *pol = p->mempolicy; + int node; + + if (!pol) { + node = tsk_home_node(p); + if (node != -1) + pol = &preferred_node_policy[node]; + } + + return pol; +} + static const struct mempolicy_operations { int (*create)(struct mempolicy *pol, const nodemask_t *nodes); /* @@ -1478,7 +1494,7 @@ asmlinkage long compat_sys_mbind(compat_ struct mempolicy *get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = task->mempolicy; + struct mempolicy *pol = get_task_policy(task); if (vma) { if (vma->vm_ops && vma->vm_ops->get_policy) { @@ -1856,7 +1872,7 @@ alloc_pages_vma(gfp_t gfp, int order, st */ struct page *alloc_pages_current(gfp_t gfp, unsigned order) { - struct mempolicy *pol = current->mempolicy; + struct mempolicy *pol = get_task_policy(current); struct page *page; if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) @@ -2302,6 +2318,15 @@ void __init numa_policy_init(void) sizeof(struct sp_node), 0, SLAB_PANIC, NULL); + for_each_node(nid) { + preferred_node_policy[nid] = (struct mempolicy) { + .refcnt = ATOMIC_INIT(1), + .mode = MPOL_PREFERRED, + .flags = MPOL_F_MOF, + .v = { .preferred_node = nid, }, + }; + } + /* * Set interleaving policy for system init. Interleaving is only * enabled across suitably sized nodes (default is >= 16MB), or -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/