[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1345647560-30387-25-git-send-email-aarcange@redhat.com>
Date: Wed, 22 Aug 2012 16:59:08 +0200
From: Andrea Arcangeli <aarcange@...hat.com>
To: linux-kernel@...r.kernel.org, linux-mm@...ck.org
Cc: Hillf Danton <dhillf@...il.com>, Dan Smith <danms@...ibm.com>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...e.hu>, Paul Turner <pjt@...gle.com>,
Suresh Siddha <suresh.b.siddha@...el.com>,
Mike Galbraith <efault@....de>,
"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
Lai Jiangshan <laijs@...fujitsu.com>,
Bharata B Rao <bharata.rao@...il.com>,
Lee Schermerhorn <Lee.Schermerhorn@...com>,
Rik van Riel <riel@...hat.com>,
Johannes Weiner <hannes@...xchg.org>,
Srivatsa Vaddagiri <vatsa@...ux.vnet.ibm.com>,
Christoph Lameter <cl@...ux.com>,
Alex Shi <alex.shi@...el.com>,
Mauricio Faria de Oliveira <mauricfo@...ux.vnet.ibm.com>,
Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>,
Don Morris <don.morris@...com>,
Benjamin Herrenschmidt <benh@...nel.crashing.org>
Subject: [PATCH 24/36] autonuma: numa hinting page faults entry points
This is where the numa hinting page faults are detected and are passed
over to the AutoNUMA core logic.
Signed-off-by: Andrea Arcangeli <aarcange@...hat.com>
---
include/linux/huge_mm.h | 2 ++
mm/huge_memory.c | 18 ++++++++++++++++++
mm/memory.c | 31 +++++++++++++++++++++++++++++++
3 files changed, 51 insertions(+), 0 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ad4e2e0..5270c81 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -11,6 +11,8 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd,
pmd_t orig_pmd);
+extern pmd_t __huge_pmd_numa_fixup(struct mm_struct *mm, unsigned long addr,
+ pmd_t pmd, pmd_t *pmdp);
extern pgtable_t get_pmd_huge_pte(struct mm_struct *mm);
extern struct page *follow_trans_huge_pmd(struct mm_struct *mm,
unsigned long addr,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0d2a12f..067cba1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1037,6 +1037,24 @@ out:
return page;
}
+#ifdef CONFIG_AUTONUMA
+/* NUMA hinting page fault entry point for trans huge pmds */
+pmd_t __huge_pmd_numa_fixup(struct mm_struct *mm, unsigned long addr,
+ pmd_t pmd, pmd_t *pmdp)
+{
+ spin_lock(&mm->page_table_lock);
+ if (pmd_same(pmd, *pmdp)) {
+ struct page *page = pmd_page(pmd);
+ pmd = pmd_mknonnuma(pmd);
+ set_pmd_at(mm, addr & HPAGE_PMD_MASK, pmdp, pmd);
+ numa_hinting_fault(page, HPAGE_PMD_NR);
+ VM_BUG_ON(pmd_numa(pmd));
+ }
+ spin_unlock(&mm->page_table_lock);
+ return pmd;
+}
+#endif
+
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr)
{
diff --git a/mm/memory.c b/mm/memory.c
index 71282f5..00f1ae7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -57,6 +57,7 @@
#include <linux/swapops.h>
#include <linux/elf.h>
#include <linux/gfp.h>
+#include <linux/autonuma.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
@@ -3418,6 +3419,31 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
}
+static inline pte_t pte_numa_fixup(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long addr, pte_t pte, pte_t *ptep)
+{
+ if (pte_numa(pte))
+ pte = __pte_numa_fixup(mm, vma, addr, pte, ptep);
+ return pte;
+}
+
+static inline void pmd_numa_fixup(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmd)
+{
+ if (pmd_numa(*pmd))
+ __pmd_numa_fixup(mm, addr, pmd);
+}
+
+static inline pmd_t huge_pmd_numa_fixup(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t pmd, pmd_t *pmdp)
+{
+ if (pmd_numa(pmd))
+ pmd = __huge_pmd_numa_fixup(mm, addr, pmd, pmdp);
+ return pmd;
+}
+
/*
* These routines also need to handle stuff like marking pages dirty
* and/or accessed for architectures that don't do it in hardware (most
@@ -3460,6 +3486,7 @@ int handle_pte_fault(struct mm_struct *mm,
spin_lock(ptl);
if (unlikely(!pte_same(*pte, entry)))
goto unlock;
+ entry = pte_numa_fixup(mm, vma, address, entry, pte);
if (flags & FAULT_FLAG_WRITE) {
if (!pte_write(entry))
return do_wp_page(mm, vma, address,
@@ -3530,6 +3557,8 @@ retry:
*/
orig_pmd = ACCESS_ONCE(*pmd);
if (pmd_trans_huge(orig_pmd)) {
+ orig_pmd = huge_pmd_numa_fixup(mm, address,
+ orig_pmd, pmd);
if (flags & FAULT_FLAG_WRITE &&
!pmd_write(orig_pmd) &&
!pmd_trans_splitting(orig_pmd)) {
@@ -3548,6 +3577,8 @@ retry:
}
}
+ pmd_numa_fixup(mm, address, pmd);
+
/*
* Use __pte_alloc instead of pte_alloc_map, because we can't
* run pte_offset_map on the pmd, if an huge pmd could
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists