[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1337965359-29725-30-git-send-email-aarcange@redhat.com>
Date: Fri, 25 May 2012 19:02:33 +0200
From: Andrea Arcangeli <aarcange@...hat.com>
To: linux-kernel@...r.kernel.org, linux-mm@...ck.org
Cc: Hillf Danton <dhillf@...il.com>, Dan Smith <danms@...ibm.com>,
Peter Zijlstra <a.p.zijlstra@...llo.nl>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...e.hu>, Paul Turner <pjt@...gle.com>,
Suresh Siddha <suresh.b.siddha@...el.com>,
Mike Galbraith <efault@....de>,
"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
Lai Jiangshan <laijs@...fujitsu.com>,
Bharata B Rao <bharata.rao@...il.com>,
Lee Schermerhorn <Lee.Schermerhorn@...com>,
Rik van Riel <riel@...hat.com>,
Johannes Weiner <hannes@...xchg.org>,
Srivatsa Vaddagiri <vatsa@...ux.vnet.ibm.com>,
Christoph Lameter <cl@...ux.com>
Subject: [PATCH 29/35] autonuma: numa hinting page faults entry points
This is where the NUMA hinting page faults are detected and passed
over to the AutoNUMA core logic.
Signed-off-by: Andrea Arcangeli <aarcange@...hat.com>
---
include/linux/huge_mm.h | 2 ++
mm/huge_memory.c | 17 +++++++++++++++++
mm/memory.c | 32 ++++++++++++++++++++++++++++++++
3 files changed, 51 insertions(+), 0 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index c8af7a2..72eac1d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -11,6 +11,8 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd,
pmd_t orig_pmd);
+extern pmd_t __huge_pmd_numa_fixup(struct mm_struct *mm, unsigned long addr,
+ pmd_t pmd, pmd_t *pmdp);
extern pgtable_t get_pmd_huge_pte(struct mm_struct *mm);
extern struct page *follow_trans_huge_pmd(struct mm_struct *mm,
unsigned long addr,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 76bdc48..017c0a3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1030,6 +1030,23 @@ out:
return page;
}
+#ifdef CONFIG_AUTONUMA
+pmd_t __huge_pmd_numa_fixup(struct mm_struct *mm, unsigned long addr,
+ pmd_t pmd, pmd_t *pmdp)
+{
+ spin_lock(&mm->page_table_lock);
+ if (pmd_same(pmd, *pmdp)) {
+ struct page *page = pmd_page(pmd);
+ pmd = pmd_mknotnuma(pmd);
+ set_pmd_at(mm, addr & HPAGE_PMD_MASK, pmdp, pmd);
+ numa_hinting_fault(page, HPAGE_PMD_NR);
+ VM_BUG_ON(pmd_numa(pmd));
+ }
+ spin_unlock(&mm->page_table_lock);
+ return pmd;
+}
+#endif
+
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr)
{
diff --git a/mm/memory.c b/mm/memory.c
index e3aa47c..316ce54 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -57,6 +57,7 @@
#include <linux/swapops.h>
#include <linux/elf.h>
#include <linux/gfp.h>
+#include <linux/autonuma.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
@@ -3398,6 +3399,32 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
}
+static inline pte_t pte_numa_fixup(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long addr, pte_t pte, pte_t *ptep)
+{
+ if (pte_numa(pte))
+ pte = __pte_numa_fixup(mm, vma, addr, pte, ptep);
+ return pte;
+}
+
+static inline void pmd_numa_fixup(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmd)
+{
+ if (pmd_numa(*pmd))
+ __pmd_numa_fixup(mm, vma, addr, pmd);
+}
+
+static inline pmd_t huge_pmd_numa_fixup(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t pmd, pmd_t *pmdp)
+{
+ if (pmd_numa(pmd))
+ pmd = __huge_pmd_numa_fixup(mm, addr, pmd, pmdp);
+ return pmd;
+}
+
/*
* These routines also need to handle stuff like marking pages dirty
* and/or accessed for architectures that don't do it in hardware (most
@@ -3440,6 +3467,7 @@ int handle_pte_fault(struct mm_struct *mm,
spin_lock(ptl);
if (unlikely(!pte_same(*pte, entry)))
goto unlock;
+ entry = pte_numa_fixup(mm, vma, address, entry, pte);
if (flags & FAULT_FLAG_WRITE) {
if (!pte_write(entry))
return do_wp_page(mm, vma, address,
@@ -3501,6 +3529,8 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pmd_t orig_pmd = *pmd;
barrier();
if (pmd_trans_huge(orig_pmd)) {
+ orig_pmd = huge_pmd_numa_fixup(mm, address,
+ orig_pmd, pmd);
if (flags & FAULT_FLAG_WRITE &&
!pmd_write(orig_pmd) &&
!pmd_trans_splitting(orig_pmd))
@@ -3510,6 +3540,8 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
}
+ pmd_numa_fixup(mm, vma, address, pmd);
+
/*
* Use __pte_alloc instead of pte_alloc_map, because we can't
* run pte_offset_map on the pmd, if an huge pmd could
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists