lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220128131006.67712-32-michel@lespinasse.org>
Date:   Fri, 28 Jan 2022 05:10:02 -0800
From:   Michel Lespinasse <michel@...pinasse.org>
To:     Linux-MM <linux-mm@...ck.org>, linux-kernel@...r.kernel.org,
        Andrew Morton <akpm@...ux-foundation.org>
Cc:     kernel-team@...com, Laurent Dufour <ldufour@...ux.ibm.com>,
        Jerome Glisse <jglisse@...gle.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Michal Hocko <mhocko@...e.com>,
        Vlastimil Babka <vbabka@...e.cz>,
        Davidlohr Bueso <dave@...olabs.net>,
        Matthew Wilcox <willy@...radead.org>,
        Liam Howlett <liam.howlett@...cle.com>,
        Rik van Riel <riel@...riel.com>,
        Paul McKenney <paulmck@...nel.org>,
        Song Liu <songliubraving@...com>,
        Suren Baghdasaryan <surenb@...gle.com>,
        Minchan Kim <minchan@...gle.com>,
        Joel Fernandes <joelaf@...gle.com>,
        David Rientjes <rientjes@...gle.com>,
        Axel Rasmussen <axelrasmussen@...gle.com>,
        Andy Lutomirski <luto@...nel.org>,
        Michel Lespinasse <michel@...pinasse.org>
Subject: [PATCH v2 31/35] mm: anon spf statistics

Add a new CONFIG_SPECULATIVE_PAGE_FAULT_STATS config option,
and dump extra statistics about executed spf cases and abort reasons
when the option is set.

Signed-off-by: Michel Lespinasse <michel@...pinasse.org>
---
 arch/x86/mm/fault.c           | 18 ++++++++---
 include/linux/mmap_lock.h     | 19 ++++++++++--
 include/linux/vm_event.h      |  6 ++++
 include/linux/vm_event_item.h | 21 +++++++++++++
 mm/Kconfig.debug              |  7 +++++
 mm/memory.c                   | 56 ++++++++++++++++++++++++++++-------
 mm/vmstat.c                   | 21 +++++++++++++
 7 files changed, 131 insertions(+), 17 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d6f8d4967c49..a5a19561c319 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1337,21 +1337,31 @@ void do_user_addr_fault(struct pt_regs *regs,
 
 	count_vm_event(SPF_ATTEMPT);
 	seq = mmap_seq_read_start(mm);
-	if (seq & 1)
+	if (seq & 1) {
+		count_vm_spf_event(SPF_ABORT_ODD);
 		goto spf_abort;
+	}
 	rcu_read_lock();
 	vma = __find_vma(mm, address);
-	if (!vma || vma->vm_start > address || !vma_is_anonymous(vma)) {
+	if (!vma || vma->vm_start > address) {
 		rcu_read_unlock();
+		count_vm_spf_event(SPF_ABORT_UNMAPPED);
+		goto spf_abort;
+	}
+	if (!vma_is_anonymous(vma)) {
+		rcu_read_unlock();
+		count_vm_spf_event(SPF_ABORT_NO_SPECULATE);
 		goto spf_abort;
 	}
 	pvma = *vma;
 	rcu_read_unlock();
-	if (!mmap_seq_read_check(mm, seq))
+	if (!mmap_seq_read_check(mm, seq, SPF_ABORT_VMA_COPY))
 		goto spf_abort;
 	vma = &pvma;
-	if (unlikely(access_error(error_code, vma)))
+	if (unlikely(access_error(error_code, vma))) {
+		count_vm_spf_event(SPF_ABORT_ACCESS_ERROR);
 		goto spf_abort;
+	}
 	fault = do_handle_mm_fault(vma, address,
 				   flags | FAULT_FLAG_SPECULATIVE, seq, regs);
 
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index a2459eb15a33..747805ce07b8 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -7,6 +7,7 @@
 #include <linux/rwsem.h>
 #include <linux/tracepoint-defs.h>
 #include <linux/types.h>
+#include <linux/vm_event.h>
 
 #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
 #define MMAP_LOCK_SEQ_INITIALIZER(name) \
@@ -104,12 +105,26 @@ static inline unsigned long mmap_seq_read_start(struct mm_struct *mm)
 	return seq;
 }
 
-static inline bool mmap_seq_read_check(struct mm_struct *mm, unsigned long seq)
+static inline bool __mmap_seq_read_check(struct mm_struct *mm,
+					 unsigned long seq)
 {
 	smp_rmb();
 	return seq == READ_ONCE(mm->mmap_seq);
 }
-#endif
+
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT_STATS
+static inline bool mmap_seq_read_check(struct mm_struct *mm, unsigned long seq,
+	enum vm_event_item fail_event)
+{
+	if (__mmap_seq_read_check(mm, seq))
+		return true;
+	count_vm_event(fail_event);
+	return false;
+}
+#else
+#define mmap_seq_read_check(mm, seq, fail) __mmap_seq_read_check(mm, seq)
+#endif /* CONFIG_SPECULATIVE_PAGE_FAULT_STATS */
+#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
 
 static inline void mmap_write_lock(struct mm_struct *mm)
 {
diff --git a/include/linux/vm_event.h b/include/linux/vm_event.h
index b3ae108a3841..689a21387dad 100644
--- a/include/linux/vm_event.h
+++ b/include/linux/vm_event.h
@@ -77,6 +77,12 @@ static inline void vm_events_fold_cpu(int cpu)
 
 #endif /* CONFIG_VM_EVENT_COUNTERS */
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT_STATS
+#define count_vm_spf_event(x) count_vm_event(x)
+#else
+#define count_vm_spf_event(x) do {} while (0)
+#endif
+
 #ifdef CONFIG_NUMA_BALANCING
 #define count_vm_numa_event(x)     count_vm_event(x)
 #define count_vm_numa_events(x, y) count_vm_events(x, y)
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index f00b3e36ff39..0390b81b1e71 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -137,6 +137,27 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
 		SPF_ATTEMPT,
 		SPF_ABORT,
+#endif
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT_STATS
+		SPF_ABORT_ODD,
+		SPF_ABORT_UNMAPPED,
+		SPF_ABORT_NO_SPECULATE,
+		SPF_ABORT_VMA_COPY,
+		SPF_ABORT_ACCESS_ERROR,
+		SPF_ABORT_PUD,
+		SPF_ABORT_PMD,
+		SPF_ABORT_ANON_VMA,
+		SPF_ABORT_PTE_MAP_LOCK_SEQ1,
+		SPF_ABORT_PTE_MAP_LOCK_PMD,
+		SPF_ABORT_PTE_MAP_LOCK_PTL,
+		SPF_ABORT_PTE_MAP_LOCK_SEQ2,
+		SPF_ABORT_USERFAULTFD,
+		SPF_ABORT_FAULT,
+		SPF_ABORT_SWAP,
+		SPF_ATTEMPT_ANON,
+		SPF_ATTEMPT_NUMA,
+		SPF_ATTEMPT_PTE,
+		SPF_ATTEMPT_WP,
 #endif
 		NR_VM_EVENT_ITEMS
 };
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 5bd5bb097252..73b61cc95562 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -174,3 +174,10 @@ config PTDUMP_DEBUGFS
 	  kernel.
 
 	  If in doubt, say N.
+
+config SPECULATIVE_PAGE_FAULT_STATS
+	bool "Additional statistics for speculative page faults"
+	depends on SPECULATIVE_PAGE_FAULT
+	help
+	  Additional statistics for speculative page faults.
+	  If in doubt, say N.
diff --git a/mm/memory.c b/mm/memory.c
index 7f8dbd729dce..a5754309eaae 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2762,7 +2762,8 @@ bool __pte_map_lock(struct vm_fault *vmf)
 	}
 
 	speculative_page_walk_begin();
-	if (!mmap_seq_read_check(vmf->vma->vm_mm, vmf->seq))
+	if (!mmap_seq_read_check(vmf->vma->vm_mm, vmf->seq,
+				 SPF_ABORT_PTE_MAP_LOCK_SEQ1))
 		goto fail;
 	/*
 	 * The mmap sequence count check guarantees that the page
@@ -2775,8 +2776,10 @@ bool __pte_map_lock(struct vm_fault *vmf)
 	 * is not a huge collapse operation in progress in our back.
 	 */
 	pmdval = READ_ONCE(*vmf->pmd);
-	if (!pmd_same(pmdval, vmf->orig_pmd))
+	if (!pmd_same(pmdval, vmf->orig_pmd)) {
+		count_vm_spf_event(SPF_ABORT_PTE_MAP_LOCK_PMD);
 		goto fail;
+	}
 #endif
 	ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
 	if (!pte)
@@ -2793,9 +2796,12 @@ bool __pte_map_lock(struct vm_fault *vmf)
 	 * We also don't want to retry until spin_trylock() succeeds,
 	 * because of the starvation potential against a stream of lockers.
 	 */
-	if (unlikely(!spin_trylock(ptl)))
+	if (unlikely(!spin_trylock(ptl))) {
+		count_vm_spf_event(SPF_ABORT_PTE_MAP_LOCK_PTL);
 		goto fail;
-	if (!mmap_seq_read_check(vmf->vma->vm_mm, vmf->seq))
+	}
+	if (!mmap_seq_read_check(vmf->vma->vm_mm, vmf->seq,
+				 SPF_ABORT_PTE_MAP_LOCK_SEQ2))
 		goto unlock_fail;
 	speculative_page_walk_end();
 	vmf->pte = pte;
@@ -3091,6 +3097,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 
 	if (unlikely(!vma->anon_vma)) {
 		if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+			count_vm_spf_event(SPF_ABORT_ANON_VMA);
 			ret = VM_FAULT_RETRY;
 			goto out;
 		}
@@ -3367,10 +3374,15 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 
+	if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+		count_vm_spf_event(SPF_ATTEMPT_WP);
+
 	if (userfaultfd_pte_wp(vma, *vmf->pte)) {
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
-		if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+		if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+			count_vm_spf_event(SPF_ABORT_USERFAULTFD);
 			return VM_FAULT_RETRY;
+		}
 		return handle_userfault(vmf, VM_UFFD_WP);
 	}
 
@@ -3620,6 +3632,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 
 	if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
 		pte_unmap(vmf->pte);
+		count_vm_spf_event(SPF_ABORT_SWAP);
 		return VM_FAULT_RETRY;
 	}
 
@@ -3852,6 +3865,9 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	vm_fault_t ret = 0;
 	pte_t entry;
 
+	if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+		count_vm_spf_event(SPF_ATTEMPT_ANON);
+
 	/* File mapping without ->vm_ops ? */
 	if (vma->vm_flags & VM_SHARED)
 		return VM_FAULT_SIGBUS;
@@ -3881,8 +3897,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	} else {
 		/* Allocate our own private page. */
 		if (unlikely(!vma->anon_vma)) {
-			if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+			if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+				count_vm_spf_event(SPF_ABORT_ANON_VMA);
 				return VM_FAULT_RETRY;
+			}
 			if (__anon_vma_prepare(vma))
 				goto oom;
 		}
@@ -3925,8 +3943,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		if (page)
 			put_page(page);
-		if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+		if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+			count_vm_spf_event(SPF_ABORT_USERFAULTFD);
 			return VM_FAULT_RETRY;
+		}
 		return handle_userfault(vmf, VM_UFFD_MISSING);
 	}
 
@@ -4470,6 +4490,9 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	bool was_writable = pte_savedwrite(vmf->orig_pte);
 	int flags = 0;
 
+	if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+		count_vm_spf_event(SPF_ATTEMPT_NUMA);
+
 	/*
 	 * The "pte" at this point cannot be used safely without
 	 * validation through pte_unmap_same(). It's of NUMA type but
@@ -4651,6 +4674,9 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma))
 		return do_numa_page(vmf);
 
+	if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+		count_vm_spf_event(SPF_ATTEMPT_PTE);
+
 	if (!pte_spinlock(vmf))
 		return VM_FAULT_RETRY;
 	entry = vmf->orig_pte;
@@ -4718,20 +4744,26 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 		speculative_page_walk_begin();
 		pgd = pgd_offset(mm, address);
 		pgdval = READ_ONCE(*pgd);
-		if (pgd_none(pgdval) || unlikely(pgd_bad(pgdval)))
+		if (pgd_none(pgdval) || unlikely(pgd_bad(pgdval))) {
+			count_vm_spf_event(SPF_ABORT_PUD);
 			goto spf_fail;
+		}
 
 		p4d = p4d_offset(pgd, address);
 		p4dval = READ_ONCE(*p4d);
-		if (p4d_none(p4dval) || unlikely(p4d_bad(p4dval)))
+		if (p4d_none(p4dval) || unlikely(p4d_bad(p4dval))) {
+			count_vm_spf_event(SPF_ABORT_PUD);
 			goto spf_fail;
+		}
 
 		vmf.pud = pud_offset(p4d, address);
 		pudval = READ_ONCE(*vmf.pud);
 		if (pud_none(pudval) || unlikely(pud_bad(pudval)) ||
 		    unlikely(pud_trans_huge(pudval)) ||
-		    unlikely(pud_devmap(pudval)))
+		    unlikely(pud_devmap(pudval))) {
+			count_vm_spf_event(SPF_ABORT_PUD);
 			goto spf_fail;
+		}
 
 		vmf.pmd = pmd_offset(vmf.pud, address);
 		vmf.orig_pmd = READ_ONCE(*vmf.pmd);
@@ -4749,8 +4781,10 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 		if (unlikely(pmd_none(vmf.orig_pmd) ||
 			     is_swap_pmd(vmf.orig_pmd) ||
 			     pmd_trans_huge(vmf.orig_pmd) ||
-			     pmd_devmap(vmf.orig_pmd)))
+			     pmd_devmap(vmf.orig_pmd))) {
+			count_vm_spf_event(SPF_ABORT_PMD);
 			goto spf_fail;
+		}
 
 		/*
 		 * The above does not allocate/instantiate page-tables because
diff --git a/mm/vmstat.c b/mm/vmstat.c
index dbb0160e5558..20ac17cf582a 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1394,6 +1394,27 @@ const char * const vmstat_text[] = {
 	"spf_attempt",
 	"spf_abort",
 #endif
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT_STATS
+	"SPF_ABORT_ODD",
+	"SPF_ABORT_UNMAPPED",
+	"SPF_ABORT_NO_SPECULATE",
+	"SPF_ABORT_VMA_COPY",
+	"SPF_ABORT_ACCESS_ERROR",
+	"SPF_ABORT_PUD",
+	"SPF_ABORT_PMD",
+	"SPF_ABORT_ANON_VMA",
+	"SPF_ABORT_PTE_MAP_LOCK_SEQ1",
+	"SPF_ABORT_PTE_MAP_LOCK_PMD",
+	"SPF_ABORT_PTE_MAP_LOCK_PTL",
+	"SPF_ABORT_PTE_MAP_LOCK_SEQ2",
+	"SPF_ABORT_USERFAULTFD",
+	"SPF_ABORT_FAULT",
+	"SPF_ABORT_SWAP",
+	"SPF_ATTEMPT_ANON",
+	"SPF_ATTEMPT_NUMA",
+	"SPF_ATTEMPT_PTE",
+	"SPF_ATTEMPT_WP",
+#endif
 #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
 };
 #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
-- 
2.20.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ