lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200619161402.10004-1-peterx@redhat.com>
Date:   Fri, 19 Jun 2020 12:14:02 -0400
From:   Peter Xu <peterx@...hat.com>
To:     linux-kernel@...r.kernel.org, linux-mm@...ck.org
Cc:     Peter Xu <peterx@...hat.com>,
        Gerald Schaefer <gerald.schaefer@...ibm.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Andrea Arcangeli <aarcange@...hat.com>,
        Will Deacon <will@...nel.org>,
        Michael Ellerman <mpe@...erman.id.au>,
        Linus Torvalds <torvalds@...ux-foundation.org>
Subject: [PATCH 25/26] mm: Clean up the last pieces of page fault accountings

Here're the last pieces of page fault accounting that were still done outside
handle_mm_fault() where we still have regs==NULL when calling handle_mm_fault():

arch/powerpc/mm/copro_fault.c:   copro_handle_mm_fault
arch/sparc/mm/fault_32.c:        force_user_fault
arch/um/kernel/trap.c:           handle_page_fault
mm/gup.c:                        faultin_page
                                 fixup_user_fault
mm/hmm.c:                        hmm_vma_fault
mm/ksm.c:                        break_ksm

Some of them has the issue of duplicated accounting for page fault retries.
Some of them didn't do the accounting at all.

This patch cleans all these up by letting handle_mm_fault() to do per-task page
fault accounting even if regs==NULL (though we'll still skip the perf event
accountings).  With that, we can safely remove all the outliers now.

There's another functional change in that now we account the page faults to the
caller of gup, rather than the task_struct that passed into the gup code.  More
information of this can be found at [1].

After this patch, below things should never be touched again outside
handle_mm_fault():

  - task_struct.[maj|min]_flt
  - PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN]

[1] https://lore.kernel.org/lkml/CAHk-=wj_V2Tps2QrMn20_W0OJF9xqNh52XSGA42s-ZJ8Y+GyKw@mail.gmail.com/

Signed-off-by: Peter Xu <peterx@...hat.com>
---
 arch/powerpc/mm/copro_fault.c |  5 -----
 arch/um/kernel/trap.c         |  4 ----
 mm/gup.c                      | 13 -------------
 mm/memory.c                   | 20 ++++++++++++--------
 4 files changed, 12 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index c0478bef1f14..2e59be1a9359 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -76,11 +76,6 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
 		BUG();
 	}
 
-	if (*flt & VM_FAULT_MAJOR)
-		current->maj_flt++;
-	else
-		current->min_flt++;
-
 out_unlock:
 	up_read(&mm->mmap_sem);
 	return ret;
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 32cc8f59322b..c881831de357 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -92,10 +92,6 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 			BUG();
 		}
 		if (flags & FAULT_FLAG_ALLOW_RETRY) {
-			if (fault & VM_FAULT_MAJOR)
-				current->maj_flt++;
-			else
-				current->min_flt++;
 			if (fault & VM_FAULT_RETRY) {
 				flags |= FAULT_FLAG_TRIED;
 
diff --git a/mm/gup.c b/mm/gup.c
index 1a48c639ea49..17b4d0c45a6b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -885,13 +885,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 		BUG();
 	}
 
-	if (tsk) {
-		if (ret & VM_FAULT_MAJOR)
-			tsk->maj_flt++;
-		else
-			tsk->min_flt++;
-	}
-
 	if (ret & VM_FAULT_RETRY) {
 		if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
 			*locked = 0;
@@ -1239,12 +1232,6 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 		goto retry;
 	}
 
-	if (tsk) {
-		if (major)
-			tsk->maj_flt++;
-		else
-			tsk->min_flt++;
-	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(fixup_user_fault);
diff --git a/mm/memory.c b/mm/memory.c
index 23c738b3756e..59a2989231fa 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4350,6 +4350,8 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 /**
  * mm_account_fault - Do page fault accountings
  * @regs: the pt_regs struct pointer.  When set to NULL, will skip accounting
+ *        of perf event counters, but we'll still do the per-task accounting to
+ *        the task who triggered this page fault.
  * @address: faulted address.
  * @major: whether this is a major fault.
  *
@@ -4365,16 +4367,18 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 static inline void mm_account_fault(struct pt_regs *regs,
 				    unsigned long address, bool major)
 {
+	if (major)
+		current->maj_flt++;
+	else
+		current->min_flt++;
+
 	if (!regs)
 		return;
 
-	if (major) {
-		current->maj_flt++;
+	if (major)
 		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
-	} else {
-		current->min_flt++;
+	else
 		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-	}
 }
 
 /*
@@ -4450,9 +4454,9 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	 *    immediately previously).
 	 *
 	 *  - if the fault is done for GUP, regs wil be NULL and
-	 *    no accounting will be done (but you _could_ pass in
-	 *    your own regs and it would be accounted to the thread
-	 *    doing the fault, not to the target!)
+	 *    we only do the accounting for the per thread fault
+	 *    counters who triggered the fault, and we skip the
+	 *    perf event updates.
 	 */
 	mm_account_fault(regs, address, (ret & VM_FAULT_MAJOR) ||
 			 (flags & FAULT_FLAG_TRIED));
-- 
2.26.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ