Message-ID: <20250925025744.6807-2-xieyuanbin1@huawei.com>
Date: Thu, 25 Sep 2025 10:57:44 +0800
From: Xie Yuanbin <xieyuanbin1@...wei.com>
To: <rmk+kernel@...linux.org.uk>, <linux@...linux.org.uk>, <rppt@...nel.org>,
<vbabka@...e.cz>, <pfalcato@...e.de>, <brauner@...nel.org>,
<lorenzo.stoakes@...cle.com>, <kuninori.morimoto.gx@...esas.com>,
<tony@...mide.com>, <arnd@...db.de>, <bigeasy@...utronix.de>,
<akpm@...ux-foundation.org>, <punitagrawal@...il.com>, <rjw@...ysocki.net>,
<marc.zyngier@....com>
CC: <will@...nel.org>, <linux-arm-kernel@...ts.infradead.org>,
<linux-kernel@...r.kernel.org>, <liaohua4@...wei.com>,
<lilinjie8@...wei.com>, <xieyuanbin1@...wei.com>
Subject: [PATCH v2 2/2] ARM: mm: Optimize page_fault to reduce the impact of spectre-v2 bugfix
This performance optimization is a follow-up to the preceding spectre-v2
bugfix, which added a branch to the hot code path and may cause
performance degradation.

It mainly does the following things:
1. Hoist the user_mode() judgment and remove some redundant branches
(a condensed sketch follows the list).
For example, in user mode interrupts must be enabled, and
`(faulthandler_disabled() || !mm)` is impossible. I think some other
branches could also be optimized, such as `(fsr & FSR_LNX_PF)` and
`ttbr0_usermode_access_allowed()`, but I'm not sure. Among them,
`ttbr0_usermode_access_allowed()` runs after `perf_sw_event()`, and I
think the two are not interdependent. If `ttbr0_usermode_access_allowed()`
could be moved earlier, it could also be folded into the kernel-mode
branch.

2. Add some likely()/unlikely() annotations (a short note on these
macros follows the list).

3. `__do_user_fault` is a cold code path, and inlining it may be a
pessimization, so mark it noinline (see the note below).
I would also like to do this for `__do_kernel_fault`, but it seems that
`fixup_exception()` in `__do_kernel_fault` is not cold code, since it
can be triggered by `copy_from_user_nofault`.
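
For reference, a condensed sketch of the restructured entry of
do_page_fault() (simplified from the diff below; the
CONFIG_HARDEN_BRANCH_PREDICTOR path is omitted here):

	if (user_mode(regs)) {
		/* Interrupts must have been enabled in the parent context. */
		local_irq_enable();
		/* faulthandler_disabled() and !mm are impossible here. */
		flags |= FAULT_FLAG_USER;
	} else {
		/* Enable interrupts if they were enabled in the parent context. */
		if (interrupts_enabled(regs))
			local_irq_enable();
		/* In an interrupt or without user context, don't take the fault. */
		if (faulthandler_disabled() || unlikely(!mm))
			goto no_context;
	}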
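
A note on likely()/unlikely(): in the default (non-branch-profiling)
build they expand to __builtin_expect() (include/linux/compiler.h),
which only guides the compiler's block layout for the expected path and
does not change semantics:

	#define likely(x)	__builtin_expect(!!(x), 1)
	#define unlikely(x)	__builtin_expect(!!(x), 0)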
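
A note on noinline: it maps to the compiler attribute below
(include/linux/compiler_attributes.h), which keeps the cold
__do_user_fault() body out of do_page_fault()'s hot text:

	#define noinline	__attribute__((__noinline__))
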
Signed-off-by: Xie Yuanbin <xieyuanbin1@...wei.com>
---
arch/arm/mm/fault.c | 54 +++++++++++++++++++++++----------------------
1 file changed, 28 insertions(+), 26 deletions(-)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index e4dc7c2cfe75..09dde89a88ed 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -147,21 +147,21 @@ static void die_kernel_fault(const char *msg, struct mm_struct *mm,
* Oops. The kernel tried to access some page that wasn't present.
*/
static void
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
{
const char *msg;
/*
* Are we prepared to handle this kernel fault?
*/
- if (fixup_exception(regs))
+ if (likely(fixup_exception(regs)))
return;
/*
* No handler, we'll have to terminate things with extreme prejudice.
*/
if (addr < PAGE_SIZE) {
msg = "NULL pointer dereference";
} else {
if (is_translation_fault(fsr) &&
kfence_handle_page_fault(addr, is_write_fault(fsr), regs))
@@ -170,21 +170,21 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
msg = "paging request";
}
die_kernel_fault(msg, mm, addr, fsr, regs);
}
/*
* Something tried to access memory that isn't in our memory map..
* User mode accesses just cause a SIGSEGV
*/
-static void
+static noinline void
__do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
int code, struct pt_regs *regs)
{
struct task_struct *tsk = current;
if (addr > TASK_SIZE)
harden_branch_predictor();
#ifdef CONFIG_DEBUG_USER
if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
@@ -265,135 +265,137 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int sig, code;
vm_fault_t fault;
unsigned int flags = FAULT_FLAG_DEFAULT;
vm_flags_t vm_flags = VM_ACCESS_FLAGS;
if (kprobe_page_fault(regs, fsr))
return 0;
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
- if (unlikely(addr > TASK_SIZE) && user_mode(regs)) {
- fault = 0;
- code = SEGV_MAPERR;
- goto bad_area;
- }
-#endif
+ if (user_mode(regs)) {
+ if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR) && unlikely(addr > TASK_SIZE)) {
+ fault = 0;
+ code = SEGV_MAPERR;
+ goto bad_area;
+ }
- /* Enable interrupts if they were enabled in the parent context. */
- if (interrupts_enabled(regs))
+		/* In user mode, interrupts must have been enabled in the parent context. */
local_irq_enable();
- /*
- * If we're in an interrupt or have no user
- * context, we must not take the fault..
- */
- if (faulthandler_disabled() || !mm)
- goto no_context;
-
- if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+ } else {
+ /* Enable interrupts if they were enabled in the parent context. */
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
+ /*
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+ if (faulthandler_disabled() || unlikely(!mm))
+ goto no_context;
+ }
if (is_write_fault(fsr)) {
flags |= FAULT_FLAG_WRITE;
vm_flags = VM_WRITE;
}
if (fsr & FSR_LNX_PF) {
vm_flags = VM_EXEC;
- if (is_permission_fault(fsr) && !user_mode(regs))
+ if (unlikely(is_permission_fault(fsr)) && !user_mode(regs))
die_kernel_fault("execution of memory",
mm, addr, fsr, regs);
}
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
/*
* Privileged access aborts with CONFIG_CPU_TTBR0_PAN enabled are
* routed via the translation fault mechanism. Check whether uaccess
* is disabled while in kernel mode.
*/
- if (!ttbr0_usermode_access_allowed(regs))
+ if (unlikely(!ttbr0_usermode_access_allowed(regs)))
goto no_context;
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
vma = lock_vma_under_rcu(mm, addr);
if (!vma)
goto lock_mmap;
- if (!(vma->vm_flags & vm_flags)) {
+ if (unlikely(!(vma->vm_flags & vm_flags))) {
vma_end_read(vma);
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
fault = 0;
code = SEGV_ACCERR;
goto bad_area;
}
fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
vma_end_read(vma);
if (!(fault & VM_FAULT_RETRY)) {
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
goto done;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
if (fault & VM_FAULT_MAJOR)
flags |= FAULT_FLAG_TRIED;
/* Quick path to respond to signals */
- if (fault_signal_pending(fault, regs)) {
+ if (unlikely(fault_signal_pending(fault, regs))) {
if (!user_mode(regs))
goto no_context;
return 0;
}
lock_mmap:
retry:
vma = lock_mm_and_find_vma(mm, addr, regs);
if (unlikely(!vma)) {
fault = 0;
code = SEGV_MAPERR;
goto bad_area;
}
/*
* ok, we have a good vm_area for this memory access, check the
* permissions on the VMA allow for the fault which occurred.
*/
- if (!(vma->vm_flags & vm_flags)) {
+ if (unlikely(!(vma->vm_flags & vm_flags))) {
mmap_read_unlock(mm);
fault = 0;
code = SEGV_ACCERR;
goto bad_area;
}
fault = handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
/* If we need to retry but a fatal signal is pending, handle the
* signal first. We do not need to release the mmap_lock because
* it would already be released in __lock_page_or_retry in
* mm/filemap.c. */
- if (fault_signal_pending(fault, regs)) {
+ if (unlikely(fault_signal_pending(fault, regs))) {
if (!user_mode(regs))
goto no_context;
return 0;
}
/* The fault is fully completed (including releasing mmap lock) */
if (fault & VM_FAULT_COMPLETED)
return 0;
- if (!(fault & VM_FAULT_ERROR)) {
+ if (likely(!(fault & VM_FAULT_ERROR))) {
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
mmap_read_unlock(mm);
done:
/* Handle the "normal" case first */
--
2.48.1