[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEEQ3w=V6-d+YSWP=0WMt6UAZexrazq0UQjdyUmS3AnMtkdoKQ@mail.gmail.com>
Date: Tue, 8 Jul 2025 18:07:27 +0800
From: yunhui cui <cuiyunhui@...edance.com>
To: Radim Krčmář <rkrcmar@...tanamicro.com>
Cc: masahiroy@...nel.org, nathan@...nel.org, nicolas.schier@...ux.dev,
dennis@...nel.org, tj@...nel.org, cl@...two.org, paul.walmsley@...ive.com,
palmer@...belt.com, aou@...s.berkeley.edu, alex@...ti.fr, andybnac@...il.com,
bjorn@...osinc.com, cyrilbur@...storrent.com, rostedt@...dmis.org,
puranjay@...nel.org, ben.dooks@...ethink.co.uk, zhangchunyan@...as.ac.cn,
ruanjinjie@...wei.com, jszhang@...nel.org, charlie@...osinc.com,
cleger@...osinc.com, antonb@...storrent.com, ajones@...tanamicro.com,
debug@...osinc.com, haibo1.xu@...el.com, samuel.holland@...ive.com,
linux-kbuild@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-mm@...ck.org, linux-riscv@...ts.infradead.org,
linux-riscv <linux-riscv-bounces@...ts.infradead.org>, wangziang.ok@...edance.com
Subject: Re: [External] [PATCH] RISC-V: store percpu offset in CSR_SCRATCH
Hi Radim,
On Mon, Jul 7, 2025 at 8:50 PM Radim Krčmář <rkrcmar@...tanamicro.com> wrote:
>
> 2025-07-04T16:45:00+08:00, Yunhui Cui <cuiyunhui@...edance.com>:
> > The following data was collected from tests conducted on the
> > Spacemit(R) X60 using the fixed register method:
> > [...]
> > The fixed register method reduced performance by 5.29%.
> > The per-CPU offset optimization improved performance by 2.52%.
>
> What is the performance if you use the scratch register?
>
> The patch below is completely unoptimized as I didn't want to shuffle
> code around too much, but it could give a rough idea.
>
> Thanks.
>
> ---8<---
> The scratch register currently denotes the mode before exception, but we
> can just use two different exception entry points to provide the same
> information, which frees the scratch register for the percpu offset.
>
> The user/kernel entry paths need more through rewrite, because they are
> terribly wasteful right now.
> ---
> Applies on top of d7b8f8e20813f0179d8ef519541a3527e7661d3a (v6.16-rc5)
>
> arch/riscv/include/asm/percpu.h | 13 ++++++++++
> arch/riscv/kernel/entry.S | 46 ++++++++++++++++++++-------------
> arch/riscv/kernel/head.S | 7 +----
> arch/riscv/kernel/smpboot.c | 7 +++++
> arch/riscv/kernel/stacktrace.c | 4 +--
> 5 files changed, 51 insertions(+), 26 deletions(-)
> create mode 100644 arch/riscv/include/asm/percpu.h
>
> diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
> new file mode 100644
> index 000000000000..2c838514e3ea
> --- /dev/null
> +++ b/arch/riscv/include/asm/percpu.h
> @@ -0,0 +1,13 @@
> +#ifndef __ASM_PERCPU_H
> +#define __ASM_PERCPU_H
> +
> +static inline void set_my_cpu_offset(unsigned long off)
> +{
> + csr_write(CSR_SCRATCH, off);
> +}
> +
> +#define __my_cpu_offset csr_read(CSR_SCRATCH)
> +
> +#include <asm-generic/percpu.h>
> +
> +#endif
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index 75656afa2d6b..e48c553d6779 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -91,18 +91,8 @@
> REG_L a0, TASK_TI_A0(tp)
> .endm
>
> -
> -SYM_CODE_START(handle_exception)
> - /*
> - * If coming from userspace, preserve the user thread pointer and load
> - * the kernel thread pointer. If we came from the kernel, the scratch
> - * register will contain 0, and we should continue on the current TP.
> - */
> - csrrw tp, CSR_SCRATCH, tp
> - bnez tp, .Lsave_context
> -
> -.Lrestore_kernel_tpsp:
> - csrr tp, CSR_SCRATCH
> +SYM_CODE_START(handle_kernel_exception)
> + csrw CSR_SCRATCH, tp
>
> #ifdef CONFIG_64BIT
> /*
> @@ -126,8 +116,22 @@ SYM_CODE_START(handle_exception)
> bnez sp, handle_kernel_stack_overflow
> REG_L sp, TASK_TI_KERNEL_SP(tp)
> #endif
> + j handle_exception
> +ASM_NOKPROBE(handle_kernel_exception)
> +SYM_CODE_END(handle_kernel_exception)
>
> -.Lsave_context:
> +SYM_CODE_START(handle_user_exception)
> + /*
> + * If coming from userspace, preserve the user thread pointer and load
> + * the kernel thread pointer.
> + */
> + csrrw tp, CSR_SCRATCH, tp
> + j handle_exception
> +
> +SYM_CODE_END(handle_user_exception)
> +ASM_NOKPROBE(handle_user_exception)
> +
> +SYM_CODE_START_NOALIGN(handle_exception)
> REG_S sp, TASK_TI_USER_SP(tp)
> REG_L sp, TASK_TI_KERNEL_SP(tp)
> addi sp, sp, -(PT_SIZE_ON_STACK)
> @@ -158,11 +162,15 @@ SYM_CODE_START(handle_exception)
> REG_S s4, PT_CAUSE(sp)
> REG_S s5, PT_TP(sp)
>
> - /*
> - * Set the scratch register to 0, so that if a recursive exception
> - * occurs, the exception vector knows it came from the kernel
> - */
> - csrw CSR_SCRATCH, x0
> + REG_L s0, TASK_TI_CPU(tp)
> + slli s0, s0, 3
> + la s1, __per_cpu_offset
> + add s1, s1, s0
> + REG_L s1, 0(s1)
> +
> + csrw CSR_SCRATCH, s1
This patch cleverly differentiates whether an exception originates
from user mode or kernel mode. However, there's still an issue with
using CSR_SCRATCH: each time handle_exception() is called, the
following instructions must be executed:
REG_L s0, TASK_TI_CPU(tp)
slli s0, s0, 3
la s1, __per_cpu_offset
add s1, s1, s0
REG_L s1, 0(s1)
csrw CSR_SCRATCH, s1
Should we consider adding a dedicated CSR (e.g., CSR_SCRATCH2) to
store the percpu offset instead?
See: https://lists.riscv.org/g/tech-privileged/topic/113437553#msg2506
> + la s1, handle_kernel_exception
> + csrw CSR_TVEC, s1
>
> /* Load the global pointer */
> load_global_pointer
> @@ -236,6 +244,8 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
> * structures again.
> */
> csrw CSR_SCRATCH, tp
> + la a0, handle_user_exception
> + csrw CSR_TVEC, a0
> 1:
> #ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
> move a0, sp
> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> index bdf3352acf4c..d8858334af2d 100644
> --- a/arch/riscv/kernel/head.S
> +++ b/arch/riscv/kernel/head.S
> @@ -188,14 +188,9 @@ secondary_start_sbi:
> .align 2
> .Lsetup_trap_vector:
> /* Set trap vector to exception handler */
> - la a0, handle_exception
> + la a0, handle_kernel_exception
> csrw CSR_TVEC, a0
>
> - /*
> - * Set sup0 scratch register to 0, indicating to exception vector that
> - * we are presently executing in kernel.
> - */
> - csrw CSR_SCRATCH, zero
> ret
>
> SYM_CODE_END(_start)
> diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
> index 601a321e0f17..2db44b10bedb 100644
> --- a/arch/riscv/kernel/smpboot.c
> +++ b/arch/riscv/kernel/smpboot.c
> @@ -41,6 +41,11 @@
>
> static DECLARE_COMPLETION(cpu_running);
>
> +void __init smp_prepare_boot_cpu(void)
> +{
> + set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
> +}
> +
> void __init smp_prepare_cpus(unsigned int max_cpus)
> {
> int cpuid;
> @@ -225,6 +230,8 @@ asmlinkage __visible void smp_callin(void)
> mmgrab(mm);
> current->active_mm = mm;
>
> + set_my_cpu_offset(per_cpu_offset(curr_cpuid));
> +
> store_cpu_topology(curr_cpuid);
> notify_cpu_starting(curr_cpuid);
>
> diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
> index 3fe9e6edef8f..69b2f390a2d4 100644
> --- a/arch/riscv/kernel/stacktrace.c
> +++ b/arch/riscv/kernel/stacktrace.c
> @@ -16,7 +16,7 @@
>
> #ifdef CONFIG_FRAME_POINTER
>
> -extern asmlinkage void handle_exception(void);
> +extern asmlinkage void handle_kernel_exception(void);
> extern unsigned long ret_from_exception_end;
>
> static inline int fp_is_valid(unsigned long fp, unsigned long sp)
> @@ -72,7 +72,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
> fp = frame->fp;
> pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra,
> &frame->ra);
> - if (pc >= (unsigned long)handle_exception &&
> + if (pc >= (unsigned long)handle_kernel_exception &&
> pc < (unsigned long)&ret_from_exception_end) {
> if (unlikely(!fn(arg, pc)))
> break;
Thanks,
Yunhui
Powered by blists - more mailing lists