[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251208034944.73113-4-cuiyunhui@bytedance.com>
Date: Mon, 8 Dec 2025 11:49:44 +0800
From: Yunhui Cui <cuiyunhui@...edance.com>
To: aou@...s.berkeley.edu,
alex@...ti.fr,
andii@...nel.org,
andybnac@...il.com,
apatel@...tanamicro.com,
ast@...nel.org,
ben.dooks@...ethink.co.uk,
bjorn@...nel.org,
bpf@...r.kernel.org,
charlie@...osinc.com,
cl@...two.org,
conor.dooley@...rochip.com,
cuiyunhui@...edance.com,
cyrilbur@...storrent.com,
daniel@...earbox.net,
debug@...osinc.com,
dennis@...nel.org,
eddyz87@...il.com,
haoluo@...gle.com,
john.fastabend@...il.com,
jolsa@...nel.org,
kpsingh@...nel.org,
linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
linux-riscv@...ts.infradead.org,
linux@...musvillemoes.dk,
martin.lau@...ux.dev,
palmer@...belt.com,
pjw@...nel.org,
puranjay@...nel.org,
pulehui@...wei.com,
ruanjinjie@...wei.com,
rkrcmar@...tanamicro.com,
samuel.holland@...ive.com,
sdf@...ichev.me,
song@...nel.org,
tglx@...utronix.de,
tj@...nel.org,
thuth@...hat.com,
yonghong.song@...ux.dev,
yury.norov@...il.com,
zong.li@...ive.com
Subject: [PATCH v2 3/3] riscv: store percpu offset into thread_info
Originally we planned to reserve a dedicated register for the percpu
offset, which would speed up per-CPU variable reads/writes and reduce
the number of access instructions. After discussion [1], the offset is
now stored in thread_info instead.
[1] https://lists.riscv.org/g/tech-privileged/topic/risc_v_tech_arch_review/113437553?page=2
Signed-off-by: Yunhui Cui <cuiyunhui@...edance.com>
---
arch/riscv/include/asm/asm.h | 6 +-----
arch/riscv/include/asm/percpu.h | 4 ++++
arch/riscv/include/asm/switch_to.h | 8 ++++++++
arch/riscv/include/asm/thread_info.h | 5 +++--
arch/riscv/kernel/asm-offsets.c | 1 +
arch/riscv/kernel/smpboot.c | 7 +++++++
arch/riscv/net/bpf_jit_comp64.c | 9 +--------
7 files changed, 25 insertions(+), 15 deletions(-)
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index e9e8ba83e632f..137a49488325e 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -91,11 +91,7 @@
#ifdef CONFIG_SMP
.macro asm_per_cpu dst sym tmp
- lw \tmp, TASK_TI_CPU_NUM(tp)
- slli \tmp, \tmp, RISCV_LGPTR
- la \dst, __per_cpu_offset
- add \dst, \dst, \tmp
- REG_L \tmp, 0(\dst)
+ REG_L \tmp, TASK_TI_PCPU_OFFSET(tp)
la \dst, \sym
add \dst, \dst, \tmp
.endm
diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
index b173729926126..18e282dded626 100644
--- a/arch/riscv/include/asm/percpu.h
+++ b/arch/riscv/include/asm/percpu.h
@@ -7,7 +7,9 @@
#include <asm/alternative-macros.h>
#include <asm/cpufeature-macros.h>
+#include <asm/current.h>
#include <asm/hwcap.h>
+#include <asm/thread_info.h>
#define PERCPU_RW_OPS(sz) \
static inline unsigned long __percpu_read_##sz(void *ptr) \
@@ -233,6 +235,8 @@ _pcp_protect_return(__percpu_add_return_amo_case_64, pcp, val)
ret__; \
})
+#define __my_cpu_offset (((struct thread_info *)current)->pcpu_offset)
+
#include <asm-generic/percpu.h>
#endif /* __ASM_PERCPU_H */
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 0e71eb82f920c..733b6cd306e40 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -88,6 +88,13 @@ static inline void __switch_to_envcfg(struct task_struct *next)
:: "r" (next->thread.envcfg) : "memory");
}
+static inline void __switch_to_pcpu_offset(struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+ next->thread_info.pcpu_offset = __my_cpu_offset;
+#endif
+}
+
extern struct task_struct *__switch_to(struct task_struct *,
struct task_struct *);
@@ -122,6 +129,7 @@ do { \
if (switch_to_should_flush_icache(__next)) \
local_flush_icache_all(); \
__switch_to_envcfg(__next); \
+ __switch_to_pcpu_offset(__next); \
((last) = __switch_to(__prev, __next)); \
} while (0)
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 36918c9200c92..8d7d43cc9c405 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -52,7 +52,8 @@
*/
struct thread_info {
unsigned long flags; /* low level flags */
- int preempt_count; /* 0=>preemptible, <0=>BUG */
+ int preempt_count; /* 0=>preemptible, <0=>BUG */
+ int cpu;
/*
* These stack pointers are overwritten on every system call or
* exception. SP is also saved to the stack it can be recovered when
@@ -60,8 +61,8 @@ struct thread_info {
*/
long kernel_sp; /* Kernel stack pointer */
long user_sp; /* User stack pointer */
- int cpu;
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
+ unsigned long pcpu_offset;
#ifdef CONFIG_SHADOW_CALL_STACK
void *scs_base;
void *scs_sp;
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index af827448a609e..fbf53b66b0e06 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -38,6 +38,7 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_SUM, task_struct, thread.sum);
OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
+ OFFSET(TASK_TI_PCPU_OFFSET, task_struct, thread_info.pcpu_offset);
OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index d85916a3660c3..9e95c068b966b 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -209,6 +209,11 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
}
#endif
+void __init smp_prepare_boot_cpu(void)
+{
+ __my_cpu_offset = per_cpu_offset(smp_processor_id());
+}
+
void __init smp_cpus_done(unsigned int max_cpus)
{
}
@@ -234,6 +239,8 @@ asmlinkage __visible void smp_callin(void)
mmgrab(mm);
current->active_mm = mm;
+ __my_cpu_offset = per_cpu_offset(smp_processor_id());
+
#ifdef CONFIG_HOTPLUG_PARALLEL
cpuhp_ap_sync_alive();
#endif
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 5f9457e910e87..4a492a6a1cc1e 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1345,15 +1345,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
if (rd != rs)
emit_mv(rd, rs, ctx);
#ifdef CONFIG_SMP
- /* Load current CPU number in T1 */
- emit_lw(RV_REG_T1, offsetof(struct thread_info, cpu),
+ emit_ld(RV_REG_T1, offsetof(struct thread_info, pcpu_offset),
RV_REG_TP, ctx);
- /* Load address of __per_cpu_offset array in T2 */
- emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx);
- /* Get address of __per_cpu_offset[cpu] in T1 */
- emit_sh3add(RV_REG_T1, RV_REG_T1, RV_REG_T2, ctx);
- /* Load __per_cpu_offset[cpu] in T1 */
- emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx);
/* Add the offset to Rd */
emit_add(rd, rd, RV_REG_T1, ctx);
#endif
--
2.39.5
Powered by blists - more mailing lists