[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251208034944.73113-3-cuiyunhui@bytedance.com>
Date: Mon, 8 Dec 2025 11:49:43 +0800
From: Yunhui Cui <cuiyunhui@...edance.com>
To: aou@...s.berkeley.edu,
alex@...ti.fr,
andii@...nel.org,
andybnac@...il.com,
apatel@...tanamicro.com,
ast@...nel.org,
ben.dooks@...ethink.co.uk,
bjorn@...nel.org,
bpf@...r.kernel.org,
charlie@...osinc.com,
cl@...two.org,
conor.dooley@...rochip.com,
cuiyunhui@...edance.com,
cyrilbur@...storrent.com,
daniel@...earbox.net,
debug@...osinc.com,
dennis@...nel.org,
eddyz87@...il.com,
haoluo@...gle.com,
john.fastabend@...il.com,
jolsa@...nel.org,
kpsingh@...nel.org,
linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
linux-riscv@...ts.infradead.org,
linux@...musvillemoes.dk,
martin.lau@...ux.dev,
palmer@...belt.com,
pjw@...nel.org,
puranjay@...nel.org,
pulehui@...wei.com,
ruanjinjie@...wei.com,
rkrcmar@...tanamicro.com,
samuel.holland@...ive.com,
sdf@...ichev.me,
song@...nel.org,
tglx@...utronix.de,
tj@...nel.org,
thuth@...hat.com,
yonghong.song@...ux.dev,
yury.norov@...il.com,
zong.li@...ive.com
Subject: [PATCH v2 2/3] riscv: introduce percpu.h into include/asm
The current percpu operations rely on the generic implementations, in
which raw_local_irq_save() introduces substantial overhead. Add a
RISC-V specific percpu.h that implements the this_cpu_*() operations
with atomic (AMO) instructions while preemption is disabled.
RISC-V does not provide lr/sc.b/h, so when ZABHA is not supported the
8-bit and 16-bit operations have to use lr/sc.w on the containing
aligned 32-bit word instead, which requires some additional mask and
shift operations.
Signed-off-by: Yunhui Cui <cuiyunhui@...edance.com>
---
arch/riscv/include/asm/percpu.h | 238 ++++++++++++++++++++++++++++++++
1 file changed, 238 insertions(+)
create mode 100644 arch/riscv/include/asm/percpu.h
diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
new file mode 100644
index 0000000000000..b173729926126
--- /dev/null
+++ b/arch/riscv/include/asm/percpu.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __ASM_PERCPU_H
+#define __ASM_PERCPU_H
+
+#include <linux/preempt.h>
+
+#include <asm/alternative-macros.h>
+#include <asm/cpufeature-macros.h>
+#include <asm/hwcap.h>
+
+/*
+ * PERCPU_RW_OPS(bits) - generate __percpu_read_<bits>() and
+ * __percpu_write_<bits>() for an object of type u<bits>.
+ *
+ * The read helper loads the object with READ_ONCE() and returns it
+ * converted to unsigned long. The write helper truncates @val to u<bits>
+ * and stores it with WRITE_ONCE().
+ */
+#define PERCPU_RW_OPS(bits) \
+static inline unsigned long __percpu_read_##bits(void *ptr) \
+{ \
+	u##bits *p = (u##bits *)ptr; \
+ \
+	return READ_ONCE(*p); \
+} \
+ \
+static inline void __percpu_write_##bits(void *ptr, unsigned long val) \
+{ \
+	u##bits *p = (u##bits *)ptr; \
+ \
+	WRITE_ONCE(*p, (u##bits)val); \
+}
+
+PERCPU_RW_OPS(8)
+PERCPU_RW_OPS(16)
+PERCPU_RW_OPS(32)
+PERCPU_RW_OPS(64)
+
+/*
+ * __PERCPU_AMO_OP_CASE() generates __percpu_<name>_amo_case_<sz>(), which
+ * issues a single "amo<amo_insn><sfx>" on *ptr with @val (truncated to
+ * u<sz>) as the register operand. The destination register is "zero", so
+ * the value returned by the AMO is discarded.
+ */
+#define __PERCPU_AMO_OP_CASE(sfx, name, sz, amo_insn) \
+static inline void \
+__percpu_##name##_amo_case_##sz(void *ptr, unsigned long val) \
+{ \
+ asm volatile ( \
+ "amo" #amo_insn #sfx " zero, %[val], %[ptr]" \
+ : [ptr] "+A" (*(u##sz *)ptr) \
+ : [val] "r" ((u##sz)(val)) \
+ : "memory"); \
+}
+
+/* Instantiate the 32-bit (.w) and 64-bit (.d) variants of one operation. */
+#define PERCPU_OP(name, amo_insn) \
+ __PERCPU_AMO_OP_CASE(.w, name, 32, amo_insn) \
+ __PERCPU_AMO_OP_CASE(.d, name, 64, amo_insn)
+
+/*
+ * NOTE: the "andnot" helpers generated here execute amoand with @val as
+ * given; no inversion of @val is performed by these 32/64-bit helpers.
+ */
+PERCPU_OP(add, add)
+PERCPU_OP(andnot, and)
+PERCPU_OP(or, or)
+
+/*
+ * Currently, only this_cpu_add_return_xxx() requires a return value,
+ * and the PERCPU_RET_OP() does not account for other operations.
+ *
+ * __PERCPU_AMO_RET_OP_CASE() generates
+ * __percpu_<name>_return_amo_case_<sz>(): it issues one
+ * "amo<amo_insn><sfx>" on *ptr, captures the value the AMO places in its
+ * destination register in @ret, and returns "ret + val" truncated to
+ * u<sz>. Adding @val is only meaningful for the "add" instantiation below.
+ */
+#define __PERCPU_AMO_RET_OP_CASE(sfx, name, sz, amo_insn) \
+static inline u##sz \
+__percpu_##name##_return_amo_case_##sz(void *ptr, unsigned long val) \
+{ \
+ register u##sz ret; \
+ \
+ asm volatile ( \
+ "amo" #amo_insn #sfx " %[ret], %[val], %[ptr]" \
+ : [ptr] "+A" (*(u##sz *)ptr), [ret] "=r" (ret) \
+ : [val] "r" ((u##sz)(val)) \
+ : "memory"); \
+ \
+ return ret + val; \
+}
+
+/* Instantiate the 32-bit (.w) and 64-bit (.d) returning variants. */
+#define PERCPU_RET_OP(name, amo_insn) \
+ __PERCPU_AMO_RET_OP_CASE(.w, name, 32, amo_insn) \
+ __PERCPU_AMO_RET_OP_CASE(.d, name, 64, amo_insn)
+
+PERCPU_RET_OP(add, add)
+
+/*
+ * Helpers for operating on an 8/16-bit object through the aligned 32-bit
+ * word that contains it:
+ *  - GET_SHIFT: bit offset of the object inside that word (byte offset
+ *    within the word times BITS_PER_BYTE),
+ *  - GET_MASK:  mask of the low @sz bits,
+ *  - GET_PTR32: address of the containing 4-byte aligned word.
+ */
+#define PERCPU_8_16_GET_SHIFT(ptr) (((unsigned long)(ptr) & 3UL) * BITS_PER_BYTE)
+#define PERCPU_8_16_GET_MASK(sz) GENMASK((sz) - 1, 0)
+#define PERCPU_8_16_GET_PTR32(ptr) ((u32 *)((unsigned long)(ptr) & ~3UL))
+
+/*
+ * PERCPU_8_16_OP() generates __percpu_<name>_amo_case_<sz>() for 8-bit and
+ * 16-bit objects.
+ *
+ * With ZABHA the operation is a single amo<amo_insn>.b/.h. Otherwise it is
+ * emulated with an lr.w/sc.w loop on the aligned 32-bit word containing
+ * the object: the field is extracted, combined with the operand, shifted
+ * back into position and merged with the untouched bits of the word.
+ *
+ * After shifting back, the result is masked to the field again so that a
+ * carry out of an 8/16-bit add cannot spill into the neighbouring bytes
+ * of the word.
+ *
+ * asm operands of the fallback:
+ *   %0 loaded word        %1 merged word, then sc.w status
+ *   %2 memory word        %3 scratch (field value)
+ *   %4 field mask         %5 shift
+ *   %6 operand            %7 inverted field mask
+ */
+#define PERCPU_8_16_OP(name, amo_insn, sz, sfx, val_type, new_val_expr, asm_op) \
+static inline void __percpu_##name##_amo_case_##sz(void *ptr, unsigned long val) \
+{ \
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
+		asm volatile ("amo" #amo_insn #sfx " zero, %[val], %[ptr]" \
+			: [ptr] "+A"(*(val_type *)ptr) \
+			: [val] "r"((val_type)(new_val_expr)) \
+			: "memory"); \
+	} else { \
+		u32 *ptr32 = PERCPU_8_16_GET_PTR32(ptr); \
+		const unsigned long shift = PERCPU_8_16_GET_SHIFT(ptr); \
+		const u32 mask = PERCPU_8_16_GET_MASK(sz) << shift; \
+		const val_type val_trunc = (val_type)(new_val_expr); \
+		u32 old, rc, tmp; \
+ \
+		asm volatile ( \
+			"0: lr.w %0, %2\n" \
+			"and %3, %0, %4\n" \
+			"srl %3, %3, %5\n" \
+			#asm_op " %3, %3, %6\n" \
+			"sll %3, %3, %5\n" \
+			"and %3, %3, %4\n" \
+			"and %1, %0, %7\n" \
+			"or %1, %1, %3\n" \
+			"sc.w %1, %1, %2\n" \
+			"bnez %1, 0b\n" \
+			: "=&r"(old), "=&r"(rc), "+A"(*ptr32), "=&r"(tmp) \
+			: "r"(mask), "r"(shift), "r"(val_trunc), "r"(~mask) \
+			: "memory"); \
+	} \
+}
+
+/* Instantiate the 8-bit (.b) and 16-bit (.h) variants of one operation. */
+#define PERCPU_OP_8_16(op_name, op, expr, final_op) \
+	PERCPU_8_16_OP(op_name, op, 8, .b, u8, expr, final_op) \
+	PERCPU_8_16_OP(op_name, op, 16, .h, u16, expr, final_op)
+
+/*
+ * The "andnot" variant uses ~val as its operand expression, i.e. these
+ * 8/16-bit helpers invert the argument they are given before the AND.
+ */
+PERCPU_OP_8_16(add, add, val, add)
+PERCPU_OP_8_16(andnot, and, ~val, and)
+PERCPU_OP_8_16(or, or, val, or)
+
+/*
+ * PERCPU_8_16_RET_OP() generates __percpu_<name>_return_amo_case_<sz>()
+ * for 8-bit and 16-bit objects; it adds the operand to the object and
+ * returns the new value.
+ *
+ * With ZABHA a single amoadd.b/.h is used and the operand is added to the
+ * value the AMO returned. Otherwise the add is emulated with an lr.w/sc.w
+ * loop on the aligned 32-bit word containing the object.
+ *
+ * In the fallback, @tmp keeps the new field value *unshifted* (already
+ * masked to @sz bits) so that it can be returned directly; the shifted
+ * copy that is merged into the word lives in @new. All three outputs are
+ * written before the last input is read, hence the early-clobber "&".
+ *
+ * asm operands of the fallback:
+ *   %0 loaded word, then word with the field cleared
+ *   %1 new field value (unshifted)
+ *   %2 merged word, then sc.w status
+ *   %3 memory word        %4 field mask       %5 shift
+ *   %6 operand            %7 low-@sz-bits mask
+ *   %8 inverted field mask
+ */
+#define PERCPU_8_16_RET_OP(name, amo_insn, sz, sfx, val_type, new_val_expr) \
+static inline val_type __percpu_##name##_return_amo_case_##sz(void *ptr, unsigned long val) \
+{ \
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
+		val_type ret; \
+ \
+		asm volatile ("amo" #amo_insn #sfx " %[ret], %[val], %[ptr]" \
+			: [ptr] "+A"(*(val_type *)ptr), [ret] "=r"(ret) \
+			: [val] "r"((val_type)(new_val_expr)) \
+			: "memory"); \
+		return ret + (val_type)(new_val_expr); \
+	} else { \
+		u32 *ptr32 = PERCPU_8_16_GET_PTR32(ptr); \
+		const unsigned long shift = PERCPU_8_16_GET_SHIFT(ptr); \
+		const u32 mask = (PERCPU_8_16_GET_MASK(sz) << shift); \
+		const u32 inv_mask = ~mask; \
+		const val_type val_trunc = (val_type)(new_val_expr); \
+		u32 old, new, tmp; \
+ \
+		asm volatile ( \
+			"0: lr.w %0, %3\n" \
+			"and %1, %0, %4\n" \
+			"srl %1, %1, %5\n" \
+			"add %1, %1, %6\n" \
+			"and %1, %1, %7\n" \
+			"sll %2, %1, %5\n" \
+			"and %0, %0, %8\n" \
+			"or %2, %2, %0\n" \
+			"sc.w %2, %2, %3\n" \
+			"bnez %2, 0b\n" \
+			: "=&r"(old), "=&r"(tmp), "=&r"(new), "+A"(*ptr32) \
+			: "r"(mask), "r"(shift), "r"(val_trunc), "r"(PERCPU_8_16_GET_MASK(sz)), \
+			  "r"(inv_mask) \
+			: "memory"); \
+		return (val_type)(tmp); \
+	} \
+}
+
+PERCPU_8_16_RET_OP(add, add, 8, .b, u8, val)
+PERCPU_8_16_RET_OP(add, add, 16, .h, u16, val)
+
+/*
+ * _pcp_protect() - call @op on this CPU's instance of @pcp.
+ *
+ * Preemption is disabled (notrace variant) around the call. @op is passed
+ * raw_cpu_ptr(&pcp) followed by the remaining arguments; whatever it
+ * returns is discarded.
+ */
+#define _pcp_protect(op, pcp, ...) \
+({ \
+	preempt_disable_notrace(); \
+	op(raw_cpu_ptr(&(pcp)), __VA_ARGS__); \
+	preempt_enable_notrace(); \
+})
+
+/*
+ * _pcp_protect_return() - like _pcp_protect(), but the result of @op is
+ * cast to typeof(pcp) and becomes the value of the expression. Extra
+ * arguments are optional.
+ */
+#define _pcp_protect_return(op, pcp, ...) \
+({ \
+	typeof(pcp) __pcp_ret; \
+ \
+	preempt_disable_notrace(); \
+	__pcp_ret = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##__VA_ARGS__); \
+	preempt_enable_notrace(); \
+	__pcp_ret; \
+})
+
+/*
+ * Sized this_cpu_read()/this_cpu_write() implementations: a READ_ONCE() /
+ * WRITE_ONCE() access to this CPU's instance of @pcp with preemption
+ * disabled.
+ *
+ * @val is parenthesised before the cast so that the whole argument
+ * expression is converted, e.g. "base + 1" with a pointer @base keeps its
+ * pointer arithmetic instead of becoming "(unsigned long)base + 1".
+ */
+#define this_cpu_read_1(pcp) _pcp_protect_return(__percpu_read_8, pcp)
+#define this_cpu_read_2(pcp) _pcp_protect_return(__percpu_read_16, pcp)
+#define this_cpu_read_4(pcp) _pcp_protect_return(__percpu_read_32, pcp)
+#define this_cpu_read_8(pcp) _pcp_protect_return(__percpu_read_64, pcp)
+
+#define this_cpu_write_1(pcp, val) _pcp_protect(__percpu_write_8, pcp, (unsigned long)(val))
+#define this_cpu_write_2(pcp, val) _pcp_protect(__percpu_write_16, pcp, (unsigned long)(val))
+#define this_cpu_write_4(pcp, val) _pcp_protect(__percpu_write_32, pcp, (unsigned long)(val))
+#define this_cpu_write_8(pcp, val) _pcp_protect(__percpu_write_64, pcp, (unsigned long)(val))
+
+/*
+ * Sized this_cpu_add() implementations: forward to the matching
+ * __percpu_add_amo_case_<bits>() helper with preemption disabled.
+ */
+#define this_cpu_add_1(pcp, val) \
+	_pcp_protect(__percpu_add_amo_case_8, pcp, (val))
+#define this_cpu_add_2(pcp, val) \
+	_pcp_protect(__percpu_add_amo_case_16, pcp, (val))
+#define this_cpu_add_4(pcp, val) \
+	_pcp_protect(__percpu_add_amo_case_32, pcp, (val))
+#define this_cpu_add_8(pcp, val) \
+	_pcp_protect(__percpu_add_amo_case_64, pcp, (val))
+
+/*
+ * Sized this_cpu_add_return() implementations: as above, but the value
+ * returned by the __percpu_add_return_amo_case_<bits>() helper is cast to
+ * typeof(pcp) and returned.
+ */
+#define this_cpu_add_return_1(pcp, val) \
+	_pcp_protect_return(__percpu_add_return_amo_case_8, pcp, (val))
+#define this_cpu_add_return_2(pcp, val) \
+	_pcp_protect_return(__percpu_add_return_amo_case_16, pcp, (val))
+#define this_cpu_add_return_4(pcp, val) \
+	_pcp_protect_return(__percpu_add_return_amo_case_32, pcp, (val))
+#define this_cpu_add_return_8(pcp, val) \
+	_pcp_protect_return(__percpu_add_return_amo_case_64, pcp, (val))
+
+/*
+ * Sized this_cpu_and() implementations: pcp &= val.
+ *
+ * The 8/16-bit "andnot" helpers are generated with "~val" as their operand
+ * expression, so they invert the argument themselves and must be handed
+ * ~(val). The 32/64-bit "andnot" helpers execute amoand on the argument
+ * exactly as given, so they must be handed (val) itself; passing ~val
+ * there would compute pcp &= ~val.
+ */
+#define this_cpu_and_1(pcp, val) _pcp_protect(__percpu_andnot_amo_case_8, pcp, ~(val))
+#define this_cpu_and_2(pcp, val) _pcp_protect(__percpu_andnot_amo_case_16, pcp, ~(val))
+#define this_cpu_and_4(pcp, val) _pcp_protect(__percpu_andnot_amo_case_32, pcp, (val))
+#define this_cpu_and_8(pcp, val) _pcp_protect(__percpu_andnot_amo_case_64, pcp, (val))
+
+/*
+ * Sized this_cpu_or() implementations: forward to the matching
+ * __percpu_or_amo_case_<bits>() helper with preemption disabled.
+ */
+#define this_cpu_or_1(pcp, val) _pcp_protect(__percpu_or_amo_case_8, pcp, val)
+#define this_cpu_or_2(pcp, val) _pcp_protect(__percpu_or_amo_case_16, pcp, val)
+#define this_cpu_or_4(pcp, val) _pcp_protect(__percpu_or_amo_case_32, pcp, val)
+#define this_cpu_or_8(pcp, val) _pcp_protect(__percpu_or_amo_case_64, pcp, val)
+
+/*
+ * Sized this_cpu_xchg()/this_cpu_cmpxchg() implementations: every size
+ * forwards to xchg_relaxed()/cmpxchg_relaxed() on this CPU's instance of
+ * @pcp with preemption disabled; the result is cast to typeof(pcp).
+ *
+ * NOTE(review): xchg_relaxed()/cmpxchg_relaxed() are not defined by any
+ * header included at the top of this file - confirm they are always in
+ * scope where these macros are expanded.
+ */
+#define this_cpu_xchg_1(pcp, val) _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_2(pcp, val) _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_4(pcp, val) _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_8(pcp, val) _pcp_protect_return(xchg_relaxed, pcp, val)
+
+#define this_cpu_cmpxchg_1(pcp, o, n) _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_2(pcp, o, n) _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_4(pcp, o, n) _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_8(pcp, o, n) _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+
+/* 64-bit compare-and-exchange is the 8-byte sized variant. */
+#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n)
+
+/*
+ * this_cpu_cmpxchg128() - 128-bit compare-and-exchange on this CPU's
+ * instance of @pcp.
+ *
+ * @o and @n are evaluated once, before preemption is disabled. The
+ * exchange itself is done by cmpxchg128_local() on the raw_cpu_ptr()
+ * address, and its result is the value of the expression.
+ */
+#define this_cpu_cmpxchg128(pcp, o, n) \
+({ \
+	u128 pcp_old__ = (o); \
+	u128 pcp_new__ = (n); \
+	u128 pcp_ret__; \
+	typeof(pcp) *pcp_ptr__; \
+ \
+	preempt_disable_notrace(); \
+	pcp_ptr__ = raw_cpu_ptr(&(pcp)); \
+	pcp_ret__ = cmpxchg128_local(pcp_ptr__, pcp_old__, pcp_new__); \
+	preempt_enable_notrace(); \
+	pcp_ret__; \
+})
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ASM_PERCPU_H */
--
2.39.5
Powered by blists - more mailing lists