Message-ID: <20260120-lrsc-only-v2-1-a522e640d27d@mobileye.com>
Date: Tue, 20 Jan 2026 16:26:23 +0200
From: Vladimir Kondratiev <vladimir.kondratiev@...ileye.com>
To: Paul Walmsley <pjw@...nel.org>,
Palmer Dabbelt <palmer@...belt.com>,
Albert Ou <aou@...s.berkeley.edu>,
Alexandre Ghiti <alex@...ti.fr>,
Will Deacon <will@...nel.org>,
Peter Zijlstra <peterz@...radead.org>,
Boqun Feng <boqun.feng@...il.com>,
Mark Rutland <mark.rutland@....com>,
Gary Guo <gary@...yguo.net>,
Yury Norov <yury.norov@...il.com>,
Rasmus Villemoes <linux@...musvillemoes.dk>,
cfu@...ecomp.com,
torvalds@...ux-foundation.org,
olof@...om.net,
aleksa.paunovic@...cgroup.com,
arikalo@...il.com
Cc: Vladimir Kondratiev <vladimir.kondratiev@...ileye.com>,
linux-riscv@...ts.infradead.org,
linux-kernel@...r.kernel.org,
Vladimir.Kondratiev@...ileye.com
Subject: [PATCH v2 1/2] riscv: support ISA extensions "zaamo" and "zalrsc"

RISC-V has three instruction set extensions related to atomic operations:

- "zaamo": atomic memory operations such as AMOADD
- "zalrsc": the LR and SC instructions
- "a": the combination of "zaamo" and "zalrsc"

Historically, "a" came first and Linux has relied on it; "zaamo" and
"zalrsc" were introduced later. Most atomic operations can be implemented
with either AMO or LR/SC: AMO is more efficient, but more complex flows
are only possible with LR/SC.

Platforms that support only part of the atomics are starting to appear.
A notable example is the MIPS P8700 CPU [1], which implements only
"zalrsc".

Support configurations that do not have "A" but do support one of
"zaamo"/"zalrsc".

The "RISC-V C API" [2] defines architecture extension test macros. The
naming rule for these macros is __riscv_<ext_name>, where <ext_name> is
all lower-case. The alternative to the __riscv_a macro name,
__riscv_atomic, is deprecated but still set by old toolchains.

Fix the "-march" compiler flag and use the extension test macros in the
code. For decisions, use:

	#if defined(__riscv_atomic) || defined(__riscv_zaamo)

For full "A" support, old toolchains set __riscv_atomic while new ones
set __riscv_a, __riscv_zaamo and __riscv_zalrsc.

Add alternative LR/SC implementations for the AMO-based code fragments;
prefer AMO when possible and fall back to LR/SC otherwise.
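
As an illustration of the transformation (this mirrors the hart_lottery
update in head.S below), an AMO instruction such as

	amoadd.w a3, a2, (a3)

becomes, on a Zalrsc-only core, a retry loop with the same effect,
including leaving the old memory value in a3:

	1:	lr.w	t0, (a3)	# load-reserve the old value
		addw	t1, t0, a2	# compute old value + increment
		sc.w	t1, t1, (a3)	# t1 == 0 only if the store succeeded
		bnez	t1, 1b		# reservation lost, retry
		mv	a3, t0		# return the old value, as amoadd.w does
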
[1] https://mips.com/products/hardware/p8700/
[2] https://github.com/riscv-non-isa/riscv-c-api-doc
Suggested-by: Chao-ying Fu <cfu@...ecomp.com>
Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@...ileye.com>
---
arch/riscv/Kconfig | 19 ++++++++++++++
arch/riscv/Makefile | 16 ++++++++----
arch/riscv/include/asm/atomic.h | 56 +++++++++++++++++++++++++++++++++++++++-
arch/riscv/include/asm/bitops.h | 48 ++++++++++++++++++++++++++++++++++
arch/riscv/include/asm/cmpxchg.h | 18 +++++++++++++
arch/riscv/include/asm/futex.h | 52 +++++++++++++++++++++++++++++++++++++
arch/riscv/kernel/entry.S | 9 +++++++
arch/riscv/kernel/head.S | 18 +++++++++++++
8 files changed, 230 insertions(+), 6 deletions(-)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index fadec20b87a8e3ef97bed64270e496ddb45244ac..824c9c64851c1c4d751d1a6b3c81e6a519e9fb31 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -710,6 +710,25 @@ config RISCV_ISA_ZACAS
If you don't know what to do here, say Y.
+config RISCV_ISA_ZAAMO
+ bool "Zaamo extension support for AMO atomics" if NONPORTABLE
+ default y
+ help
+ Support atomic memory operation (AMO) instructions.
+
+ If you don't know what to do here, say Y.
+
+config RISCV_ISA_ZALRSC
+ bool "Zalrsc extension support for LR/SC atomics" if NONPORTABLE
+ default y
+ help
+ Support the LR/SC instructions used to build atomic operations.
+
+ If you don't know what to do here, say Y.
+
+config RISCV_ISA_A
+ def_bool RISCV_ISA_ZAAMO && RISCV_ISA_ZALRSC
+
config TOOLCHAIN_HAS_ZBB
bool
default y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 4c6de57f65ef0e0339358f6c9ab8d4e7a6d1263f..607f3e12e9ccda5fe031821965a4e4901925b715 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -58,8 +58,9 @@ ifeq ($(CONFIG_SHADOW_CALL_STACK),y)
endif
# ISA string setting
-riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima
-riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima
+riscv-march-$(CONFIG_ARCH_RV32I) := rv32im
+riscv-march-$(CONFIG_ARCH_RV64I) := rv64im
+riscv-march-$(CONFIG_RISCV_ISA_A) := $(riscv-march-y)a
riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd
riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
riscv-march-$(CONFIG_RISCV_ISA_V) := $(riscv-march-y)v
@@ -81,14 +82,19 @@ riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
# Check if the toolchain supports Zabha
riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha
+# If "A" is not supported, the toolchain must support Zaamo/Zalrsc
+ifneq ($(CONFIG_RISCV_ISA_A),y)
+riscv-march-$(CONFIG_RISCV_ISA_ZAAMO) := $(riscv-march-y)_zaamo
+riscv-march-$(CONFIG_RISCV_ISA_ZALRSC) := $(riscv-march-y)_zalrsc
+endif
+
# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
-KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
-
+KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima?|rv64ima?)fd([^v_]*)v?/\1\2/')
KBUILD_AFLAGS += -march=$(riscv-march-y)
# For C code built with floating-point support, exclude V but keep F and D.
-CC_FLAGS_FPU := -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)([^v_]*)v?/\1\2/')
+CC_FLAGS_FPU := -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima?|rv64ima?)([^v_]*)v?/\1\2/')
KBUILD_CFLAGS += -mno-save-restore
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 5b96c2f61adb596caf8ee6355d4ee86dbc19903b..1fa57e5a19590c668e81341c05aaff14a0bee6b9 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -50,6 +50,7 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
* have the AQ or RL bits set. These don't return anything, so there's only
* one version to worry about.
*/
+#if defined(__riscv_atomic) || defined(__riscv_zaamo)
#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
static __always_inline \
void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
@@ -59,7 +60,25 @@ void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
: "+A" (v->counter) \
: "r" (I) \
: "memory"); \
-} \
+}
+#elif defined(__riscv_zalrsc)
+#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
+static __always_inline \
+void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
+{ \
+ register c_type ret, temp; \
+ __asm__ __volatile__ ( \
+ "1: lr." #asm_type " %1, %0\n" \
+ " " #asm_op " %2, %1, %3\n" \
+ " sc." #asm_type " %2, %2, %0\n" \
+ " bnez %2, 1b\n" \
+ : "+A" (v->counter), "=&r" (ret), "=&r" (temp) \
+ : "r" (I) \
+ : "memory"); \
+}
+#else
+#error "Need AMO or LR/SC atomics"
+#endif
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, I) \
@@ -84,6 +103,7 @@ ATOMIC_OPS(xor, xor, i)
* There's two flavors of these: the arithmatic ops have both fetch and return
* versions, while the logical ops only have fetch versions.
*/
+#if defined(__riscv_atomic) || defined(__riscv_zaamo)
#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix) \
static __always_inline \
c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i, \
@@ -108,6 +128,40 @@ c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
: "memory"); \
return ret; \
}
+#elif defined(__riscv_zalrsc)
+#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix) \
+static __always_inline \
+c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ register c_type ret, temp; \
+ __asm__ __volatile__ ( \
+ "1: lr." #asm_type " %1, %0\n" \
+ " " #asm_op " %2, %1, %3\n" \
+ " sc." #asm_type " %2, %2, %0\n" \
+ " bnez %2, 1b\n" \
+ : "+A" (v->counter), "=&r" (ret), "=&r" (temp) \
+ : "r" (I) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
+c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
+{ \
+ register c_type ret, temp; \
+ __asm__ __volatile__ ( \
+ "1: lr." #asm_type ".aqrl %1, %0\n" \
+ " " #asm_op " %2, %1, %3\n" \
+ " sc." #asm_type ".aqrl %2, %2, %0\n" \
+ " bnez %2, 1b\n" \
+ : "+A" (v->counter), "=&r" (ret), "=&r" (temp) \
+ : "r" (I) \
+ : "memory"); \
+ return ret; \
+}
+#else
+#error "Need AMO or LR/SC atomics"
+#endif
#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix) \
static __always_inline \
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 77880677b06e03875721f33515a6d2ac9166c373..994b15c8a5cd3349bc929cc847ffde4629ce0251 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -187,12 +187,17 @@ static __always_inline int variable_fls(unsigned int x)
#if (BITS_PER_LONG == 64)
#define __AMO(op) "amo" #op ".d"
+#define __LR "lr.d"
+#define __SC "sc.d"
#elif (BITS_PER_LONG == 32)
#define __AMO(op) "amo" #op ".w"
+#define __LR "lr.w"
+#define __SC "sc.w"
#else
#error "Unexpected BITS_PER_LONG"
#endif
+#if defined(__riscv_atomic) || defined(__riscv_zaamo)
#define __test_and_op_bit_ord(op, mod, nr, addr, ord) \
({ \
unsigned long __res, __mask; \
@@ -211,6 +216,35 @@ static __always_inline int variable_fls(unsigned int x)
: "+A" (addr[BIT_WORD(nr)]) \
: "r" (mod(BIT_MASK(nr))) \
: "memory");
+#elif defined(__riscv_zalrsc)
+#define __test_and_op_bit_ord(op, mod, nr, addr, ord) \
+({ \
+ unsigned long __res, __mask, __temp; \
+ __mask = BIT_MASK(nr); \
+ __asm__ __volatile__ ( \
+ "1: " __LR #ord " %0, %1\n" \
+ #op " %2, %0, %3\n" \
+ __SC #ord " %2, %2, %1\n" \
+ "bnez %2, 1b\n" \
+ : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp) \
+ : "r" (mod(__mask)) \
+ : "memory"); \
+ ((__res & __mask) != 0); \
+})
+
+#define __op_bit_ord(op, mod, nr, addr, ord) \
+ unsigned long __res, __temp; \
+ __asm__ __volatile__ ( \
+ "1: " __LR #ord " %0, %1\n" \
+ #op " %2, %0, %3\n" \
+ __SC #ord " %2, %2, %1\n" \
+ "bnez %2, 1b\n" \
+ : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp) \
+ : "r" (mod(BIT_MASK(nr))) \
+ : "memory")
+#else
+#error "Need AMO or LR/SC atomics"
+#endif
#define __test_and_op_bit(op, mod, nr, addr) \
__test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
@@ -354,12 +388,26 @@ static __always_inline void arch___clear_bit_unlock(
static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
volatile unsigned long *addr)
{
+#if defined(__riscv_atomic) || defined(__riscv_zaamo)
unsigned long res;
__asm__ __volatile__ (
__AMO(xor) ".rl %0, %2, %1"
: "=r" (res), "+A" (*addr)
: "r" (__NOP(mask))
: "memory");
+#elif defined(__riscv_zalrsc)
+ unsigned long res, temp;
+ __asm__ __volatile__ (
+ "1: " __LR ".rl %0, %1\n"
+ "xor %2, %0, %3\n"
+ __SC ".rl %2, %2, %1\n"
+ "bnez %2, 1b\n"
+ : "=&r" (res), "+A" (*addr), "=&r" (temp)
+ : "r" (__NOP(mask))
+ : "memory");
+#else
+#error "Need AMO or LR/SC atomics"
+#endif
return (res & BIT(7)) != 0;
}
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 122e1485d39a0ad44ec4357cb23148dc6e58dc6b..c0feb20241590a589e5d62d08a8f6c9a37d339c3 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -54,6 +54,7 @@
} \
})
+#if defined(__riscv_atomic) || defined(__riscv_zaamo)
#define __arch_xchg(sfx, prepend, append, r, p, n) \
({ \
__asm__ __volatile__ ( \
@@ -64,6 +65,23 @@
: "r" (n) \
: "memory"); \
})
+#elif defined(__riscv_zalrsc)
+#define __arch_xchg(sfx, prepend, append, r, p, n) \
+({ \
+ __typeof__(*(__ptr)) temp; \
+ __asm__ __volatile__ ( \
+ prepend \
+ "1: lr" sfx " %0, %1\n" \
+ " sc" sfx " %2, %3, %1\n" \
+ " bnez %2, 1b\n" \
+ append \
+ : "=&r" (r), "+A" (*(p)), "=&r" (temp) \
+ : "r" (n) \
+ : "memory"); \
+})
+#else
+#error "Need AMO or LR/SC atomics"
+#endif
#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \
sc_append, swap_append) \
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
index 90c86b115e008a1fb08f3da64382fb4a64d9cc2f..bb9393a2a1abd19f56d9f960a207e24dadf30670 100644
--- a/arch/riscv/include/asm/futex.h
+++ b/arch/riscv/include/asm/futex.h
@@ -19,6 +19,7 @@
#define __disable_user_access() do { } while (0)
#endif
+#if defined(__riscv_atomic) || defined(__riscv_zaamo)
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
{ \
__enable_user_access(); \
@@ -32,16 +33,43 @@
: "memory"); \
__disable_user_access(); \
}
+#elif defined(__riscv_zalrsc)
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
+{ \
+ __enable_user_access(); \
+ __asm__ __volatile__ ( \
+ "1: lr.w.aqrl %[ov], %[u] \n" \
+ " " insn " \n" \
+ " sc.w.aqrl %[t], %[t], %[u] \n" \
+ " bnez %[t], 1b \n" \
+ "2: \n" \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r]) \
+ : [r] "+r" (ret), [ov] "=&r" (oldval), \
+ [t] "=&r" (temp), [u] "+m" (*(uaddr)) \
+ : [op] "Jr" (oparg) \
+ : "memory"); \
+ __disable_user_access(); \
+}
+#else
+#error "Need AMO or LR/SC atomics"
+#endif
static inline int
arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
+#if defined(__riscv_atomic) || defined(__riscv_zaamo)
int oldval = 0, ret = 0;
+#elif defined(__riscv_zalrsc)
+ int oldval = 0, ret = 0, temp = 0;
+#else
+#error "Need AMO or LR/SC atomics"
+#endif
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
+#if defined(__riscv_atomic) || defined(__riscv_zaamo)
case FUTEX_OP_SET:
__futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
ret, oldval, uaddr, oparg);
@@ -62,6 +90,30 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
__futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
ret, oldval, uaddr, oparg);
break;
+#elif defined(__riscv_zalrsc)
+ case FUTEX_OP_SET:
+ __futex_atomic_op("mv %[t], %z[op]",
+ ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ADD:
+ __futex_atomic_op("add %[t], %[ov], %z[op]",
+ ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_OR:
+ __futex_atomic_op("or %[t], %[ov], %z[op]",
+ ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ANDN:
+ __futex_atomic_op("and %[t], %[ov], %z[op]",
+ ret, oldval, uaddr, ~oparg);
+ break;
+ case FUTEX_OP_XOR:
+ __futex_atomic_op("xor %[t], %[ov], %z[op]",
+ ret, oldval, uaddr, oparg);
+ break;
+#else
+#error "Need AMO or LR/SC atomics"
+#endif
default:
ret = -ENOSYS;
}
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 9b9dec6893b81a6b0c39af654590848e9ef754c1..a9a39a15a7c685265527be74e37c37add7416446 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -73,7 +73,16 @@
beq a2, zero, .Lnew_vmalloc_restore_context
/* Atomically reset the current cpu bit in new_vmalloc */
+#if defined(__riscv_atomic) || defined(__riscv_zaamo)
amoxor.d a0, a1, (a0)
+#elif defined(__riscv_zalrsc)
+1: lr.d a2, (a0)
+ xor a2, a1, a2
+ sc.d a2, a2, (a0)
+ bnez a2, 1b
+#else
+#error "Need AMO or LR/SC atomics"
+#endif
/* Only emit a sfence.vma if the uarch caches invalid entries */
ALTERNATIVE("sfence.vma", "nop", 0, RISCV_ISA_EXT_SVVPTC, 1)
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index bdf3352acf4cb48aaaa47f7b0e5fc98c0c5b1712..6e4c977ff782f78e23cb344cc28f98ea20f016f6 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -259,7 +259,17 @@ SYM_CODE_START(_start_kernel)
/* Pick one hart to run the main boot sequence */
la a3, hart_lottery
li a2, 1
+#if defined(__riscv_atomic) || defined(__riscv_zaamo)
amoadd.w a3, a2, (a3)
+#elif defined(__riscv_zalrsc)
+1: lr.w t0, (a3)
+ addw t1, t0, a2
+ sc.w t1, t1, (a3)
+ bnez t1, 1b
+ mv a3, t0
+#else
+#error "Need AMO or LR/SC atomics"
+#endif
bnez a3, .Lsecondary_start
#else
@@ -269,7 +279,15 @@ SYM_CODE_START(_start_kernel)
XIP_FIXUP_OFFSET a2
XIP_FIXUP_FLASH_OFFSET a3
lw t1, (a3)
+#if defined(__riscv_atomic) || defined(__riscv_zaamo)
amoswap.w t0, t1, (a2)
+#elif defined(__riscv_zalrsc)
+1: lr.w t0, (a2)
+ sc.w t2, t1, (a2)
+ bnez t2, 1b
+#else
+#error "Need AMO or LR/SC atomics"
+#endif
/* first time here if hart_lottery in RAM is not set */
beq t0, t1, .Lsecondary_start
--
2.43.0