[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20241129144319.74257-1-arikalo@gmail.com>
Date: Fri, 29 Nov 2024 15:43:19 +0100
From: Aleksandar Rikalo <arikalo@...il.com>
To: linux-riscv@...ts.infradead.org
Cc: Paul Walmsley <paul.walmsley@...ive.com>,
Palmer Dabbelt <palmer@...belt.com>,
Albert Ou <aou@...s.berkeley.edu>,
Will Deacon <will@...nel.org>,
Peter Zijlstra <peterz@...radead.org>,
Boqun Feng <boqun.feng@...il.com>,
Mark Rutland <mark.rutland@....com>,
Yury Norov <yury.norov@...il.com>,
Rasmus Villemoes <linux@...musvillemoes.dk>,
Andrea Parri <parri.andrea@...il.com>,
Leonardo Bras <leobras@...hat.com>,
Guo Ren <guoren@...nel.org>,
Samuel Holland <samuel.holland@...ive.com>,
Eric Chan <ericchancf@...gle.com>,
Aleksandar Rikalo <arikalo@...il.com>,
linux-kernel@...r.kernel.org,
Djordje Todorovic <djordje.todorovic@...cgroup.com>
Subject: [PATCH] riscv: Rewrite AMO instructions via lr and sc.
From: Chao-ying Fu <cfu@...s.com>
Use lr and sc to implement all atomic functions. Some CPUs have
native support for lr and sc, but emulate AMO instructions through
trap handlers that are slow.
Add config RISCV_ISA_ZALRSC_ONLY.
Signed-off-by: Chao-ying Fu <cfu@...s.com>
Signed-off-by: Aleksandar Rikalo <arikalo@...il.com>
---
arch/riscv/Kconfig | 10 ++++++
arch/riscv/include/asm/atomic.h | 52 +++++++++++++++++++++++++++++++-
arch/riscv/include/asm/bitops.h | 45 +++++++++++++++++++++++++++
arch/riscv/include/asm/cmpxchg.h | 16 ++++++++++
arch/riscv/include/asm/futex.h | 46 ++++++++++++++++++++++++++++
5 files changed, 168 insertions(+), 1 deletion(-)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index cc63aef41e94..767538c27875 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -715,6 +715,16 @@ config RISCV_ISA_ZACAS
If you don't know what to do here, say Y.
+config RISCV_ISA_ZALRSC_ONLY
+ bool "Zalrsc extension support only"
+ default n
+ help
+ Use lr and sc to build all atomic functions. Some CPUs have
+ native support for lr and sc, but emulate amo instructions through
+ trap handlers that are slow.
+
+ If you don't know what to do here, say n.
+
config TOOLCHAIN_HAS_ZBB
bool
default y
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 5b96c2f61adb..f484babecb9e 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -50,6 +50,7 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
* have the AQ or RL bits set. These don't return anything, so there's only
* one version to worry about.
*/
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
static __always_inline \
void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
@@ -59,7 +60,23 @@ void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
: "+A" (v->counter) \
: "r" (I) \
: "memory"); \
-} \
+}
+#else
+#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
+static __always_inline \
+void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
+{ \
+ register c_type ret, temp; \
+ __asm__ __volatile__ ( \
+ "1: lr." #asm_type " %1, %0\n" \
+ " " #asm_op " %2, %1, %3\n" \
+ " sc." #asm_type " %2, %2, %0\n" \
+ " bnez %2, 1b\n" \
+ : "+A" (v->counter), "=&r" (ret), "=&r" (temp) \
+ : "r" (I) \
+ : "memory"); \
+}
+#endif
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, I) \
@@ -84,6 +101,7 @@ ATOMIC_OPS(xor, xor, i)
* There's two flavors of these: the arithmatic ops have both fetch and return
* versions, while the logical ops only have fetch versions.
*/
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix) \
static __always_inline \
c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i, \
@@ -108,6 +126,38 @@ c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
: "memory"); \
return ret; \
}
+#else
+#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix) \
+static __always_inline \
+c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ register c_type ret, temp; \
+ __asm__ __volatile__ ( \
+ "1: lr." #asm_type " %1, %0\n" \
+ " " #asm_op " %2, %1, %3\n" \
+ " sc." #asm_type " %2, %2, %0\n" \
+ " bnez %2, 1b\n" \
+ : "+A" (v->counter), "=&r" (ret), "=&r" (temp) \
+ : "r" (I) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
+c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
+{ \
+ register c_type ret, temp; \
+ __asm__ __volatile__ ( \
+ "1: lr." #asm_type ".aqrl %1, %0\n" \
+ " " #asm_op " %2, %1, %3\n" \
+ " sc." #asm_type ".aqrl %2, %2, %0\n" \
+ " bnez %2, 1b\n" \
+ : "+A" (v->counter), "=&r" (ret), "=&r" (temp) \
+ : "r" (I) \
+ : "memory"); \
+ return ret; \
+}
+#endif
#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix) \
static __always_inline \
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index fae152ea0508..b51cb18f7d9e 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -187,12 +187,17 @@ static __always_inline int variable_fls(unsigned int x)
#if (BITS_PER_LONG == 64)
#define __AMO(op) "amo" #op ".d"
+#define __LR "lr.d"
+#define __SC "sc.d"
#elif (BITS_PER_LONG == 32)
#define __AMO(op) "amo" #op ".w"
+#define __LR "lr.w"
+#define __SC "sc.w"
#else
#error "Unexpected BITS_PER_LONG"
#endif
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
#define __test_and_op_bit_ord(op, mod, nr, addr, ord) \
({ \
unsigned long __res, __mask; \
@@ -211,6 +216,33 @@ static __always_inline int variable_fls(unsigned int x)
: "+A" (addr[BIT_WORD(nr)]) \
: "r" (mod(BIT_MASK(nr))) \
: "memory");
+#else
+#define __test_and_op_bit_ord(op, mod, nr, addr, ord) \
+({ \
+ unsigned long __res, __mask, __temp; \
+ __mask = BIT_MASK(nr); \
+ __asm__ __volatile__ ( \
+ "1: " __LR #ord " %0, %1\n" \
+ #op " %2, %0, %3\n" \
+ __SC #ord " %2, %2, %1\n" \
+ "bnez %2, 1b\n" \
+ : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp) \
+ : "r" (mod(__mask)) \
+ : "memory"); \
+ ((__res & __mask) != 0); \
+})
+
+#define __op_bit_ord(op, mod, nr, addr, ord) \
+ unsigned long __res, __temp; \
+ __asm__ __volatile__ ( \
+ "1: " __LR #ord " %0, %1\n" \
+ #op " %2, %0, %3\n" \
+ __SC #ord " %2, %2, %1\n" \
+ "bnez %2, 1b\n" \
+ : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp) \
+ : "r" (mod(BIT_MASK(nr))) \
+ : "memory")
+#endif
#define __test_and_op_bit(op, mod, nr, addr) \
__test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
@@ -354,12 +386,25 @@ static inline void arch___clear_bit_unlock(
static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
volatile unsigned long *addr)
{
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
unsigned long res;
__asm__ __volatile__ (
__AMO(xor) ".rl %0, %2, %1"
: "=r" (res), "+A" (*addr)
: "r" (__NOP(mask))
: "memory");
+#else
+ unsigned long res, temp;
+
+ __asm__ __volatile__ (
+ "1: " __LR ".rl %0, %1\n"
+ "xor %2, %0, %3\n"
+ __SC ".rl %2, %2, %1\n"
+ "bnez %2, 1b\n"
+ : "=&r" (res), "+A" (*addr), "=&r" (temp)
+ : "r" (__NOP(mask))
+ : "memory");
+#endif
return (res & BIT(7)) != 0;
}
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 4cadc56220fe..881082b05110 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -51,6 +51,7 @@
} \
})
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
#define __arch_xchg(sfx, prepend, append, r, p, n) \
({ \
__asm__ __volatile__ ( \
@@ -61,6 +62,21 @@
: "r" (n) \
: "memory"); \
})
+#else
+#define __arch_xchg(sfx, prepend, append, r, p, n) \
+({ \
+ __typeof__(*(__ptr)) temp; \
+ __asm__ __volatile__ ( \
+ prepend \
+ "1: lr" sfx " %0, %1\n" \
+ " sc" sfx " %2, %3, %1\n" \
+ " bnez %2, 1b\n" \
+ append \
+ : "=&r" (r), "+A" (*(p)), "=&r" (temp) \
+ : "r" (n) \
+ : "memory"); \
+})
+#endif
#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \
sc_append, swap_append) \
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
index fc8130f995c1..47297f47ec35 100644
--- a/arch/riscv/include/asm/futex.h
+++ b/arch/riscv/include/asm/futex.h
@@ -19,6 +19,7 @@
#define __disable_user_access() do { } while (0)
#endif
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
{ \
__enable_user_access(); \
@@ -32,16 +33,39 @@
: "memory"); \
__disable_user_access(); \
}
+#else
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
+{ \
+ __enable_user_access(); \
+ __asm__ __volatile__ ( \
+ "1: lr.w.aqrl %[ov], %[u]\n" \
+ " " insn "\n" \
+ " sc.w.aqrl %[t], %[t], %[u]\n" \
+ " bnez %[t], 1b\n" \
+ "2:\n" \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r]) \
+ : [r] "+r" (ret), [ov] "=&r" (oldval), \
+ [t] "=&r" (temp), [u] "+m" (*uaddr) \
+ : [op] "Jr" (oparg) \
+ : "memory"); \
+ __disable_user_access(); \
+}
+#endif
static inline int
arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
int oldval = 0, ret = 0;
+#else
+ int oldval = 0, ret = 0, temp = 0;
+#endif
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
case FUTEX_OP_SET:
__futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
ret, oldval, uaddr, oparg);
@@ -62,6 +86,28 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
__futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
ret, oldval, uaddr, oparg);
break;
+#else
+ case FUTEX_OP_SET:
+ __futex_atomic_op("mv %[t], %z[op]",
+ ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ADD:
+ __futex_atomic_op("add %[t], %[ov], %z[op]",
+ ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_OR:
+ __futex_atomic_op("or %[t], %[ov], %z[op]",
+ ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ANDN:
+ __futex_atomic_op("and %[t], %[ov], %z[op]",
+ ret, oldval, uaddr, ~oparg);
+ break;
+ case FUTEX_OP_XOR:
+ __futex_atomic_op("xor %[t], %[ov], %z[op]",
+ ret, oldval, uaddr, oparg);
+ break;
+#endif
default:
ret = -ENOSYS;
}
--
2.25.1
Powered by blists - more mailing lists