Message-ID: <CAJF2gTRO5Q=CMT8N5nVOF9oOvDb7CtakeemOkX1-v_w-WPuaXQ@mail.gmail.com>
Date: Fri, 29 Nov 2024 23:12:54 +0800
From: Guo Ren <guoren@...nel.org>
To: Aleksandar Rikalo <arikalo@...il.com>
Cc: linux-riscv@...ts.infradead.org, Paul Walmsley <paul.walmsley@...ive.com>,
Palmer Dabbelt <palmer@...belt.com>, Albert Ou <aou@...s.berkeley.edu>,
Will Deacon <will@...nel.org>, Peter Zijlstra <peterz@...radead.org>,
Boqun Feng <boqun.feng@...il.com>, Mark Rutland <mark.rutland@....com>,
Yury Norov <yury.norov@...il.com>, Rasmus Villemoes <linux@...musvillemoes.dk>,
Andrea Parri <parri.andrea@...il.com>, Leonardo Bras <leobras@...hat.com>,
Samuel Holland <samuel.holland@...ive.com>, Eric Chan <ericchancf@...gle.com>,
linux-kernel@...r.kernel.org,
Djordje Todorovic <djordje.todorovic@...cgroup.com>
Subject: Re: [PATCH] riscv: Rewrite AMO instructions via lr and sc.
On Fri, Nov 29, 2024 at 10:43 PM Aleksandar Rikalo <arikalo@...il.com> wrote:
>
> From: Chao-ying Fu <cfu@...s.com>
>
> Use lr and sc to implement all atomic functions. Some CPUs have
> native support for lr and sc, but emulate AMO instructions through
> trap handlers that are slow.
>
> Add config RISCV_ISA_ZALRSC_ONLY.
>
> Signed-off-by: Chao-ying Fu <cfu@...s.com>
> Signed-off-by: Aleksandar Rikalo <arikalo@...il.com>
> ---
> arch/riscv/Kconfig | 10 ++++++
> arch/riscv/include/asm/atomic.h | 52 +++++++++++++++++++++++++++++++-
> arch/riscv/include/asm/bitops.h | 45 +++++++++++++++++++++++++++
> arch/riscv/include/asm/cmpxchg.h | 16 ++++++++++
> arch/riscv/include/asm/futex.h | 46 ++++++++++++++++++++++++++++
> 5 files changed, 168 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index cc63aef41e94..767538c27875 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -715,6 +715,16 @@ config RISCV_ISA_ZACAS
>
> If you don't know what to do here, say Y.
>
> +config RISCV_ISA_ZALRSC_ONLY
> + bool "Zalrsc extension support only"
> + default n
> + help
> + Use lr and sc to build all atomic functions. Some CPUs have
> + native support for lr and sc, but emulate amo instructions through
> + trap handlers that are slow.
What's the actual hardware?
Is the AMO emulation done in M-mode firmware?
Where is that emulation code?
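
Just to make the option's trade-off concrete, here is a minimal sketch of the
two sequences for a relaxed 32-bit atomic OR (not taken from the patch, names
made up):

/* Single A-extension instruction vs. Zalrsc retry loop; illustrative only. */
static inline void amo_or(int *p, int mask)
{
        __asm__ __volatile__ (
                "       amoor.w zero, %1, %0"   /* one AMO instruction         */
                : "+A" (*p)
                : "r" (mask)
                : "memory");
}

static inline void lrsc_or(int *p, int mask)
{
        int tmp;

        __asm__ __volatile__ (
                "1:     lr.w    %0, %1\n"       /* tmp = old value             */
                "       or      %0, %0, %2\n"
                "       sc.w    %0, %0, %1\n"   /* tmp = 0 if the store hit    */
                "       bnez    %0, 1b\n"       /* lost the reservation: retry */
                : "=&r" (tmp), "+A" (*p)
                : "r" (mask)
                : "memory");
}

On a core that traps AMOs, the first form takes a slow exception round trip;
the second stays in native lr/sc.
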
> +
> + If you don't know what to do here, say n.
> +
> config TOOLCHAIN_HAS_ZBB
> bool
> default y
> diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
> index 5b96c2f61adb..f484babecb9e 100644
> --- a/arch/riscv/include/asm/atomic.h
> +++ b/arch/riscv/include/asm/atomic.h
> @@ -50,6 +50,7 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
> * have the AQ or RL bits set. These don't return anything, so there's only
> * one version to worry about.
> */
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
> #define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
> static __always_inline \
> void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
> @@ -59,7 +60,23 @@ void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
> : "+A" (v->counter) \
> : "r" (I) \
> : "memory"); \
> -} \
> +}
> +#else
> +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
> +static __always_inline \
> +void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
> +{ \
> + register c_type ret, temp; \
> + __asm__ __volatile__ ( \
> + "1: lr." #asm_type " %1, %0\n" \
> + " " #asm_op " %2, %1, %3\n" \
> + " sc." #asm_type " %2, %2, %0\n" \
> + " bnez %2, 1b\n" \
> + : "+A" (v->counter), "=&r" (ret), "=&r" (temp) \
> + : "r" (I) \
> + : "memory"); \
> +}
> +#endif
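
(For readers following the macro soup: with CONFIG_RISCV_ISA_ZALRSC_ONLY=y the
branch above hand-expands to roughly the following for arch_atomic_add();
sketch only, taken straight from the template.)

static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
        register int ret, temp;

        __asm__ __volatile__ (
                "1:     lr.w    %1, %0\n"       /* ret  = old counter          */
                "       add     %2, %1, %3\n"   /* temp = old + i              */
                "       sc.w    %2, %2, %0\n"   /* temp = 0 on success         */
                "       bnez    %2, 1b\n"
                : "+A" (v->counter), "=&r" (ret), "=&r" (temp)
                : "r" (i)
                : "memory");
}
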
>
> #ifdef CONFIG_GENERIC_ATOMIC64
> #define ATOMIC_OPS(op, asm_op, I) \
> @@ -84,6 +101,7 @@ ATOMIC_OPS(xor, xor, i)
> * There's two flavors of these: the arithmatic ops have both fetch and return
> * versions, while the logical ops only have fetch versions.
> */
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
> #define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix) \
> static __always_inline \
> c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i, \
> @@ -108,6 +126,38 @@ c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
> : "memory"); \
> return ret; \
> }
> +#else
> +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix) \
> +static __always_inline \
> +c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i, \
> + atomic##prefix##_t *v) \
> +{ \
> + register c_type ret, temp; \
> + __asm__ __volatile__ ( \
> + "1: lr." #asm_type " %1, %0\n" \
> + " " #asm_op " %2, %1, %3\n" \
> + " sc." #asm_type " %2, %2, %0\n" \
> + " bnez %2, 1b\n" \
> + : "+A" (v->counter), "=&r" (ret), "=&r" (temp) \
> + : "r" (I) \
> + : "memory"); \
> + return ret; \
> +} \
> +static __always_inline \
> +c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
> +{ \
> + register c_type ret, temp; \
> + __asm__ __volatile__ ( \
> + "1: lr." #asm_type ".aqrl %1, %0\n" \
> + " " #asm_op " %2, %1, %3\n" \
> + " sc." #asm_type ".aqrl %2, %2, %0\n" \
> + " bnez %2, 1b\n" \
> + : "+A" (v->counter), "=&r" (ret), "=&r" (temp) \
> + : "r" (I) \
> + : "memory"); \
> + return ret; \
> +}
> +#endif
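
Similarly, the fully-ordered fetch flavour comes out as (hand-expanded sketch
for arch_atomic_fetch_add()):

static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
        register int ret, temp;

        __asm__ __volatile__ (
                "1:     lr.w.aqrl %1, %0\n"     /* ret = old counter           */
                "       add       %2, %1, %3\n"
                "       sc.w.aqrl %2, %2, %0\n" /* 0 on success                */
                "       bnez      %2, 1b\n"
                : "+A" (v->counter), "=&r" (ret), "=&r" (temp)
                : "r" (i)
                : "memory");

        return ret;                             /* value seen before the add   */
}
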
>
> #define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix) \
> static __always_inline \
> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> index fae152ea0508..b51cb18f7d9e 100644
> --- a/arch/riscv/include/asm/bitops.h
> +++ b/arch/riscv/include/asm/bitops.h
> @@ -187,12 +187,17 @@ static __always_inline int variable_fls(unsigned int x)
>
> #if (BITS_PER_LONG == 64)
> #define __AMO(op) "amo" #op ".d"
> +#define __LR "lr.d"
> +#define __SC "sc.d"
> #elif (BITS_PER_LONG == 32)
> #define __AMO(op) "amo" #op ".w"
> +#define __LR "lr.w"
> +#define __SC "sc.w"
> #else
> #error "Unexpected BITS_PER_LONG"
> #endif
>
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
> #define __test_and_op_bit_ord(op, mod, nr, addr, ord) \
> ({ \
> unsigned long __res, __mask; \
> @@ -211,6 +216,33 @@ static __always_inline int variable_fls(unsigned int x)
> : "+A" (addr[BIT_WORD(nr)]) \
> : "r" (mod(BIT_MASK(nr))) \
> : "memory");
> +#else
> +#define __test_and_op_bit_ord(op, mod, nr, addr, ord) \
> +({ \
> + unsigned long __res, __mask, __temp; \
> + __mask = BIT_MASK(nr); \
> + __asm__ __volatile__ ( \
> + "1: " __LR #ord " %0, %1\n" \
> + #op " %2, %0, %3\n" \
> + __SC #ord " %2, %2, %1\n" \
> + "bnez %2, 1b\n" \
> + : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp) \
> + : "r" (mod(__mask)) \
> + : "memory"); \
> + ((__res & __mask) != 0); \
> +})
> +
> +#define __op_bit_ord(op, mod, nr, addr, ord) \
> + unsigned long __res, __temp; \
> + __asm__ __volatile__ ( \
> + "1: " __LR #ord " %0, %1\n" \
> + #op " %2, %0, %3\n" \
> + __SC #ord " %2, %2, %1\n" \
> + "bnez %2, 1b\n" \
> + : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp) \
> + : "r" (mod(BIT_MASK(nr))) \
> + : "memory")
> +#endif
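
(i.e. with the option set, test_and_set_bit() on rv64 ends up as something
like the following; sketch with the kernel BIT_* helpers kept, name made up.)

static inline int lrsc_test_and_set_bit(int nr, volatile unsigned long *addr)
{
        unsigned long res, temp, mask = BIT_MASK(nr);
        volatile unsigned long *word = &addr[BIT_WORD(nr)];

        __asm__ __volatile__ (
                "1:     lr.d.aqrl %0, %1\n"     /* res  = old word             */
                "       or        %2, %0, %3\n" /* temp = old | mask           */
                "       sc.d.aqrl %2, %2, %1\n"
                "       bnez      %2, 1b\n"
                : "=&r" (res), "+A" (*word), "=&r" (temp)
                : "r" (mask)
                : "memory");

        return (res & mask) != 0;               /* bit already set?            */
}
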
>
> #define __test_and_op_bit(op, mod, nr, addr) \
> __test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
> @@ -354,12 +386,25 @@ static inline void arch___clear_bit_unlock(
> static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
> volatile unsigned long *addr)
> {
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
> unsigned long res;
> __asm__ __volatile__ (
> __AMO(xor) ".rl %0, %2, %1"
> : "=r" (res), "+A" (*addr)
> : "r" (__NOP(mask))
> : "memory");
> +#else
> + unsigned long res, temp;
> +
> + __asm__ __volatile__ (
> + "1: " __LR ".rl %0, %1\n"
> + "xor %2, %0, %3\n"
> + __SC ".rl %2, %2, %1\n"
> + "bnez %2, 1b\n"
> + : "=&r" (res), "+A" (*addr), "=&r" (temp)
> + : "r" (__NOP(mask))
> + : "memory");
> +#endif
> return (res & BIT(7)) != 0;
> }
>
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index 4cadc56220fe..881082b05110 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -51,6 +51,7 @@
> } \
> })
>
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
> #define __arch_xchg(sfx, prepend, append, r, p, n) \
> ({ \
> __asm__ __volatile__ ( \
> @@ -61,6 +62,21 @@
> : "r" (n) \
> : "memory"); \
> })
> +#else
> +#define __arch_xchg(sfx, prepend, append, r, p, n) \
> +({ \
> + __typeof__(*(__ptr)) temp; \
> + __asm__ __volatile__ ( \
> + prepend \
> + "1: lr" sfx " %0, %1\n" \
> + " sc" sfx " %2, %3, %1\n" \
> + " bnez %2, 1b\n" \
> + append \
> + : "=&r" (r), "+A" (*(p)), "=&r" (temp) \
> + : "r" (n) \
> + : "memory"); \
> +})
> +#endif
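
The xchg path is then the classic swap loop; for a 32-bit relaxed xchg the
macro above comes out as roughly (sketch, name made up):

static inline int lrsc_xchg32(int *p, int newval)
{
        int ret, temp;

        __asm__ __volatile__ (
                "1:     lr.w    %0, %1\n"       /* ret = old value             */
                "       sc.w    %2, %3, %1\n"   /* try to store newval         */
                "       bnez    %2, 1b\n"
                : "=&r" (ret), "+A" (*p), "=&r" (temp)
                : "r" (newval)
                : "memory");

        return ret;
}
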
>
> #define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \
> sc_append, swap_append) \
> diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
> index fc8130f995c1..47297f47ec35 100644
> --- a/arch/riscv/include/asm/futex.h
> +++ b/arch/riscv/include/asm/futex.h
> @@ -19,6 +19,7 @@
> #define __disable_user_access() do { } while (0)
> #endif
>
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
> #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
> { \
> __enable_user_access(); \
> @@ -32,16 +33,39 @@
> : "memory"); \
> __disable_user_access(); \
> }
> +#else
> +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
> +{ \
> + __enable_user_access(); \
> + __asm__ __volatile__ ( \
> + "1: lr.w.aqrl %[ov], %[u]\n" \
> + " " insn "\n" \
> + " sc.w.aqrl %[t], %[t], %[u]\n" \
> + " bnez %[t], 1b\n" \
> + "2:\n" \
> + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r]) \
> + : [r] "+r" (ret), [ov] "=&r" (oldval), \
> + [t] "=&r" (temp), [u] "+m" (*uaddr) \
> + : [op] "Jr" (oparg) \
> + : "memory"); \
> + __disable_user_access(); \
> +}
> +#endif
>
> static inline int
> arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
> {
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
> int oldval = 0, ret = 0;
> +#else
> + int oldval = 0, ret = 0, temp = 0;
> +#endif
>
> if (!access_ok(uaddr, sizeof(u32)))
> return -EFAULT;
>
> switch (op) {
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
> case FUTEX_OP_SET:
> __futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
> ret, oldval, uaddr, oparg);
> @@ -62,6 +86,28 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
> __futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
> ret, oldval, uaddr, oparg);
> break;
> +#else
> + case FUTEX_OP_SET:
> + __futex_atomic_op("mv %[t], %z[op]",
> + ret, oldval, uaddr, oparg);
> + break;
> + case FUTEX_OP_ADD:
> + __futex_atomic_op("add %[t], %[ov], %z[op]",
> + ret, oldval, uaddr, oparg);
> + break;
> + case FUTEX_OP_OR:
> + __futex_atomic_op("or %[t], %[ov], %z[op]",
> + ret, oldval, uaddr, oparg);
> + break;
> + case FUTEX_OP_ANDN:
> + __futex_atomic_op("and %[t], %[ov], %z[op]",
> + ret, oldval, uaddr, ~oparg);
> + break;
> + case FUTEX_OP_XOR:
> + __futex_atomic_op("xor %[t], %[ov], %z[op]",
> + ret, oldval, uaddr, oparg);
> + break;
> +#endif
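
For completeness, the FUTEX_OP_ADD case above instantiates the loop as below
(simplified sketch: the __enable_user_access()/__disable_user_access() pair
and the _ASM_EXTABLE_UACCESS_ERR fixup that sets ret to -EFAULT on a fault are
stripped out; name made up):

static inline int lrsc_futex_add(u32 *uaddr, int oparg)
{
        int oldval, temp;

        __asm__ __volatile__ (
                "1:     lr.w.aqrl %[ov], %[u]\n"
                "       add       %[t], %[ov], %z[op]\n"  /* the per-op insn   */
                "       sc.w.aqrl %[t], %[t], %[u]\n"
                "       bnez      %[t], 1b\n"
                : [ov] "=&r" (oldval), [t] "=&r" (temp), [u] "+m" (*uaddr)
                : [op] "Jr" (oparg)
                : "memory");

        return oldval;                          /* old value, as *oval expects */
}
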
> default:
> ret = -ENOSYS;
> }
> --
> 2.25.1
>
--
Best Regards
Guo Ren