Message-ID: <CAJF2gTRO5Q=CMT8N5nVOF9oOvDb7CtakeemOkX1-v_w-WPuaXQ@mail.gmail.com>
Date: Fri, 29 Nov 2024 23:12:54 +0800
From: Guo Ren <guoren@...nel.org>
To: Aleksandar Rikalo <arikalo@...il.com>
Cc: linux-riscv@...ts.infradead.org, Paul Walmsley <paul.walmsley@...ive.com>, 
	Palmer Dabbelt <palmer@...belt.com>, Albert Ou <aou@...s.berkeley.edu>, 
	Will Deacon <will@...nel.org>, Peter Zijlstra <peterz@...radead.org>, 
	Boqun Feng <boqun.feng@...il.com>, Mark Rutland <mark.rutland@....com>, 
	Yury Norov <yury.norov@...il.com>, Rasmus Villemoes <linux@...musvillemoes.dk>, 
	Andrea Parri <parri.andrea@...il.com>, Leonardo Bras <leobras@...hat.com>, 
	Samuel Holland <samuel.holland@...ive.com>, Eric Chan <ericchancf@...gle.com>, 
	linux-kernel@...r.kernel.org, 
	Djordje Todorovic <djordje.todorovic@...cgroup.com>
Subject: Re: [PATCH] riscv: Rewrite AMO instructions via lr and sc.

On Fri, Nov 29, 2024 at 10:43 PM Aleksandar Rikalo <arikalo@...il.com> wrote:
>
> From: Chao-ying Fu <cfu@...s.com>
>
> Use lr and sc to implement all atomic functions. Some CPUs have
> native support for lr and sc, but emulate AMO instructions through
> trap handlers that are slow.
>
> Add config RISCV_ISA_ZALRSC_ONLY.
>
> Signed-off-by: Chao-ying Fu <cfu@...s.com>
> Signed-off-by: Aleksandar Rikalo <arikalo@...il.com>
> ---
>  arch/riscv/Kconfig               | 10 ++++++
>  arch/riscv/include/asm/atomic.h  | 52 +++++++++++++++++++++++++++++++-
>  arch/riscv/include/asm/bitops.h  | 45 +++++++++++++++++++++++++++
>  arch/riscv/include/asm/cmpxchg.h | 16 ++++++++++
>  arch/riscv/include/asm/futex.h   | 46 ++++++++++++++++++++++++++++
>  5 files changed, 168 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index cc63aef41e94..767538c27875 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -715,6 +715,16 @@ config RISCV_ISA_ZACAS
>
>           If you don't know what to do here, say Y.
>
> +config RISCV_ISA_ZALRSC_ONLY
> +       bool "Zalrsc extension support only"
> +       default n
> +       help
> +          Use lr and sc to build all atomic functions. Some CPUs have
> +          native support for lr and sc, but emulate amo instructions through
> +          trap handlers that are slow.

What's the actual hardware here?
Are the AMO instructions emulated in M-mode?
Where is that emulation code?
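
For anyone skimming the thread: this is roughly the loop such a
configuration trades a single AMO for. A minimal user-space sketch of
the lr/sc retry pattern (function name and operand choices are mine,
not the kernel macros), assuming an rv32/rv64 target that implements
at least Zalrsc:

/*
 * Fully-ordered fetch-and-add built only from lr/sc -- what a single
 * amoadd.w.aqrl would otherwise do.
 */
static inline int lrsc_fetch_add(int *p, int inc)
{
	int old, tmp;

	__asm__ __volatile__ (
		"1:	lr.w.aqrl %0, %2\n"	/* old = *p, open reservation */
		"	add %1, %0, %3\n"	/* tmp = old + inc            */
		"	sc.w.aqrl %1, %1, %2\n"	/* try *p = tmp; %1 = status  */
		"	bnez %1, 1b\n"		/* non-zero status: retry     */
		: "=&r" (old), "=&r" (tmp), "+A" (*p)
		: "r" (inc)
		: "memory");

	return old;
}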

> +
> +          If you don't know what to do here, say n.
> +
>  config TOOLCHAIN_HAS_ZBB
>         bool
>         default y
> diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
> index 5b96c2f61adb..f484babecb9e 100644
> --- a/arch/riscv/include/asm/atomic.h
> +++ b/arch/riscv/include/asm/atomic.h
> @@ -50,6 +50,7 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
>   * have the AQ or RL bits set.  These don't return anything, so there's only
>   * one version to worry about.
>   */
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>  #define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)             \
>  static __always_inline                                                 \
>  void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)       \
> @@ -59,7 +60,23 @@ void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)     \
>                 : "+A" (v->counter)                                     \
>                 : "r" (I)                                               \
>                 : "memory");                                            \
> -}                                                                      \
> +}
> +#else
> +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)             \
> +static __always_inline                                                 \
> +void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)       \
> +{                                                                      \
> +       register c_type ret, temp;                                      \
> +       __asm__ __volatile__ (                                          \
> +               "1:     lr." #asm_type " %1, %0\n"                      \
> +               "       " #asm_op " %2, %1, %3\n"                       \
> +               "       sc." #asm_type " %2, %2, %0\n"                  \
> +               "       bnez %2, 1b\n"                                  \
> +               : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> +               : "r" (I)                                               \
> +               : "memory");                                            \
> +}
> +#endif
>
>  #ifdef CONFIG_GENERIC_ATOMIC64
>  #define ATOMIC_OPS(op, asm_op, I)                                      \
> @@ -84,6 +101,7 @@ ATOMIC_OPS(xor, xor,  i)
>   * There's two flavors of these: the arithmatic ops have both fetch and return
>   * versions, while the logical ops only have fetch versions.
>   */
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>  #define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)       \
>  static __always_inline                                                 \
>  c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,            \
> @@ -108,6 +126,38 @@ c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)   \
>                 : "memory");                                            \
>         return ret;                                                     \
>  }
> +#else
> +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)       \
> +static __always_inline                                                 \
> +c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,            \
> +                                            atomic##prefix##_t *v)     \
> +{                                                                      \
> +       register c_type ret, temp;                                      \
> +       __asm__ __volatile__ (                                          \
> +               "1:     lr." #asm_type " %1, %0\n"                      \
> +               "       " #asm_op " %2, %1, %3\n"                       \
> +               "       sc." #asm_type " %2, %2, %0\n"                  \
> +               "       bnez %2, 1b\n"                                  \
> +               : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> +               : "r" (I)                                               \
> +               : "memory");                                            \
> +       return ret;                                                     \
> +}                                                                      \
> +static __always_inline                                                 \
> +c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)       \
> +{                                                                      \
> +       register c_type ret, temp;                                      \
> +       __asm__ __volatile__ (                                          \
> +               "1:     lr." #asm_type ".aqrl %1, %0\n"                 \
> +               "       " #asm_op " %2, %1, %3\n"                       \
> +               "       sc." #asm_type ".aqrl %2, %2, %0\n"             \
> +               "       bnez %2, 1b\n"                                  \
> +               : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> +               : "r" (I)                                               \
> +               : "memory");                                            \
> +       return ret;                                                     \
> +}
> +#endif
>
>  #define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix)        \
>  static __always_inline                                                 \
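
For review purposes, here is a rough hand-expansion of the new #else
ATOMIC_OP branch above for the concrete case ATOMIC_OPS(add, add, i)
with asm_type "w" and c_type int (typed out by hand with my comments
added, not preprocessor output):

static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
	register int ret, temp;	/* ret is declared by the macro but is
				 * unused in the void, non-fetch variants */

	__asm__ __volatile__ (
		"1:	lr.w %1, %0\n"		/* ret  = v->counter             */
		"	add %2, %1, %3\n"	/* temp = ret + i                */
		"	sc.w %2, %2, %0\n"	/* store temp; %2 = 0 on success */
		"	bnez %2, 1b\n"		/* reservation lost: retry       */
		: "+A" (v->counter), "=&r" (ret), "=&r" (temp)
		: "r" (i)
		: "memory");
}
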
> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> index fae152ea0508..b51cb18f7d9e 100644
> --- a/arch/riscv/include/asm/bitops.h
> +++ b/arch/riscv/include/asm/bitops.h
> @@ -187,12 +187,17 @@ static __always_inline int variable_fls(unsigned int x)
>
>  #if (BITS_PER_LONG == 64)
>  #define __AMO(op)      "amo" #op ".d"
> +#define __LR   "lr.d"
> +#define __SC   "sc.d"
>  #elif (BITS_PER_LONG == 32)
>  #define __AMO(op)      "amo" #op ".w"
> +#define __LR   "lr.w"
> +#define __SC   "sc.w"
>  #else
>  #error "Unexpected BITS_PER_LONG"
>  #endif
>
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>  #define __test_and_op_bit_ord(op, mod, nr, addr, ord)          \
>  ({                                                             \
>         unsigned long __res, __mask;                            \
> @@ -211,6 +216,33 @@ static __always_inline int variable_fls(unsigned int x)
>                 : "+A" (addr[BIT_WORD(nr)])                     \
>                 : "r" (mod(BIT_MASK(nr)))                       \
>                 : "memory");
> +#else
> +#define __test_and_op_bit_ord(op, mod, nr, addr, ord)          \
> +({                                                             \
> +       unsigned long __res, __mask, __temp;                    \
> +       __mask = BIT_MASK(nr);                                  \
> +       __asm__ __volatile__ (                                  \
> +               "1: " __LR #ord " %0, %1\n"                     \
> +               #op " %2, %0, %3\n"                             \
> +               __SC #ord " %2, %2, %1\n"                       \
> +               "bnez %2, 1b\n"                                 \
> +               : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)      \
> +               : "r" (mod(__mask))                             \
> +               : "memory");                                    \
> +       ((__res & __mask) != 0);                                \
> +})
> +
> +#define __op_bit_ord(op, mod, nr, addr, ord)                   \
> +       unsigned long __res, __temp;                            \
> +       __asm__ __volatile__ (                                  \
> +               "1: " __LR #ord " %0, %1\n"                     \
> +               #op " %2, %0, %3\n"                             \
> +               __SC #ord " %2, %2, %1\n"                       \
> +               "bnez %2, 1b\n"                                 \
> +               : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)      \
> +               : "r" (mod(BIT_MASK(nr)))                       \
> +               : "memory")
> +#endif
>
>  #define __test_and_op_bit(op, mod, nr, addr)                   \
>         __test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
> @@ -354,12 +386,25 @@ static inline void arch___clear_bit_unlock(
>  static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
>                 volatile unsigned long *addr)
>  {
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>         unsigned long res;
>         __asm__ __volatile__ (
>                 __AMO(xor) ".rl %0, %2, %1"
>                 : "=r" (res), "+A" (*addr)
>                 : "r" (__NOP(mask))
>                 : "memory");
> +#else
> +       unsigned long res, temp;
> +
> +       __asm__ __volatile__ (
> +               "1: " __LR ".rl %0, %1\n"
> +               "xor %2, %0, %3\n"
> +               __SC ".rl %2, %2, %1\n"
> +               "bnez %2, 1b\n"
> +               : "=&r" (res), "+A" (*addr), "=&r" (temp)
> +               : "r" (__NOP(mask))
> +               : "memory");
> +#endif
>         return (res & BIT(7)) != 0;
>  }
>
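
On the bitops side, the lr/sc flavour of __test_and_op_bit_ord() above
boils down to the following for a test_and_set_bit()-style operation on
rv64 (a hand-written sketch with my own helper name, not the macro's
literal expansion):

static inline int lrsc_test_and_set_bit(unsigned int nr,
					volatile unsigned long *addr)
{
	const unsigned int bits = 8 * sizeof(unsigned long);
	unsigned long mask = 1UL << (nr % bits);
	volatile unsigned long *word = addr + nr / bits;
	unsigned long res, temp;

	__asm__ __volatile__ (
		"1:	lr.d.aqrl %0, %1\n"	/* res = *word             */
		"	or %2, %0, %3\n"	/* temp = res | mask       */
		"	sc.d.aqrl %2, %2, %1\n"	/* try to publish temp     */
		"	bnez %2, 1b\n"		/* retry if sc failed      */
		: "=&r" (res), "+A" (*word), "=&r" (temp)
		: "r" (mask)
		: "memory");

	return (res & mask) != 0;	/* old value of the bit */
}
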
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index 4cadc56220fe..881082b05110 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -51,6 +51,7 @@
>         }                                                                       \
>  })
>
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>  #define __arch_xchg(sfx, prepend, append, r, p, n)                     \
>  ({                                                                     \
>         __asm__ __volatile__ (                                          \
> @@ -61,6 +62,21 @@
>                 : "r" (n)                                               \
>                 : "memory");                                            \
>  })
> +#else
> +#define __arch_xchg(sfx, prepend, append, r, p, n)                     \
> +({                                                                     \
> +       __typeof__(*(__ptr)) temp;                                      \
> +       __asm__ __volatile__ (                                          \
> +               prepend                                                 \
> +               "1:     lr" sfx " %0, %1\n"                             \
> +               "       sc" sfx " %2, %3, %1\n"                         \
> +               "       bnez %2, 1b\n"                                  \
> +               append                                                  \
> +               : "=&r" (r), "+A" (*(p)), "=&r" (temp)                  \
> +               : "r" (n)                                               \
> +               : "memory");                                            \
> +})
> +#endif
>
>  #define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,                        \
>                    sc_append, swap_append)                              \
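
The exchange case is the same loop with no ALU op in the middle; a
standalone sketch of what the lr/sc __arch_xchg() path does for a
relaxed 32-bit exchange (no prepend/append fences; names are mine):

static inline int lrsc_xchg32_relaxed(int *p, int newval)
{
	int old, temp;

	__asm__ __volatile__ (
		"1:	lr.w %0, %1\n"		/* old = *p                */
		"	sc.w %2, %3, %1\n"	/* try *p = newval         */
		"	bnez %2, 1b\n"		/* reservation lost: retry */
		: "=&r" (old), "+A" (*p), "=&r" (temp)
		: "r" (newval)
		: "memory");

	return old;
}
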
> diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
> index fc8130f995c1..47297f47ec35 100644
> --- a/arch/riscv/include/asm/futex.h
> +++ b/arch/riscv/include/asm/futex.h
> @@ -19,6 +19,7 @@
>  #define __disable_user_access()                do { } while (0)
>  #endif
>
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>  #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)     \
>  {                                                              \
>         __enable_user_access();                                 \
> @@ -32,16 +33,39 @@
>         : "memory");                                            \
>         __disable_user_access();                                \
>  }
> +#else
> +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)     \
> +{                                                              \
> +       __enable_user_access();                                 \
> +       __asm__ __volatile__ (                                  \
> +       "1:     lr.w.aqrl %[ov], %[u]\n"                        \
> +       "       " insn "\n"                                     \
> +       "       sc.w.aqrl %[t], %[t], %[u]\n"                   \
> +       "       bnez %[t], 1b\n"                                \
> +       "2:\n"                                                  \
> +       _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r])                  \
> +       : [r] "+r" (ret), [ov] "=&r" (oldval),                  \
> +         [t] "=&r" (temp), [u] "+m" (*uaddr)                   \
> +       : [op] "Jr" (oparg)                                     \
> +       : "memory");                                            \
> +       __disable_user_access();                                \
> +}
> +#endif
>
>  static inline int
>  arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
>  {
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>         int oldval = 0, ret = 0;
> +#else
> +       int oldval = 0, ret = 0, temp = 0;
> +#endif
>
>         if (!access_ok(uaddr, sizeof(u32)))
>                 return -EFAULT;
>
>         switch (op) {
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>         case FUTEX_OP_SET:
>                 __futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
>                                   ret, oldval, uaddr, oparg);
> @@ -62,6 +86,28 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
>                 __futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
>                                   ret, oldval, uaddr, oparg);
>                 break;
> +#else
> +       case FUTEX_OP_SET:
> +               __futex_atomic_op("mv %[t], %z[op]",
> +                                 ret, oldval, uaddr, oparg);
> +               break;
> +       case FUTEX_OP_ADD:
> +               __futex_atomic_op("add %[t], %[ov], %z[op]",
> +                                 ret, oldval, uaddr, oparg);
> +               break;
> +       case FUTEX_OP_OR:
> +               __futex_atomic_op("or %[t], %[ov], %z[op]",
> +                                 ret, oldval, uaddr, oparg);
> +               break;
> +       case FUTEX_OP_ANDN:
> +               __futex_atomic_op("and %[t], %[ov], %z[op]",
> +                                 ret, oldval, uaddr, ~oparg);
> +               break;
> +       case FUTEX_OP_XOR:
> +               __futex_atomic_op("xor %[t], %[ov], %z[op]",
> +                                 ret, oldval, uaddr, oparg);
> +               break;
> +#endif
>         default:
>                 ret = -ENOSYS;
>         }
> --
> 2.25.1
>
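
As a closing illustration of how the per-op "insn" string composes with
the loop, this is what the Zalrsc __futex_atomic_op() body becomes for
the FUTEX_OP_ADD case (expansion written out by hand, comments added by
me):

	__enable_user_access();
	__asm__ __volatile__ (
	"1:	lr.w.aqrl %[ov], %[u]\n"	/* oldval = *uaddr (user memory) */
	"	add %[t], %[ov], %z[op]\n"	/* temp = oldval + oparg         */
	"	sc.w.aqrl %[t], %[t], %[u]\n"	/* try to write temp back        */
	"	bnez %[t], 1b\n"		/* reservation lost: retry       */
	"2:\n"
	_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r])	/* fault at 1b: %[r] = -EFAULT, resume at 2b */
	: [r] "+r" (ret), [ov] "=&r" (oldval),
	  [t] "=&r" (temp), [u] "+m" (*uaddr)
	: [op] "Jr" (oparg)
	: "memory");
	__disable_user_access();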


-- 
Best Regards
 Guo Ren
