[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAJF2gTSJi3a7CEUzTwofP3dwFiD1+LssM6xy-uFDqOczGByd+A@mail.gmail.com>
Date: Tue, 12 Apr 2022 13:21:10 +0800
From: Guo Ren <guoren@...nel.org>
To: Guo Ren <guoren@...nel.org>, Arnd Bergmann <arnd@...db.de>,
Mark Rutland <mark.rutland@....com>
Cc: linux-arch <linux-arch@...r.kernel.org>,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
linux-csky@...r.kernel.org, Guo Ren <guoren@...ux.alibaba.com>
Subject: Re: [PATCH V2 2/2] csky: atomic: Add custom atomic.h implementation
On Mon, Apr 11, 2022 at 10:52 PM <guoren@...nel.org> wrote:
>
> From: Guo Ren <guoren@...ux.alibaba.com>
>
> The generic atomic.h uses cmpxchg to implement the atomic
> operations, which causes a dual loop and weakens the forward-progress
> guarantee. This patch implements csky custom atomic operations with
> ldex/stex instructions for the best performance.
>
> Important reference comment by Rutland:
> 8e86f0b409a4 ("arm64: atomics: fix use of acquire + release for
> full barrier semantics")
>
> Link: https://lore.kernel.org/linux-riscv/CAJF2gTSAxpAi=LbAdu7jntZRUa=-dJwL0VfmDfBV5MHB=rcZ-w@mail.gmail.com/T/#m27a0f1342995deae49ce1d0e1f2683f8a181d6c3
> Signed-off-by: Guo Ren <guoren@...ux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@...nel.org>
> Cc: Mark Rutland <mark.rutland@....com>
> ---
> Changes in V2:
> - Fixup use of acquire + release for barrier semantics by Rutland.
> ---
> arch/csky/include/asm/atomic.h | 130 +++++++++++++++++++++++++++++++++
> 1 file changed, 130 insertions(+)
> create mode 100644 arch/csky/include/asm/atomic.h
>
> diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h
> new file mode 100644
> index 000000000000..2e1a22f55ea1
> --- /dev/null
> +++ b/arch/csky/include/asm/atomic.h
> @@ -0,0 +1,130 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_CSKY_ATOMIC_H
> +#define __ASM_CSKY_ATOMIC_H
> +
> +#ifdef CONFIG_SMP
> +# include <asm-generic/atomic64.h>
> +
> +#include <asm/cmpxchg.h>
> +#include <asm/barrier.h>
> +
> +#define __atomic_acquire_fence() __smp_acquire_fence()
> +
> +#define __atomic_release_fence() __smp_release_fence()
> +
> +static __always_inline int arch_atomic_read(const atomic_t *v)
> +{
> + return READ_ONCE(v->counter);
> +}
> +static __always_inline void arch_atomic_set(atomic_t *v, int i)
> +{
> + WRITE_ONCE(v->counter, i);
> +}
> +
> +#define ATOMIC_OP(op, asm_op, I) \
> +static __always_inline \
> +void arch_atomic_##op(int i, atomic_t *v) \
> +{ \
> + unsigned long tmp; \
> + __asm__ __volatile__ ( \
> + "1: ldex.w %0, (%2) \n" \
> + " " #op " %0, %1 \n" \
> + " stex.w %0, (%2) \n" \
> + " bez %0, 1b \n" \
> + : "=&r" (tmp) \
> + : "r" (I), "r" (&v->counter) \
> + : "memory"); \
> +}
> +
> +ATOMIC_OP(add, add, i)
> +ATOMIC_OP(sub, add, -i)
> +ATOMIC_OP(and, and, i)
> +ATOMIC_OP( or, or, i)
> +ATOMIC_OP(xor, xor, i)
Sorry, it should be fixed up by:
/*
 * ATOMIC_OP(op) - define arch_atomic_<op>(int i, atomic_t *v).
 *
 * The generated function returns nothing; it applies the instruction named
 * by @op, with operand @i, to v->counter inside a retry loop:
 *
 *   1: ldex.w  %0, (%2)   - access &v->counter, result in tmp
 *      <op>    %0, %1     - combine tmp with i
 *      stex.w  %0, (%2)   - write tmp back through &v->counter
 *      bez     %0, 1b     - go back to 1 when %0 is zero
 *
 * NOTE(review): the bez after stex.w implies that stex.w replaces %0 with a
 * status value that is zero when the store did not succeed - confirm against
 * the C-SKY ISA manual.
 *
 * The macro takes a single argument: the mnemonic is the stringified @op and
 * the operand is @i itself, so arch_atomic_sub emits "sub" with i.
 *
 * Constraints: tmp is an early-clobber output ("=&r"), so it is not placed
 * in the same register as either input; the "memory" clobber tells the
 * compiler that the asm reads/writes memory. No barrier instruction is part
 * of the sequence.
 */
#define ATOMIC_OP(op) \
static __always_inline \
void arch_atomic_##op(int i, atomic_t *v) \
{ \
unsigned long tmp; \
__asm__ __volatile__ ( \
"1: ldex.w %0, (%2) \n" \
" " #op " %0, %1 \n" \
" stex.w %0, (%2) \n" \
" bez %0, 1b \n" \
: "=&r" (tmp) \
: "r" (i), "r" (&v->counter) \
: "memory"); \
}
/* Instantiate arch_atomic_{add,sub,and,or,xor}(). */
ATOMIC_OP(add)
ATOMIC_OP(sub)
ATOMIC_OP(and)
ATOMIC_OP( or)
ATOMIC_OP(xor)
> +
> +#undef ATOMIC_OP
> +
> +#define ATOMIC_FETCH_OP(op, asm_op, I) \
> +static __always_inline \
> +int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
> +{ \
> + register int ret, tmp; \
> + __asm__ __volatile__ ( \
> + "1: ldex.w %0, (%3) \n" \
> + " mov %1, %0 \n" \
> + " " #op " %0, %2 \n" \
> + " stex.w %0, (%3) \n" \
> + " bez %0, 1b \n" \
> + : "=&r" (tmp), "=&r" (ret) \
> + : "r" (I), "r"(&v->counter) \
> + : "memory"); \
> + return ret; \
> +}
> +
> +#define ATOMIC_OP_RETURN(op, asm_op, c_op, I) \
> +static __always_inline \
> +int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
> +{ \
> + return arch_atomic_fetch_##op##_relaxed(i, v) c_op I; \
> +}
> +
> +#define ATOMIC_OPS(op, asm_op, c_op, I) \
> + ATOMIC_FETCH_OP( op, asm_op, I) \
> + ATOMIC_OP_RETURN(op, asm_op, c_op, I)
> +
> +ATOMIC_OPS(add, add, +, i)
> +ATOMIC_OPS(sub, add, +, -i)
> +
> +#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
> +#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
> +
> +#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
> +#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
> +
> +#undef ATOMIC_OPS
> +#undef ATOMIC_OP_RETURN
> +
> +#define ATOMIC_OPS(op, asm_op, I) \
> + ATOMIC_FETCH_OP(op, asm_op, I)
> +
> +ATOMIC_OPS(and, and, i)
> +ATOMIC_OPS( or, or, i)
> +ATOMIC_OPS(xor, xor, i)
> +
> +#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
> +#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
> +#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
> +
> +#undef ATOMIC_OPS
> +
> +#undef ATOMIC_FETCH_OP
> +
> +#define ATOMIC_OP() \
> +static __always_inline \
> +int arch_atomic_xchg_relaxed(atomic_t *v, int n) \
> +{ \
> + return __xchg_relaxed(n, &(v->counter), 4); \
> +} \
> +static __always_inline \
> +int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n) \
> +{ \
> + return __cmpxchg_relaxed(&(v->counter), o, n, 4); \
> +}
> +
> +#define ATOMIC_OPS() \
> + ATOMIC_OP()
> +
> +ATOMIC_OPS()
> +
> +#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed
> +#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
> +
> +#undef ATOMIC_OPS
> +#undef ATOMIC_OP
> +
> +#else
> +# include <asm-generic/atomic.h>
> +#endif
> +
> +#endif /* __ASM_CSKY_ATOMIC_H */
> --
> 2.25.1
>
--
Best Regards
Guo Ren
ML: https://lore.kernel.org/linux-csky/
Powered by blists - more mailing lists