[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20220424072918.2596899-4-guoren@kernel.org>
Date: Sun, 24 Apr 2022 15:29:18 +0800
From: guoren@...nel.org
To: guoren@...nel.org, arnd@...db.de, mark.rutland@....com,
boqun.feng@...il.com, peterz@...radead.org, will@...nel.org
Cc: linux-arch@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-csky@...r.kernel.org, Guo Ren <guoren@...ux.alibaba.com>
Subject: [PATCH V4 3/3] csky: atomic: Add conditional atomic operations' optimization
From: Guo Ren <guoren@...ux.alibaba.com>
Add conditional atomic operations' optimization:
- arch_atomic_fetch_add_unless
- arch_atomic_inc_unless_negative
- arch_atomic_dec_unless_positive
- arch_atomic_dec_if_positive
Comments by Boqun:
FWIW, you probably need to make sure that a barrier instruction inside
an lr/sc loop is a good thing. IIUC, the execution time of a barrier
instruction is determined by the status of store buffers and invalidate
queues (and probably other stuffs), so it may increase the execution
time of the lr/sc loop, and make it unlikely to succeed. But this really
depends on how the arch executes these instructions.
Signed-off-by: Guo Ren <guoren@...ux.alibaba.com>
Signed-off-by: Guo Ren <guoren@...nel.org>
Cc: Boqun Feng <boqun.feng@...il.com>
---
arch/csky/include/asm/atomic.h | 95 ++++++++++++++++++++++++++++++++++
1 file changed, 95 insertions(+)
diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h
index 56c9dc8e91b3..60406ef9c2bb 100644
--- a/arch/csky/include/asm/atomic.h
+++ b/arch/csky/include/asm/atomic.h
@@ -100,6 +100,101 @@ ATOMIC_OPS(xor)
#undef ATOMIC_FETCH_OP
+static __always_inline int
+arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
+{
+ int prev, tmp;
+
+ __asm__ __volatile__ (
+ RELEASE_FENCE
+ "1: ldex.w %0, (%3) \n"
+ " cmpne %0, %4 \n"
+ " bf 2f \n"
+ " mov %1, %0 \n"
+ " add %1, %2 \n"
+ " stex.w %1, (%3) \n"
+ " bez %1, 1b \n"
+ FULL_FENCE
+ "2:\n"
+ : "=&r" (prev), "=&r" (tmp)
+ : "r" (a), "r" (&v->counter), "r" (u)
+ : "memory");
+
+ return prev;
+}
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
+
+static __always_inline bool
+arch_atomic_inc_unless_negative(atomic_t *v)
+{
+ int rc, tmp;
+
+ __asm__ __volatile__ (
+ RELEASE_FENCE
+ "1: ldex.w %0, (%2) \n"
+ " movi %1, 0 \n"
+ " blz %0, 2f \n"
+ " movi %1, 1 \n"
+ " addi %0, 1 \n"
+ " stex.w %0, (%2) \n"
+ " bez %0, 1b \n"
+ FULL_FENCE
+ "2:\n"
+ : "=&r" (tmp), "=&r" (rc)
+ : "r" (&v->counter)
+ : "memory");
+
+ return tmp ? true : false;
+
+}
+#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative
+
+static __always_inline bool
+arch_atomic_dec_unless_positive(atomic_t *v)
+{
+ int rc, tmp;
+
+ __asm__ __volatile__ (
+ RELEASE_FENCE
+ "1: ldex.w %0, (%2) \n"
+ " movi %1, 0 \n"
+ " bhz %0, 2f \n"
+ " movi %1, 1 \n"
+ " subi %0, 1 \n"
+ " stex.w %0, (%2) \n"
+ " bez %0, 1b \n"
+ FULL_FENCE
+ "2:\n"
+ : "=&r" (tmp), "=&r" (rc)
+ : "r" (&v->counter)
+ : "memory");
+
+ return tmp ? true : false;
+}
+#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive
+
+static __always_inline int
+arch_atomic_dec_if_positive(atomic_t *v)
+{
+ int dec, tmp;
+
+ __asm__ __volatile__ (
+ RELEASE_FENCE
+ "1: ldex.w %0, (%2) \n"
+ " subi %1, %0, 1 \n"
+ " blz %1, 2f \n"
+ " stex.w %1, (%2) \n"
+ " bez %1, 1b \n"
+ FULL_FENCE
+ "2:\n"
+ : "=&r" (dec), "=&r" (tmp)
+ : "r" (&v->counter)
+ : "memory");
+
+ return dec - 1;
+}
+#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
+
#define ATOMIC_OP() \
static __always_inline \
int arch_atomic_xchg_relaxed(atomic_t *v, int n) \
--
2.25.1
Powered by blists - more mailing lists