Message-ID: <20251231034523.47014-3-dongtai.guo@linux.dev>
Date: Wed, 31 Dec 2025 11:45:22 +0800
From: George Guo <dongtai.guo@...ux.dev>
To: hengqi.chen@...il.com
Cc: chenhuacai@...nel.org,
dongtai.guo@...ux.dev,
guodongtai@...inos.cn,
kernel@...0n.name,
lianyangyang@...inos.cn,
linux-kernel@...r.kernel.org,
loongarch@...ts.linux.dev,
r@....cc,
xry111@...111.site
Subject: [PATCH v8 loongarch-next 2/3] LoongArch: Add 128-bit atomic cmpxchg support
From: George Guo <guodongtai@...inos.cn>
Implement 128-bit atomic compare-and-exchange using LoongArch's
LL.D/SC.Q instructions.
For LoongArch CPUs that lack the 128-bit atomic instruction SC.Q
(e.g. 3A5000), fall back to a spinlock to emulate the atomic
operation.
This also fixes BPF scheduler test failures (scx_central, scx_qmap)
caused by kmalloc_nolock_noprof() returning NULL due to the missing
128-bit atomics: the NULL returns led to -ENOMEM errors during
scheduler initialization, so those test cases failed.
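
For context, the kind of update the allocator's lockless fast path
needs from a 128-bit cmpxchg looks roughly like the sketch below
(illustrative types only, not the actual mm/slub.c code): a pointer
and a generation counter packed into 16 bytes and swapped atomically
so that ABA reuse of the pointer is detected. Without such a
primitive there is no lock-free way to do this, which is why
kmalloc_nolock_noprof() gave up and returned NULL, as described
above.

/* Illustrative sketch only -- not the actual mm/slub.c structures. */
#include <linux/atomic.h>
#include <linux/types.h>

struct ptr_tag {
        void          *ptr;     /* e.g. a freelist head */
        unsigned long  tag;     /* generation counter, defeats ABA */
};

union ptr_tag_u {
        struct ptr_tag pt;
        u128           full;
} __aligned(16);

/*
 * Atomically replace head->pt.ptr with new_ptr, but only if neither
 * the pointer nor its generation counter changed since 'old' was read.
 */
static bool replace_head(union ptr_tag_u *head, union ptr_tag_u old,
                         void *new_ptr)
{
        union ptr_tag_u next = {
                .pt = { .ptr = new_ptr, .tag = old.pt.tag + 1 },
        };

        return cmpxchg128(&head->full, old.full, next.full) == old.full;
}
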
Verified with the scx_qmap scheduler in tools/sched_ext/: build it
with `make`, then run ./tools/sched_ext/build/bin/scx_qmap.
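
In addition to the scx_qmap run above, a minimal in-kernel smoke test
along the following lines could be used (hypothetical test module,
not part of this patch). It exercises the success and failure cases
of cmpxchg128(); which underlying path is taken (SC.Q or the spinlock
fallback) depends on the CPU it runs on.

/* Hypothetical smoke-test module, not part of this patch. */
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/types.h>

static u128 val __aligned(16);

static int __init cmpxchg128_smoke_init(void)
{
        u128 init = ((u128)0x1111111111111111ULL << 64) | 0x2222222222222222ULL;
        u128 old;

        val = init;

        /* Matching 'old' value: the exchange must happen. */
        old = cmpxchg128(&val, init, (u128)42);
        if (old != init || val != 42)
                return -EINVAL;

        /* Mismatching 'old' value: the exchange must not happen. */
        old = cmpxchg128(&val, init, (u128)0);
        if (old != 42 || val != 42)
                return -EINVAL;

        pr_info("cmpxchg128 smoke test passed\n");
        return 0;
}
module_init(cmpxchg128_smoke_init);

MODULE_DESCRIPTION("cmpxchg128 smoke test");
MODULE_LICENSE("GPL");
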
Link: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git/commit/?id=5fb750e8a9ae
Signed-off-by: George Guo <guodongtai@...inos.cn>
---
arch/loongarch/include/asm/cmpxchg.h | 66 ++++++++++++++++++++++++++++
1 file changed, 66 insertions(+)
diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h
index 0494c2ab553e..ef793bcb7b25 100644
--- a/arch/loongarch/include/asm/cmpxchg.h
+++ b/arch/loongarch/include/asm/cmpxchg.h
@@ -8,6 +8,7 @@
#include <linux/bits.h>
#include <linux/build_bug.h>
#include <asm/barrier.h>
+#include <asm/cpu-features.h>
#define __xchg_amo_asm(amswap_db, m, val) \
({ \
@@ -137,6 +138,61 @@ __arch_xchg(volatile void *ptr, unsigned long x, int size)
__ret; \
})
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low;
+ u64 high;
+ };
+};
+
+#define __cmpxchg128_asm(ptr, old, new) \
+({ \
+ union __u128_halves __old, __new, __ret; \
+ volatile u64 *__ptr = (volatile u64 *)(ptr); \
+ \
+ __old.full = (old); \
+ __new.full = (new); \
+ \
+ __asm__ __volatile__( \
+ "1: ll.d %0, %3 # 128-bit cmpxchg low \n" \
+ __WEAK_LLSC_MB \
+ " ld.d %1, %4 # 128-bit cmpxchg high \n" \
+ " bne %0, %z5, 2f \n" \
+ " bne %1, %z6, 2f \n" \
+ " move $t0, %z7 \n" \
+ " move $t1, %z8 \n" \
+ " sc.q $t0, $t1, %2 \n" \
+ " beqz $t0, 1b \n" \
+ "2: \n" \
+ __WEAK_LLSC_MB \
+ : "=&r" (__ret.low), "=&r" (__ret.high) \
+ : "r" (__ptr), \
+ "ZC" (__ptr[0]), "m" (__ptr[1]), \
+ "Jr" (__old.low), "Jr" (__old.high), \
+ "Jr" (__new.low), "Jr" (__new.high) \
+ : "t0", "t1", "memory"); \
+ \
+ __ret.full; \
+})
+
+#define __cmpxchg128_locked(ptr, old, new) \
+({ \
+ u128 __ret; \
+ static DEFINE_SPINLOCK(lock); \
+ unsigned long flags; \
+ \
+ spin_lock_irqsave(&lock, flags); \
+ \
+ __ret = *(volatile u128 *)(ptr); \
+ if (__ret == (old)) \
+ *(volatile u128 *)(ptr) = (new); \
+ \
+ spin_unlock_irqrestore(&lock, flags); \
+ \
+ __ret; \
+})
+
static inline unsigned int __cmpxchg_small(volatile void *ptr, unsigned int old,
unsigned int new, unsigned int size)
{
@@ -224,6 +280,16 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int
__res; \
})
+/* cmpxchg128 */
+#define system_has_cmpxchg128() 1
+
+#define arch_cmpxchg128(ptr, o, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 16); \
+ cpu_has_scq ? __cmpxchg128_asm(ptr, o, n) : \
+ __cmpxchg128_locked(ptr, o, n); \
+})
+
#ifdef CONFIG_64BIT
#define arch_cmpxchg64_local(ptr, o, n) \
({ \
--
2.49.0