Message-Id: <20220510154217.5216-1-ubizjak@gmail.com>
Date: Tue, 10 May 2022 17:42:17 +0200
From: Uros Bizjak <ubizjak@...il.com>
To: x86@...nel.org, linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Cc: Uros Bizjak <ubizjak@...il.com>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>,
"H. Peter Anvin" <hpa@...or.com>, Will Deacon <will@...nel.org>,
Peter Zijlstra <peterz@...radead.org>,
Boqun Feng <boqun.feng@...il.com>,
Mark Rutland <mark.rutland@....com>,
"Paul E. McKenney" <paulmck@...nel.org>,
Marco Elver <elver@...gle.com>
Subject: [PATCH] locking/atomic/x86: Introduce try_cmpxchg64

Add try_cmpxchg64() to improve code around cmpxchg8b. While the
resulting code improvement on x86_64 is minor (a compare and a move
are saved), the improvement on x86_32 is quite noticeable. The code
improves from:

84: 89 74 24 30 mov %esi,0x30(%esp)
88: 89 fe mov %edi,%esi
8a: 0f b7 0c 02 movzwl (%edx,%eax,1),%ecx
8e: c1 e1 08 shl $0x8,%ecx
91: 0f b7 c9 movzwl %cx,%ecx
94: 89 4c 24 34 mov %ecx,0x34(%esp)
98: 8b 96 24 1e 00 00 mov 0x1e24(%esi),%edx
9e: 8b 86 20 1e 00 00 mov 0x1e20(%esi),%eax
a4: 8b 5c 24 34 mov 0x34(%esp),%ebx
a8: 8b 7c 24 30 mov 0x30(%esp),%edi
ac: 89 44 24 38 mov %eax,0x38(%esp)
b0: 0f b6 44 24 38 movzbl 0x38(%esp),%eax
b5: 8b 4c 24 38 mov 0x38(%esp),%ecx
b9: 89 54 24 3c mov %edx,0x3c(%esp)
bd: 83 e0 fd and $0xfffffffd,%eax
c0: 89 5c 24 64 mov %ebx,0x64(%esp)
c4: 8b 54 24 3c mov 0x3c(%esp),%edx
c8: 89 4c 24 60 mov %ecx,0x60(%esp)
cc: 8b 4c 24 34 mov 0x34(%esp),%ecx
d0: 88 44 24 60 mov %al,0x60(%esp)
d4: 8b 44 24 38 mov 0x38(%esp),%eax
d8: c6 44 24 62 f2 movb $0xf2,0x62(%esp)
dd: 8b 5c 24 60 mov 0x60(%esp),%ebx
e1: f0 0f c7 0f lock cmpxchg8b (%edi)
e5: 89 d1 mov %edx,%ecx
e7: 8b 54 24 3c mov 0x3c(%esp),%edx
eb: 33 44 24 38 xor 0x38(%esp),%eax
ef: 31 ca xor %ecx,%edx
f1: 09 c2 or %eax,%edx
f3: 75 a3 jne 98 <t+0x98>

to:

84: 0f b7 0c 02 movzwl (%edx,%eax,1),%ecx
88: c1 e1 08 shl $0x8,%ecx
8b: 0f b7 c9 movzwl %cx,%ecx
8e: 8b 86 20 1e 00 00 mov 0x1e20(%esi),%eax
94: 8b 96 24 1e 00 00 mov 0x1e24(%esi),%edx
9a: 89 4c 24 64 mov %ecx,0x64(%esp)
9e: 89 c3 mov %eax,%ebx
a0: 89 44 24 60 mov %eax,0x60(%esp)
a4: 83 e3 fd and $0xfffffffd,%ebx
a7: c6 44 24 62 f2 movb $0xf2,0x62(%esp)
ac: 88 5c 24 60 mov %bl,0x60(%esp)
b0: 8b 5c 24 60 mov 0x60(%esp),%ebx
b4: f0 0f c7 0f lock cmpxchg8b (%edi)
b8: 75 d4 jne 8e <t+0x8e>

The implementation extends the existing cmpxchg64 implementation.
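
For reference, a minimal sketch of the caller-side difference (this
example is illustrative and not part of the patch; "ptr" and "bit"
are made-up locals):

	/* cmpxchg64(): the caller must re-read and compare. */
	u64 old = READ_ONCE(*ptr);
	for (;;) {
		u64 prev = cmpxchg64(ptr, old, old | bit);
		if (prev == old)
			break;
		old = prev;
	}

	/*
	 * try_cmpxchg64(): the ZF set by cmpxchg8b is consumed
	 * directly, and on failure the observed value is written
	 * back to "old" by the primitive itself.
	 */
	u64 old = READ_ONCE(*ptr);
	while (!try_cmpxchg64(ptr, &old, old | bit))
		;
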
Signed-off-by: Uros Bizjak <ubizjak@...il.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Borislav Petkov <bp@...en8.de>
Cc: Dave Hansen <dave.hansen@...ux.intel.com>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: Will Deacon <will@...nel.org>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Boqun Feng <boqun.feng@...il.com>
Cc: Mark Rutland <mark.rutland@....com>
Cc: "Paul E. McKenney" <paulmck@...nel.org>
Cc: Marco Elver <elver@...gle.com>
---
arch/x86/include/asm/cmpxchg_32.h | 43 ++++++++++++++++++++++
arch/x86/include/asm/cmpxchg_64.h | 6 +++
include/linux/atomic/atomic-instrumented.h | 40 +++++++++++++++++++-
scripts/atomic/gen-atomic-instrumented.sh | 2 +-
4 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 0a7fe0321613..e874ff7f7529 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -42,6 +42,9 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
#define arch_cmpxchg64_local(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
(unsigned long long)(n)))
+#define arch_try_cmpxchg64(ptr, po, n) \
+ ((__typeof__(*(ptr)))__try_cmpxchg64((ptr), (unsigned long long *)(po), \
+ (unsigned long long)(n)))
#endif

static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
@@ -70,6 +73,25 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
return prev;
}

+static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
+{
+ bool success;
+ u64 prev;
+ asm volatile(LOCK_PREFIX "cmpxchg8b %2"
+ CC_SET(z)
+ : CC_OUT(z) (success),
+ "=A" (prev),
+ "+m" (*ptr)
+ : "b" ((u32)new),
+ "c" ((u32)(new >> 32)),
+ "1" (*pold)
+ : "memory");
+
+ if (unlikely(!success))
+ *pold = prev;
+ return success;
+}
+
#ifndef CONFIG_X86_CMPXCHG64
/*
* Building a kernel capable running on 80386 and 80486. It may be necessary
@@ -108,6 +130,27 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
: "memory"); \
__ret; })

+#define arch_try_cmpxchg64(ptr, po, n) \
+({ \
+ bool success; \
+ __typeof__(*(ptr)) __prev; \
+ __typeof__(ptr) _old = (__typeof__(ptr))(po); \
+ __typeof__(*(ptr)) __old = *_old; \
+ __typeof__(*(ptr)) __new = (n); \
+ alternative_io(LOCK_PREFIX_HERE \
+ "call cmpxchg8b_emu", \
+ "lock; cmpxchg8b (%%esi)" , \
+ X86_FEATURE_CX8, \
+ "=A" (__prev), \
+ "S" ((ptr)), "0" (__old), \
+ "b" ((unsigned int)__new), \
+ "c" ((unsigned int)(__new>>32)) \
+ : "memory"); \
+ success = (__prev == __old); \
+ if (unlikely(!success)) \
+ *_old = __prev; \
+ likely(success); \
+})
#endif

#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8)
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 072e5459fe2f..250187ac8248 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -19,6 +19,12 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
arch_cmpxchg_local((ptr), (o), (n)); \
})

+#define arch_try_cmpxchg64(ptr, po, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ arch_try_cmpxchg((ptr), (po), (n)); \
+})
+
#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)

#endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index 5d69b143c28e..7a139ec030b0 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -2006,6 +2006,44 @@ atomic_long_dec_if_positive(atomic_long_t *v)
arch_try_cmpxchg_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \
})

+#define try_cmpxchg64(ptr, oldp, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ typeof(oldp) __ai_oldp = (oldp); \
+ kcsan_mb(); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+ arch_try_cmpxchg64(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
+#define try_cmpxchg64_acquire(ptr, oldp, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ typeof(oldp) __ai_oldp = (oldp); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+ arch_try_cmpxchg64_acquire(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
+#define try_cmpxchg64_release(ptr, oldp, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ typeof(oldp) __ai_oldp = (oldp); \
+ kcsan_release(); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+ arch_try_cmpxchg64_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
+#define try_cmpxchg64_relaxed(ptr, oldp, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ typeof(oldp) __ai_oldp = (oldp); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+ arch_try_cmpxchg64_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
#define cmpxchg_local(ptr, ...) \
({ \
typeof(ptr) __ai_ptr = (ptr); \
@@ -2045,4 +2083,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
})

#endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// 87c974b93032afd42143613434d1a7788fa598f9
+// 764f741eb77a7ad565dc8d99ce2837d5542e8aee
diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh
index 68f902731d01..77c06526a574 100755
--- a/scripts/atomic/gen-atomic-instrumented.sh
+++ b/scripts/atomic/gen-atomic-instrumented.sh
@@ -166,7 +166,7 @@ grep '^[a-z]' "$1" | while read name meta args; do
done


-for xchg in "xchg" "cmpxchg" "cmpxchg64" "try_cmpxchg"; do
+for xchg in "xchg" "cmpxchg" "cmpxchg64" "try_cmpxchg" "try_cmpxchg64"; do
for order in "" "_acquire" "_release" "_relaxed"; do
gen_xchg "${xchg}" "${order}" ""
printf "\n"
--
2.35.1