Add a new cmpxchg interface:

  bool try_cmpxchg(u{8,16,32,64} *ptr, u{8,16,32,64} *val, u{8,16,32,64} new);

The boolean return value is the result of the compare, and thus
indicates whether the exchange happened; on failure, the current
value of *ptr is returned in *val.

This allows simplification/improvement of loops like:

	for (;;) {
		new = val $op $imm;
		old = cmpxchg(ptr, val, new);
		if (old == val)
			break;
		val = old;
	}

into:

	do {
		new = val $op $imm;
	} while (!try_cmpxchg(ptr, &val, new));

while also generating better code (GCC6 and onwards).

Signed-off-by: Peter Zijlstra (Intel)
---
 arch/x86/include/asm/atomic.h      |  6 +++
 arch/x86/include/asm/atomic64_64.h |  6 +++
 arch/x86/include/asm/cmpxchg.h     | 69 +++++++++++++++++++++++++++++++++++++
 include/linux/atomic.h             | 42 ++++++++++++++++++++++
 4 files changed, 123 insertions(+)

--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -186,6 +186,12 @@ static __always_inline int atomic_cmpxch
 	return cmpxchg(&v->counter, old, new);
 }
 
+#define atomic_try_cmpxchg atomic_try_cmpxchg
+static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
+{
+	return try_cmpxchg(&v->counter, old, new);
+}
+
 static inline int atomic_xchg(atomic_t *v, int new)
 {
 	return xchg(&v->counter, new);
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -176,6 +176,12 @@ static inline long atomic64_cmpxchg(atom
 	return cmpxchg(&v->counter, old, new);
 }
 
+#define atomic64_try_cmpxchg atomic64_try_cmpxchg
+static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new)
+{
+	return try_cmpxchg(&v->counter, old, new);
+}
+
 static inline long atomic64_xchg(atomic64_t *v, long new)
 {
 	return xchg(&v->counter, new);
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -153,6 +153,75 @@ extern void __add_wrong_size(void)
 #define cmpxchg_local(ptr, old, new)					\
 	__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
 
+
+#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)		\
+({									\
+	bool success;							\
+	__typeof__(_ptr) _old = (_pold);				\
+	__typeof__(*(_ptr)) __old = *_old;				\
+	__typeof__(*(_ptr)) __new = (_new);				\
+	switch (size) {							\
+	case __X86_CASE_B:						\
+	{								\
+		volatile u8 *__ptr = (volatile u8 *)(_ptr);		\
+		asm volatile(lock "cmpxchgb %[new], %[ptr]"		\
+			     CC_SET(z)					\
+			     : CC_OUT(z) (success),			\
+			       [ptr] "+m" (*__ptr),			\
+			       [old] "+a" (__old)			\
+			     : [new] "q" (__new)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_W:						\
+	{								\
+		volatile u16 *__ptr = (volatile u16 *)(_ptr);		\
+		asm volatile(lock "cmpxchgw %[new], %[ptr]"		\
+			     CC_SET(z)					\
+			     : CC_OUT(z) (success),			\
+			       [ptr] "+m" (*__ptr),			\
+			       [old] "+a" (__old)			\
+			     : [new] "r" (__new)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_L:						\
+	{								\
+		volatile u32 *__ptr = (volatile u32 *)(_ptr);		\
+		asm volatile(lock "cmpxchgl %[new], %[ptr]"		\
+			     CC_SET(z)					\
+			     : CC_OUT(z) (success),			\
+			       [ptr] "+m" (*__ptr),			\
+			       [old] "+a" (__old)			\
+			     : [new] "r" (__new)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_Q:						\
+	{								\
+		volatile u64 *__ptr = (volatile u64 *)(_ptr);		\
+		asm volatile(lock "cmpxchgq %[new], %[ptr]"		\
+			     CC_SET(z)					\
+			     : CC_OUT(z) (success),			\
+			       [ptr] "+m" (*__ptr),			\
+			       [old] "+a" (__old)			\
+			     : [new] "r" (__new)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	default:							\
+		__cmpxchg_wrong_size();					\
+	}								\
+	*_old = __old;							\
+	success;							\
+})
+
+#define __try_cmpxchg(ptr, pold, new, size)				\
+	__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
+
+#define try_cmpxchg(ptr, pold, new)					\
+	__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+
 /*
  * xadd() adds "inc" to "*ptr" and atomically returns the previous
  * value of "*ptr".
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -423,6 +423,27 @@
 #endif
 #endif /* atomic_cmpxchg_relaxed */
 
+#ifndef atomic_try_cmpxchg
+
+#define __atomic_try_cmpxchg(type, _p, _po, _n)				\
+({									\
+	typeof(_po) __po = (_po);					\
+	typeof(*(_po)) __o = *__po;					\
+	*__po = atomic_cmpxchg##type((_p), __o, (_n));			\
+	(*__po == __o);							\
+})
+
+#define atomic_try_cmpxchg(_p, _po, _n)		__atomic_try_cmpxchg(, _p, _po, _n)
+#define atomic_try_cmpxchg_relaxed(_p, _po, _n)	__atomic_try_cmpxchg(_relaxed, _p, _po, _n)
+#define atomic_try_cmpxchg_acquire(_p, _po, _n)	__atomic_try_cmpxchg(_acquire, _p, _po, _n)
+#define atomic_try_cmpxchg_release(_p, _po, _n)	__atomic_try_cmpxchg(_release, _p, _po, _n)
+
+#else /* atomic_try_cmpxchg */
+#define atomic_try_cmpxchg_relaxed	atomic_try_cmpxchg
+#define atomic_try_cmpxchg_acquire	atomic_try_cmpxchg
+#define atomic_try_cmpxchg_release	atomic_try_cmpxchg
+#endif /* atomic_try_cmpxchg */
+
 /* cmpxchg_relaxed */
 #ifndef cmpxchg_relaxed
 #define cmpxchg_relaxed cmpxchg
@@ -996,6 +1017,27 @@ static inline int atomic_dec_if_positive
 #endif
 #endif /* atomic64_cmpxchg_relaxed */
 
+#ifndef atomic64_try_cmpxchg
+
+#define __atomic64_try_cmpxchg(type, _p, _po, _n)			\
+({									\
+	typeof(_po) __po = (_po);					\
+	typeof(*(_po)) __o = *__po;					\
+	*__po = atomic64_cmpxchg##type((_p), __o, (_n));		\
+	(*__po == __o);							\
+})
+
+#define atomic64_try_cmpxchg(_p, _po, _n)	__atomic64_try_cmpxchg(, _p, _po, _n)
+#define atomic64_try_cmpxchg_relaxed(_p, _po, _n) __atomic64_try_cmpxchg(_relaxed, _p, _po, _n)
+#define atomic64_try_cmpxchg_acquire(_p, _po, _n) __atomic64_try_cmpxchg(_acquire, _p, _po, _n)
+#define atomic64_try_cmpxchg_release(_p, _po, _n) __atomic64_try_cmpxchg(_release, _p, _po, _n)
+
+#else /* atomic64_try_cmpxchg */
+#define atomic64_try_cmpxchg_relaxed	atomic64_try_cmpxchg
+#define atomic64_try_cmpxchg_acquire	atomic64_try_cmpxchg
+#define atomic64_try_cmpxchg_release	atomic64_try_cmpxchg
+#endif /* atomic64_try_cmpxchg */
+
 #ifndef atomic64_andnot
 static inline void atomic64_andnot(long long i, atomic64_t *v)
 {
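
For reference, a usage sketch of the new primitive (the helper below is
hypothetical, not part of this patch): increment a counter only if it is
currently non-zero. Because atomic_try_cmpxchg() refreshes the caller's
copy of the old value on failure, the loop needs no explicit re-read:

	/*
	 * Hypothetical example, not from this patch: on failure,
	 * atomic_try_cmpxchg() updates "val" with the current counter
	 * value, so the do/while condition both retries and re-reads.
	 */
	static bool example_inc_not_zero(atomic_t *r)
	{
		int val = atomic_read(r);

		do {
			if (!val)
				return false;	/* already zero; do not increment */
		} while (!atomic_try_cmpxchg(r, &val, val + 1));

		return true;
	}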