Message-ID: <20170325075156.GF32474@worktop>
Date:   Sat, 25 Mar 2017 08:51:56 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     Linus Torvalds <torvalds@...ux-foundation.org>
Cc:     Andy Lutomirski <luto@...capital.net>,
        Dmitry Vyukov <dvyukov@...gle.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Andy Lutomirski <luto@...nel.org>,
        Borislav Petkov <bp@...en8.de>,
        Brian Gerst <brgerst@...il.com>,
        Denys Vlasenko <dvlasenk@...hat.com>,
        "H. Peter Anvin" <hpa@...or.com>,
        Josh Poimboeuf <jpoimboe@...hat.com>,
        Paul McKenney <paulmck@...ux.vnet.ibm.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...nel.org>,
        LKML <linux-kernel@...r.kernel.org>
Subject: Re: locking/atomic: Introduce atomic_try_cmpxchg()

On Fri, Mar 24, 2017 at 10:23:29PM +0100, Peter Zijlstra wrote:

> I'll try and redo the patches that landed in tip and see what it does
> for total vmlinux size somewhere tomorrow.

    text     data     bss       dec     hex  filename
10726413  4540256  843776  16110445  f5d36d  defconfig-build/vmlinux.pre
10730509  4540256  843776  16114541  f5e36d  defconfig-build/vmlinux.post

:-(
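
For reference, a minimal sketch of what changes for callers (the
function below is hypothetical, but it follows the same pattern as the
refcount_inc_not_zero() conversion in the patch): the old primitive
takes a pointer to the expected value and returns bool, updating *old
on failure; the new one takes the expected value itself, jumps to the
given label on success, and returns the value it observed on failure.

	/* old interface: expected value passed by pointer, bool result */
	static inline bool example_inc_not_zero_old(atomic_t *v)
	{
		int val = atomic_read(v);
		do {
			if (!val)
				return false;
		} while (!atomic_try_cmpxchg(v, &val, val + 1));
		return true;
	}

	/* new interface: goto label on success, observed value on failure */
	static inline bool example_inc_not_zero_new(atomic_t *v)
	{
		int val = atomic_read(v);
		for (;;) {
			if (!val)
				return false;
			val = atomic_try_cmpxchg(v, val, val + 1, success);
		}
	success:
		return true;
	}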

---
 arch/x86/include/asm/atomic.h      | 18 ++++++-------
 arch/x86/include/asm/atomic64_64.h | 24 ++++++++++--------
 arch/x86/include/asm/cmpxchg.h     | 50 ++++++++++++++++++++----------------
 include/linux/atomic.h             | 52 +++++++++++++++++++++++++-------------
 lib/refcount.c                     | 35 ++++++++++++++-----------
 5 files changed, 104 insertions(+), 75 deletions(-)

diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index caa5798..c80d4914 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -186,11 +186,8 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return cmpxchg(&v->counter, old, new);
 }
 
-#define atomic_try_cmpxchg atomic_try_cmpxchg
-static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
-{
-	return try_cmpxchg(&v->counter, old, new);
-}
+#define atomic_try_cmpxchg(_ptr, _old, _new, _label)			\
+	try_cmpxchg(&(_ptr)->counter, _old, _new, _label)
 
 static inline int atomic_xchg(atomic_t *v, int new)
 {
@@ -210,8 +207,9 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 {									\
 	int val = atomic_read(v);					\
-	do {								\
-	} while (!atomic_try_cmpxchg(v, &val, val c_op i));		\
+	for (;;)							\
+		val = atomic_try_cmpxchg(v, val, val c_op i, success);	\
+success:								\
 	return val;							\
 }
 
@@ -239,10 +237,12 @@ ATOMIC_OPS(xor, ^)
 static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c = atomic_read(v);
-	do {
+	for (;;) {
 		if (unlikely(c == u))
 			break;
-	} while (!atomic_try_cmpxchg(v, &c, c + a));
+		c = atomic_try_cmpxchg(v, c, c + a, success);
+	}
+success:
 	return c;
 }
 
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6189a43..489c3e2 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -176,11 +176,8 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
 	return cmpxchg(&v->counter, old, new);
 }
 
-#define atomic64_try_cmpxchg atomic64_try_cmpxchg
-static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new)
-{
-	return try_cmpxchg(&v->counter, old, new);
-}
+#define atomic64_try_cmpxchg(_ptr, _old, _new, _label)			\
+	try_cmpxchg(&(_ptr)->counter, _old, _new, _label)
 
 static inline long atomic64_xchg(atomic64_t *v, long new)
 {
@@ -199,10 +196,12 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
 static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 {
 	long c = atomic64_read(v);
-	do {
+	for (;;) {
 		if (unlikely(c == u))
 			return false;
-	} while (!atomic64_try_cmpxchg(v, &c, c + a));
+		c = atomic64_try_cmpxchg(v, c, c + a, success);
+	}
+success:
 	return true;
 }
 
@@ -218,11 +217,13 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
 	long dec, c = atomic64_read(v);
-	do {
+	for (;;) {
 		dec = c - 1;
 		if (unlikely(dec < 0))
 			break;
-	} while (!atomic64_try_cmpxchg(v, &c, dec));
+		c = atomic64_try_cmpxchg(v, c, dec, success);
+	}
+success:
 	return dec;
 }
 
@@ -239,8 +240,9 @@ static inline void atomic64_##op(long i, atomic64_t *v)			\
 static inline long atomic64_fetch_##op(long i, atomic64_t *v)		\
 {									\
 	long val = atomic64_read(v);					\
-	do {								\
-	} while (!atomic64_try_cmpxchg(v, &val, val c_op i));		\
+	for (;;)							\
+		val = atomic64_try_cmpxchg(v, val, val c_op i, success);\
+success:								\
 	return val;							\
 }
 
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index fb961db..e6b8a8f 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -154,22 +154,24 @@ extern void __add_wrong_size(void)
 	__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
 
 
-#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)		\
+#define __raw_try_cmpxchg(_ptr, _old, _new, success_label, size, lock)	\
 ({									\
-	bool success;							\
-	__typeof__(_ptr) _old = (_pold);				\
-	__typeof__(*(_ptr)) __old = *_old;				\
+	__typeof__(*(_ptr)) __old = (_old);				\
 	__typeof__(*(_ptr)) __new = (_new);				\
+	__typeof__(*(_ptr)) __ret;					\
+	bool __success;							\
+									\
 	switch (size) {							\
 	case __X86_CASE_B:						\
 	{								\
 		volatile u8 *__ptr = (volatile u8 *)(_ptr);		\
 		asm volatile(lock "cmpxchgb %[new], %[ptr]"		\
 			     CC_SET(z)					\
-			     : CC_OUT(z) (success),			\
+			     : CC_OUT(z) (__success),			\
 			       [ptr] "+m" (*__ptr),			\
-			       [old] "+a" (__old)			\
-			     : [new] "q" (__new)			\
+			       [old] "=a" (__ret)			\
+			     : [new] "q" (__new),			\
+			       "2" (__old)				\
 			     : "memory");				\
 		break;							\
 	}								\
@@ -178,10 +180,11 @@ extern void __add_wrong_size(void)
 		volatile u16 *__ptr = (volatile u16 *)(_ptr);		\
 		asm volatile(lock "cmpxchgw %[new], %[ptr]"		\
 			     CC_SET(z)					\
-			     : CC_OUT(z) (success),			\
+			     : CC_OUT(z) (__success),			\
 			       [ptr] "+m" (*__ptr),			\
-			       [old] "+a" (__old)			\
-			     : [new] "r" (__new)			\
+			       [old] "=a" (__ret)			\
+			     : [new] "r" (__new),			\
+			       "2" (__old)				\
 			     : "memory");				\
 		break;							\
 	}								\
@@ -190,10 +193,11 @@ extern void __add_wrong_size(void)
 		volatile u32 *__ptr = (volatile u32 *)(_ptr);		\
 		asm volatile(lock "cmpxchgl %[new], %[ptr]"		\
 			     CC_SET(z)					\
-			     : CC_OUT(z) (success),			\
+			     : CC_OUT(z) (__success),			\
 			       [ptr] "+m" (*__ptr),			\
-			       [old] "+a" (__old)			\
-			     : [new] "r" (__new)			\
+			       [old] "=a" (__ret)			\
+			     : [new] "r" (__new),			\
+			       "2" (__old)				\
 			     : "memory");				\
 		break;							\
 	}								\
@@ -202,25 +206,27 @@ extern void __add_wrong_size(void)
 		volatile u64 *__ptr = (volatile u64 *)(_ptr);		\
 		asm volatile(lock "cmpxchgq %[new], %[ptr]"		\
 			     CC_SET(z)					\
-			     : CC_OUT(z) (success),			\
+			     : CC_OUT(z) (__success),			\
 			       [ptr] "+m" (*__ptr),			\
-			       [old] "+a" (__old)			\
-			     : [new] "r" (__new)			\
+			       [old] "=a" (__ret)			\
+			     : [new] "r" (__new),			\
+			       "2" (__old)				\
 			     : "memory");				\
 		break;							\
 	}								\
 	default:							\
 		__cmpxchg_wrong_size();					\
 	}								\
-	*_old = __old;							\
-	success;							\
+									\
+	if (likely(__success)) goto success_label;			\
+	__ret;								\
 })
 
-#define __try_cmpxchg(ptr, pold, new, size)				\
-	__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
+#define __try_cmpxchg(ptr, pold, new, success_label, size)		\
+	__raw_try_cmpxchg((ptr), (pold), (new), success_label, (size), LOCK_PREFIX)
 
-#define try_cmpxchg(ptr, pold, new)					\
-	__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+#define try_cmpxchg(ptr, pold, new, success_label)			\
+	__try_cmpxchg((ptr), (pold), (new), success_label, sizeof(*(ptr)))
 
 /*
  * xadd() adds "inc" to "*ptr" and atomically returns the previous
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index aae5953..13a6eac 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -425,18 +425,26 @@
 
 #ifndef atomic_try_cmpxchg
 
-#define __atomic_try_cmpxchg(type, _p, _po, _n)				\
+#define __atomic_try_cmpxchg(type, _p, _o, _n, _label)			\
 ({									\
-	typeof(_po) __po = (_po);					\
-	typeof(*(_po)) __o = *__po;					\
-	*__po = atomic_cmpxchg##type((_p), __o, (_n));			\
-	(*__po == __o);							\
+	typeof(*(_p)) __r;						\
+	typeof(*(_p)) __o = (_o);					\
+	__r = atomic_cmpxchg##type((_p), __o, (_n));			\
+	if (__r == __o) goto _label;					\
+	__r;								\
 })
 
-#define atomic_try_cmpxchg(_p, _po, _n)		__atomic_try_cmpxchg(, _p, _po, _n)
-#define atomic_try_cmpxchg_relaxed(_p, _po, _n)	__atomic_try_cmpxchg(_relaxed, _p, _po, _n)
-#define atomic_try_cmpxchg_acquire(_p, _po, _n)	__atomic_try_cmpxchg(_acquire, _p, _po, _n)
-#define atomic_try_cmpxchg_release(_p, _po, _n)	__atomic_try_cmpxchg(_release, _p, _po, _n)
+#define atomic_try_cmpxchg(_p, _o, _n, _l)				\
+	__atomic_try_cmpxchg(, _p, _o, _n, _l)
+
+#define atomic_try_cmpxchg_relaxed(_p, _o, _n, _l)			\
+	__atomic_try_cmpxchg(_relaxed, _p, _o, _n, _l)
+
+#define atomic_try_cmpxchg_acquire(_p, _o, _n, _l)			\
+	__atomic_try_cmpxchg(_acquire, _p, _o, _n, _l)
+
+#define atomic_try_cmpxchg_release(_p, _o, _n, _l)			\
+	__atomic_try_cmpxchg(_release, _p, _o, _n, _l)
 
 #else /* atomic_try_cmpxchg */
 #define atomic_try_cmpxchg_relaxed	atomic_try_cmpxchg
@@ -1019,18 +1027,26 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 
 #ifndef atomic64_try_cmpxchg
 
-#define __atomic64_try_cmpxchg(type, _p, _po, _n)			\
+#define __atomic64_try_cmpxchg(type, _p, _o, _n, _label)		\
 ({									\
-	typeof(_po) __po = (_po);					\
-	typeof(*(_po)) __o = *__po;					\
-	*__po = atomic64_cmpxchg##type((_p), __o, (_n));		\
-	(*__po == __o);							\
+	typeof(*(_p)) __r;						\
+	typeof(*(_p)) __o = (_o);					\
+	__r = atomic64_cmpxchg##type((_p), __o, (_n));			\
+	if (__r == __o) goto _label;					\
+	__r;								\
 })
 
-#define atomic64_try_cmpxchg(_p, _po, _n)		__atomic64_try_cmpxchg(, _p, _po, _n)
-#define atomic64_try_cmpxchg_relaxed(_p, _po, _n)	__atomic64_try_cmpxchg(_relaxed, _p, _po, _n)
-#define atomic64_try_cmpxchg_acquire(_p, _po, _n)	__atomic64_try_cmpxchg(_acquire, _p, _po, _n)
-#define atomic64_try_cmpxchg_release(_p, _po, _n)	__atomic64_try_cmpxchg(_release, _p, _po, _n)
+#define atomic64_try_cmpxchg(_p, _o, _n, _l)				\
+	__atomic64_try_cmpxchg(, _p, _o, _n, _l)
+
+#define atomic64_try_cmpxchg_relaxed(_p, _o, _n, _l)			\
+	__atomic64_try_cmpxchg(_relaxed, _p, _o, _n, _l)
+
+#define atomic64_try_cmpxchg_acquire(_p, _o, _n, _l)			\
+	__atomic64_try_cmpxchg(_acquire, _p, _o, _n, _l)
+
+#define atomic64_try_cmpxchg_release(_p, _o, _n, _l)			\
+	__atomic64_try_cmpxchg(_release, _p, _o, _n, _l)
 
 #else /* atomic64_try_cmpxchg */
 #define atomic64_try_cmpxchg_relaxed	atomic64_try_cmpxchg
diff --git a/lib/refcount.c b/lib/refcount.c
index f42124c..18b8926 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -59,7 +59,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
-	do {
+	for (;;) {
 		if (!val)
 			return false;
 
@@ -70,8 +70,9 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 		if (new < val)
 			new = UINT_MAX;
 
-	} while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new));
-
+		val = atomic_try_cmpxchg_relaxed(&r->refs, val, new, success);
+	}
+success:
 	WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
 
 	return true;
@@ -116,7 +117,7 @@ bool refcount_inc_not_zero(refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
-	do {
+	for (;;) {
 		new = val + 1;
 
 		if (!val)
@@ -125,8 +126,9 @@ bool refcount_inc_not_zero(refcount_t *r)
 		if (unlikely(!new))
 			return true;
 
-	} while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new));
-
+		val = atomic_try_cmpxchg_relaxed(&r->refs, val, new, success);
+	}
+success:
 	WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
 
 	return true;
@@ -175,7 +177,7 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
-	do {
+	for (;;) {
 		if (unlikely(val == UINT_MAX))
 			return false;
 
@@ -185,8 +187,9 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 			return false;
 		}
 
-	} while (!atomic_try_cmpxchg_release(&r->refs, &val, new));
-
+		val = atomic_try_cmpxchg_release(&r->refs, val, new, success);
+	}
+success:
 	return !new;
 }
 EXPORT_SYMBOL_GPL(refcount_sub_and_test);
@@ -244,9 +247,10 @@ EXPORT_SYMBOL_GPL(refcount_dec);
  */
 bool refcount_dec_if_one(refcount_t *r)
 {
-	int val = 1;
-
-	return atomic_try_cmpxchg_release(&r->refs, &val, 0);
+	atomic_try_cmpxchg_release(&r->refs, 1, 0, success);
+	return false;
+success:
+	return true;
 }
 EXPORT_SYMBOL_GPL(refcount_dec_if_one);
 
@@ -265,7 +269,7 @@ bool refcount_dec_not_one(refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
-	do {
+	for (;;) {
 		if (unlikely(val == UINT_MAX))
 			return true;
 
@@ -278,8 +282,9 @@ bool refcount_dec_not_one(refcount_t *r)
 			return true;
 		}
 
-	} while (!atomic_try_cmpxchg_release(&r->refs, &val, new));
-
+		val = atomic_try_cmpxchg_release(&r->refs, val, new, success);
+	}
+success:
 	return true;
 }
 EXPORT_SYMBOL_GPL(refcount_dec_not_one);
