lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170206063215.GA9178@tardis.cn.ibm.com>
Date:   Mon, 6 Feb 2017 14:32:15 +0800
From:   Boqun Feng <boqun.feng@...il.com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     elena.reshetova@...el.com, gregkh@...uxfoundation.org,
        keescook@...omium.org, arnd@...db.de, tglx@...utronix.de,
        mingo@...nel.org, h.peter.anvin@...el.com, will.deacon@....com,
        dwindsor@...il.com, dhowells@...hat.com,
        linux-kernel@...r.kernel.org, kernel-hardening@...ts.openwall.com
Subject: Re: [PATCH 4/5] atomic: Introduce atomic_try_cmpxchg()

On Mon, Feb 06, 2017 at 12:24:28PM +0800, Boqun Feng wrote:
> On Fri, Feb 03, 2017 at 02:26:02PM +0100, Peter Zijlstra wrote:
> > Add a new cmpxchg interface:
> > 
> >   bool try_cmpxchg(u{8,16,32,64} *ptr, u{8,16,32,64} *val, u{8,16,32,64} new);
> > 
> > Where the boolean returns the result of the compare; and thus if the
> > exchange happened; and in case of failure, the new value of *ptr is
> > returned in *val.
> > 
> > This allows simplification/improvement of loops like:
> > 
> > 	for (;;) {
> > 		new = val $op $imm;
> > 		old = cmpxchg(ptr, val, new);
> > 		if (old == val)
> > 			break;
> > 		val = old;
> > 	}
> > 
> > into:
> > 
> > 	for (;;) {
> > 		new = val $op $imm;
> > 		if (try_cmpxchg(ptr, &val, new))
> > 			break;
> > 	}
> > 
> > while also generating better code (GCC6 and onwards).
> > 
> 
> But switching to try_cmpxchg() will make @val a memory location, which
> could not be put in a register. And this will generate unnecessary
> memory accesses on archs having enough registers(PPC, e.g.).
> 

Hmm.. it turns out that compilers can figure out that @val could be fit
in a register(maybe in the escape analysis step), so don't treat it as a
memory location. This is at least true for GCC 5.4.0 on PPC. So I think
we can rely on this?

> > Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> > ---
> > --- a/arch/x86/include/asm/atomic.h
> > +++ b/arch/x86/include/asm/atomic.h
> > @@ -186,6 +186,12 @@ static __always_inline int atomic_cmpxch
> >  	return cmpxchg(&v->counter, old, new);
> >  }
> >  
> > +#define atomic_try_cmpxchg atomic_try_cmpxchg
> > +static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
> > +{
> > +	return try_cmpxchg(&v->counter, old, new);
> > +}
> > +
> >  static inline int atomic_xchg(atomic_t *v, int new)
> >  {
> >  	return xchg(&v->counter, new);
> > --- a/arch/x86/include/asm/cmpxchg.h
> > +++ b/arch/x86/include/asm/cmpxchg.h
> > @@ -153,6 +153,75 @@ extern void __add_wrong_size(void)
> >  #define cmpxchg_local(ptr, old, new)					\
> >  	__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
> >  
> > +
> > +#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)		\
> > +({									\
> > +	bool success;							\
> > +	__typeof__(_ptr) _old = (_pold);				\
> > +	__typeof__(*(_ptr)) __old = *_old;				\
> > +	__typeof__(*(_ptr)) __new = (_new);				\
> > +	switch (size) {							\
> > +	case __X86_CASE_B:						\
> > +	{								\
> > +		volatile u8 *__ptr = (volatile u8 *)(_ptr);		\
> > +		asm volatile(lock "cmpxchgb %[new], %[ptr]"		\
> > +			     CC_SET(z)					\
> > +			     : CC_OUT(z) (success),			\
> > +			       [ptr] "+m" (*__ptr),			\
> > +			       [old] "+a" (__old)			\
> > +			     : [new] "q" (__new)			\
> > +			     : "memory");				\
> > +		break;							\
> > +	}								\
> > +	case __X86_CASE_W:						\
> > +	{								\
> > +		volatile u16 *__ptr = (volatile u16 *)(_ptr);		\
> > +		asm volatile(lock "cmpxchgw %[new], %[ptr]"		\
> > +			     CC_SET(z)					\
> > +			     : CC_OUT(z) (success),			\
> > +			       [ptr] "+m" (*__ptr),			\
> > +			       [old] "+a" (__old)			\
> > +			     : [new] "r" (__new)			\
> > +			     : "memory");				\
> > +		break;							\
> > +	}								\
> > +	case __X86_CASE_L:						\
> > +	{								\
> > +		volatile u32 *__ptr = (volatile u32 *)(_ptr);		\
> > +		asm volatile(lock "cmpxchgl %[new], %[ptr]"		\
> > +			     CC_SET(z)					\
> > +			     : CC_OUT(z) (success),			\
> > +			       [ptr] "+m" (*__ptr),			\
> > +			       [old] "+a" (__old)			\
> > +			     : [new] "r" (__new)			\
> > +			     : "memory");				\
> > +		break;							\
> > +	}								\
> > +	case __X86_CASE_Q:						\
> > +	{								\
> > +		volatile u64 *__ptr = (volatile u64 *)(_ptr);		\
> > +		asm volatile(lock "cmpxchgq %[new], %[ptr]"		\
> > +			     CC_SET(z)					\
> > +			     : CC_OUT(z) (success),			\
> > +			       [ptr] "+m" (*__ptr),			\
> > +			       [old] "+a" (__old)			\
> > +			     : [new] "r" (__new)			\
> > +			     : "memory");				\
> > +		break;							\
> > +	}								\
> > +	default:							\
> > +		__cmpxchg_wrong_size();					\
> > +	}								\
> > +	*_old = __old;							\
> > +	success;							\
> > +})
> > +
> > +#define __try_cmpxchg(ptr, pold, new, size)				\
> > +	__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
> > +
> > +#define try_cmpxchg(ptr, pold, new)					\
> > +	__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
> > +
> >  /*
> >   * xadd() adds "inc" to "*ptr" and atomically returns the previous
> >   * value of "*ptr".
> > --- a/include/linux/atomic.h
> > +++ b/include/linux/atomic.h
> > @@ -423,6 +423,28 @@
> >  #endif
> >  #endif /* atomic_cmpxchg_relaxed */
> >  
> > +#ifndef atomic_try_cmpxchg
> > +
> > +#define __atomic_try_cmpxchg(type, _p, _po, _n)				\
> > +({									\
> > +	typeof(_po) __po = (_po);					\
> > +	typeof(*(_po)) __o = *__po;					\
> > +	bool success = (atomic_cmpxchg##type((_p), __o, (_n)) == __o);	\
> > +	*__po = __o;							\
> 
> Besides, is this part correct? atomic_cmpxchg_*() wouldn't change the
> value of __o, so *__po wouldn't be changed.. IOW, in case of failure,
> *ptr wouldn't be updated to a new value.
> 
> Maybe this should be:
> 
> 	bool success;
> 	*__po = atomic_cmpxchg##type((_p), __o, (_n));
> 	sucess = (*__po == _o);

typo... should be

	success = (*__po == __o);

Regards,
Boqun

> 
> , right?
> 
> Regards,
> Boqun
> 
> > +	success;							\
> > +})
> > +
> > +#define atomic_try_cmpxchg(_p, _po, _n)		__atomic_try_cmpxchg(, _p, _po, _n)
> > +#define atomic_try_cmpxchg_relaxed(_p, _po, _n)	__atomic_try_cmpxchg(_relaxed, _p, _po, _n)
> > +#define atomic_try_cmpxchg_acquire(_p, _po, _n)	__atomic_try_cmpxchg(_acquire, _p, _po, _n)
> > +#define atomic_try_cmpxchg_release(_p, _po, _n)	__atomic_try_cmpxchg(_release, _p, _po, _n)
> > +
> > +#else /* atomic_try_cmpxchg */
> > +#define atomic_try_cmpxchg_relaxed	atomic_try_cmpxchg
> > +#define atomic_try_cmpxchg_acquire	atomic_try_cmpxchg
> > +#define atomic_try_cmpxchg_release	atomic_try_cmpxchg
> > +#endif /* atomic_try_cmpxchg */
> > +
> >  /* cmpxchg_relaxed */
> >  #ifndef cmpxchg_relaxed
> >  #define  cmpxchg_relaxed		cmpxchg
> > 
> > 



Download attachment "signature.asc" of type "application/pgp-signature" (489 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ