lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170324172342.radlrhk2z6mwmdgk@hirez.programming.kicks-ass.net>
Date:   Fri, 24 Mar 2017 18:23:42 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     Andy Lutomirski <luto@...capital.net>
Cc:     Dmitry Vyukov <dvyukov@...gle.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Andy Lutomirski <luto@...nel.org>,
        Borislav Petkov <bp@...en8.de>,
        Brian Gerst <brgerst@...il.com>,
        Denys Vlasenko <dvlasenk@...hat.com>,
        "H. Peter Anvin" <hpa@...or.com>,
        Josh Poimboeuf <jpoimboe@...hat.com>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Paul McKenney <paulmck@...ux.vnet.ibm.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...nel.org>,
        LKML <linux-kernel@...r.kernel.org>
Subject: Re: locking/atomic: Introduce atomic_try_cmpxchg()

On Fri, Mar 24, 2017 at 09:54:46AM -0700, Andy Lutomirski wrote:
> > So the first snipped I tested regressed like so:
> >
> >
> > 0000000000000000 <T_refcount_inc>:                              0000000000000000 <T_refcount_inc>:
> >    0:   8b 07                   mov    (%rdi),%eax                 0:   8b 17                   mov    (%rdi),%edx
> >    2:   83 f8 ff                cmp    $0xffffffff,%eax            2:   83 fa ff                cmp    $0xffffffff,%edx
> >    5:   74 13                   je     1a <T_refcount_inc+0x1a>    5:   74 1a                   je     21 <T_refcount_inc+0x21>
> >    7:   85 c0                   test   %eax,%eax                   7:   85 d2                   test   %edx,%edx
> >    9:   74 0d                   je     18 <T_refcount_inc+0x18>    9:   74 13                   je     1e <T_refcount_inc+0x1e>
> >    b:   8d 50 01                lea    0x1(%rax),%edx              b:   8d 4a 01                lea    0x1(%rdx),%ecx
> >    e:   f0 0f b1 17             lock cmpxchg %edx,(%rdi)           e:   89 d0                   mov    %edx,%eax
> >   12:   75 ee                   jne    2 <T_refcount_inc+0x2>     10:   f0 0f b1 0f             lock cmpxchg %ecx,(%rdi)
> >   14:   ff c2                   inc    %edx                       14:   74 04                   je     1a <T_refcount_inc+0x1a>
> >   16:   75 02                   jne    1a <T_refcount_inc+0x1a>   16:   89 c2                   mov    %eax,%edx
> >   18:   0f 0b                   ud2                               18:   eb e8                   jmp    2 <T_refcount_inc+0x2>
> >   1a:   c3                      retq                              1a:   ff c1                   inc    %ecx
> >                                                                   1c:   75 03                   jne    21 <T_refcount_inc+0x21>
> >                                                                   1e:   0f 0b                   ud2
> >                                                                   20:   c3                      retq
> >                                                                   21:   c3                      retq
> 
> Can you re-send the better asm you got earlier?

On the left?

> If I pretend to be a dumb compiler, I wonder if you'd get better results with:
> 
> if (!success) {
>   *_old = __old;
>   return false;
> } else {
>   return true;
> }
> 
> or however you jam that into a statement expression.  That way you
> aren't relying on the compiler to merge the branches.

I tried a few variants, but nothing really made it better.

Find the tiny.c file below; I'm using:

  gcc (Debian 6.3.0-5) 6.3.0 20170124

it has both an inline and an stmt-expr try_cmpxchg variant to play with;
the 'expected' output is at the bottom (same as above left).

Note that clang doesn't compile this stuff due to missing features.

---

/* gcc -Os -std=gnu99 -fno-strict-overflow -falign-jumps=1 -falign-loops=1 -c tiny.c; objdump -dr tiny.o */

typedef _Bool bool;

static inline bool try_cmpxchg(unsigned int *ptr, unsigned int *val, unsigned int new)
{
	unsigned int old = *val;
	bool success;

	asm volatile("lock cmpxchgl %[new], %[ptr]"
		     : "=@ccz" (success),
		       [ptr] "+m" (*ptr),
		       [old] "+a" (old)
		     : [new] "r" (new)
		     : "memory");

//	if (!success)
		*val = old;

	return success;
}

#define __try_cmpxchg(_ptr, _pold, _new)				\
({									\
 	bool success;							\
	__typeof__(_ptr) _old = (_pold);				\
	__typeof__(*(_ptr)) __old = *_old;				\
	__typeof__(*(_ptr)) __new = (_new);				\
	volatile unsigned int * __ptr = (volatile unsigned int *)(_ptr);\
									\
	asm volatile("lock cmpxchgl %[new], %[ptr]"			\
		     : "=@ccz" (success),				\
		       [ptr] "+m" (*__ptr),				\
		       [old] "+a" (__old)				\
		     : [new] "r" (__new)				\
		     : "memory");					\
	/* if (!success) */							\
		*_old = __old;						\
	success;							\
})

#define EXCEPTION_VALUE(val, handler)	asm volatile ("ud2 # %0" : : "r" (val))

#define UINT_MAX	(~0U)

#define likely(x)    __builtin_expect(!!(x), 1)
#define unlikely(x)    __builtin_expect(!!(x), 0)

static inline void refcount_inc(unsigned int *r)
{
	unsigned int new, val = *(unsigned int volatile *)r;

	do {
		if (unlikely(val == UINT_MAX)) /* saturated */
			return;

		if (unlikely(!val)) /* use-after-free */
			goto exception;

		/* cannot overflow because we already checked UINT_MAX */
		new = val + 1;

	} while (unlikely(!try_cmpxchg(r, &val, new)));

	if (unlikely(new == UINT_MAX))
exception:	EXCEPTION_VALUE(val, __refcount_warn);
}

void T_refcount_inc(unsigned int *r)
{
	refcount_inc(r);
}

/*
   0:   8b 07                   mov    (%rdi),%eax
   2:   83 f8 ff                cmp    $0xffffffff,%eax
   5:   74 13                   je     1a <T_refcount_inc+0x1a>
   7:   85 c0                   test   %eax,%eax
   9:   74 0d                   je     18 <T_refcount_inc+0x18>
   b:   8d 50 01                lea    0x1(%rax),%edx
   e:   f0 0f b1 17             lock cmpxchg %edx,(%rdi)
  12:   75 ee                   jne    2 <T_refcount_inc+0x2>
  14:   ff c2                   inc    %edx
  16:   75 02                   jne    1a <T_refcount_inc+0x1a>
  18:   0f 0b                   ud2
  1a:   c3                      retq
*/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ