Date:   Fri, 24 Mar 2017 18:51:15 +0100
From:   Dmitry Vyukov <dvyukov@...gle.com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     Andy Lutomirski <luto@...capital.net>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Andy Lutomirski <luto@...nel.org>,
        Borislav Petkov <bp@...en8.de>,
        Brian Gerst <brgerst@...il.com>,
        Denys Vlasenko <dvlasenk@...hat.com>,
        "H. Peter Anvin" <hpa@...or.com>,
        Josh Poimboeuf <jpoimboe@...hat.com>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Paul McKenney <paulmck@...ux.vnet.ibm.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...nel.org>,
        LKML <linux-kernel@...r.kernel.org>
Subject: Re: locking/atomic: Introduce atomic_try_cmpxchg()

On Fri, Mar 24, 2017 at 6:23 PM, Peter Zijlstra <peterz@...radead.org> wrote:
> On Fri, Mar 24, 2017 at 09:54:46AM -0700, Andy Lutomirski wrote:
>> > So the first snippet I tested regressed like so:
>> >
>> >
>> > 0000000000000000 <T_refcount_inc>:                              0000000000000000 <T_refcount_inc>:
>> >    0:   8b 07                   mov    (%rdi),%eax                 0:   8b 17                   mov    (%rdi),%edx
>> >    2:   83 f8 ff                cmp    $0xffffffff,%eax            2:   83 fa ff                cmp    $0xffffffff,%edx
>> >    5:   74 13                   je     1a <T_refcount_inc+0x1a>    5:   74 1a                   je     21 <T_refcount_inc+0x21>
>> >    7:   85 c0                   test   %eax,%eax                   7:   85 d2                   test   %edx,%edx
>> >    9:   74 0d                   je     18 <T_refcount_inc+0x18>    9:   74 13                   je     1e <T_refcount_inc+0x1e>
>> >    b:   8d 50 01                lea    0x1(%rax),%edx              b:   8d 4a 01                lea    0x1(%rdx),%ecx
>> >    e:   f0 0f b1 17             lock cmpxchg %edx,(%rdi)           e:   89 d0                   mov    %edx,%eax
>> >   12:   75 ee                   jne    2 <T_refcount_inc+0x2>     10:   f0 0f b1 0f             lock cmpxchg %ecx,(%rdi)
>> >   14:   ff c2                   inc    %edx                       14:   74 04                   je     1a <T_refcount_inc+0x1a>
>> >   16:   75 02                   jne    1a <T_refcount_inc+0x1a>   16:   89 c2                   mov    %eax,%edx
>> >   18:   0f 0b                   ud2                               18:   eb e8                   jmp    2 <T_refcount_inc+0x2>
>> >   1a:   c3                      retq                              1a:   ff c1                   inc    %ecx
>> >                                                                   1c:   75 03                   jne    21 <T_refcount_inc+0x21>
>> >                                                                   1e:   0f 0b                   ud2
>> >                                                                   20:   c3                      retq
>> >                                                                   21:   c3                      retq
>>
>> Can you re-send the better asm you got earlier?
>
> On the left?
>
>> If I pretend to be a dumb compiler, I wonder if you'd get better results with:
>>
>> if (!success) {
>>   *_old = __old;
>>   return false;
>> } else {
>>   return true;
>> }
>>
>> or however you jam that into a statement expression.  That way you
>> aren't relying on the compiler to merge the branches.
>
> I tried a few variants, but nothing really made it better.
>
> Find the tiny.c file below; I'm using:
>
>   gcc (Debian 6.3.0-5) 6.3.0 20170124
>
> it has both an inline and a stmt-expr try_cmpxchg variant to play with;
> the 'expected' output is at the bottom (same as above left).
>
> Note that clang doesn't compile this stuff due to missing features.
>
> ---
>
> /* gcc -Os -std=gnu99 -fno-strict-overflow -falign-jumps=1 -falign-loops=1 -c tiny.c; objdump -dr tiny.o */
>
> typedef _Bool bool;
>
> static inline bool try_cmpxchg(unsigned int *ptr, unsigned int *val, unsigned int new)
> {
>         unsigned int old = *val;
>         bool success;
>
>         asm volatile("lock cmpxchgl %[new], %[ptr]"
>                      : "=@ccz" (success),
>                        [ptr] "+m" (*ptr),
>                        [old] "+a" (old)
>                      : [new] "r" (new)
>                      : "memory");
>
> //      if (!success)
>                 *val = old;
>
>         return success;
> }
>
> #define __try_cmpxchg(_ptr, _pold, _new)                                \
> ({                                                                      \
>         bool success;                                                   \
>         __typeof__(_ptr) _old = (_pold);                                \
>         __typeof__(*(_ptr)) __old = *_old;                              \
>         __typeof__(*(_ptr)) __new = (_new);                             \
>         volatile unsigned int * __ptr = (volatile unsigned int *)(_ptr);\
>                                                                         \
>         asm volatile("lock cmpxchgl %[new], %[ptr]"                     \
>                      : "=@ccz" (success),                               \
>                        [ptr] "+m" (*__ptr),                             \
>                        [old] "+a" (__old)                               \
>                      : [new] "r" (__new)                                \
>                      : "memory");                                       \
>         /* if (!success) */                                                     \
>                 *_old = __old;                                          \
>         success;                                                        \
> })
>
> #define EXCEPTION_VALUE(val, handler)   asm volatile ("ud2 # %0" : : "r" (val))
>
> #define UINT_MAX        (~0U)
>
> #define likely(x)    __builtin_expect(!!(x), 1)
> #define unlikely(x)    __builtin_expect(!!(x), 0)
>
> static inline void refcount_inc(unsigned int *r)
> {
>         unsigned int new, val = *(unsigned int volatile *)r;
>
>         do {
>                 if (unlikely(val == UINT_MAX)) /* saturated */
>                         return;
>
>                 if (unlikely(!val)) /* use-after-free */
>                         goto exception;
>
>                 /* cannot overflow because we already checked UINT_MAX */
>                 new = val + 1;
>
>         } while (unlikely(!try_cmpxchg(r, &val, new)));
>
>         if (unlikely(new == UINT_MAX))
> exception:      EXCEPTION_VALUE(val, __refcount_warn);
> }
>
> void T_refcount_inc(unsigned int *r)
> {
>         refcount_inc(r);
> }
>
> /*
>    0:   8b 07                   mov    (%rdi),%eax
>    2:   83 f8 ff                cmp    $0xffffffff,%eax
>    5:   74 13                   je     1a <T_refcount_inc+0x1a>
>    7:   85 c0                   test   %eax,%eax
>    9:   74 0d                   je     18 <T_refcount_inc+0x18>
>    b:   8d 50 01                lea    0x1(%rax),%edx
>    e:   f0 0f b1 17             lock cmpxchg %edx,(%rdi)
>   12:   75 ee                   jne    2 <T_refcount_inc+0x2>
>   14:   ff c2                   inc    %edx
>   16:   75 02                   jne    1a <T_refcount_inc+0x1a>
>   18:   0f 0b                   ud2
>   1a:   c3                      retq
> */



This seems to help ;)

#define try_cmpxchg(ptr, pold, new) \
        __atomic_compare_exchange_n(ptr, pold, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
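
For comparison, an untested sketch of the same thing as a drop-in for the
asm try_cmpxchg() in tiny.c, same signature: on failure the builtin writes
the current value of *ptr back into the expected-value argument and returns
false, which is exactly the try_cmpxchg contract.

static inline bool try_cmpxchg(unsigned int *ptr, unsigned int *val, unsigned int new)
{
        /* on failure the builtin stores the current *ptr into *val and
           returns false, matching the *val = old path of the asm version */
        return __atomic_compare_exchange_n(ptr, val, new, 0,
                                           __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

No flag-output asm is involved, so this variant should also build with clang.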
