lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 2 Apr 2019 13:33:06 +0200
From:   Alexander Potapenko <glider@...gle.com>
To:     Paul McKenney <paulmck@...ux.ibm.com>,
        "H. Peter Anvin" <hpa@...or.com>,
        Peter Zijlstra <peterz@...radead.org>
Cc:     LKML <linux-kernel@...r.kernel.org>,
        Dmitriy Vyukov <dvyukov@...gle.com>,
        James Y Knight <jyknight@...gle.com>, x86@...nel.org,
        Ingo Molnar <mingo@...hat.com>
Subject: Re: [PATCH v2] x86/asm: fix assembly constraints in bitops

On Tue, Apr 2, 2019 at 1:28 PM Alexander Potapenko <glider@...gle.com> wrote:
>
> 1. Use memory clobber in bitops that touch arbitrary memory
>
> Certain bit operations that read/write bits take a base pointer and an
> arbitrarily large offset to address the bit relative to that base.
> Inline assembly constraints aren't expressive enough to tell the
> compiler that the assembly directive is going to touch a specific memory
> location of unknown size, therefore we have to use the "memory" clobber
> to indicate that the assembly is going to access memory locations other
> than those listed in the inputs/outputs.
> To indicate that BTR/BTS instructions don't necessarily touch the first
> sizeof(long) bytes of the argument, we also move the address to assembly
> inputs.
>
> This particular change leads to size increase of 124 kernel functions in
> a defconfig build. For some of them the diff is in NOP operations; others
> end up re-reading values from memory and may potentially slow down the
> execution. But without these clobbers the compiler is free to cache
> the contents of the bitmaps and use them as if they weren't changed by
> the inline assembly.
>
> 2. Use byte-sized arguments for operations touching single bytes.
>
> Passing a long value to ANDB/ORB/XORB instructions makes the compiler
> treat sizeof(long) bytes as being clobbered, which isn't the case. This
> may theoretically lead to worse code in the case of heavy optimization.
>
> Signed-off-by: Alexander Potapenko <glider@...gle.com>
> Cc: Dmitry Vyukov <dvyukov@...gle.com>
> Cc: Paul E. McKenney <paulmck@...ux.ibm.com>
> Cc: H. Peter Anvin <hpa@...or.com>
> Cc: Peter Zijlstra <peterz@...radead.org>
> Cc: James Y Knight <jyknight@...gle.com>
> ---
>  v2:
>   -- renamed the patch
>   -- addressed comment by Peter Zijlstra: don't use "+m" for functions
>   returning void
>   -- fixed input types for operations touching single bytes
> ---
>  arch/x86/include/asm/bitops.h | 41 +++++++++++++++--------------------
>  1 file changed, 18 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
> index d153d570bb04..c0cb9c5d3476 100644
> --- a/arch/x86/include/asm/bitops.h
> +++ b/arch/x86/include/asm/bitops.h
> @@ -36,16 +36,17 @@
>   * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
>   */
>
> -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
> +#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
> +#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))
>
> -#define ADDR                           BITOP_ADDR(addr)
> +#define ADDR                           RLONG_ADDR(addr)
>
>  /*
>   * We do the locked ops that don't return the old value as
>   * a mask operation on a byte.
>   */
>  #define IS_IMMEDIATE(nr)               (__builtin_constant_p(nr))
> -#define CONST_MASK_ADDR(nr, addr)      BITOP_ADDR((void *)(addr) + ((nr)>>3))
> +#define CONST_MASK_ADDR(nr, addr)      WBYTE_ADDR((void *)(addr) + ((nr)>>3))
>  #define CONST_MASK(nr)                 (1 << ((nr) & 7))
>
>  /**
> @@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long *addr)
>                         : "memory");
>         } else {
>                 asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
> -                       : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
> +                       : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
>         }
>  }
>
> @@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long *addr)
>   */
>  static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
>  {
> -       asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
> +       asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
>  }
>
>  /**
> @@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned long *addr)
>                         : "iq" ((u8)~CONST_MASK(nr)));
>         } else {
>                 asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
> -                       : BITOP_ADDR(addr)
> -                       : "Ir" (nr));
> +                       : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
>         }
>  }
>
> @@ -131,7 +131,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
>
>  static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
>  {
> -       asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
> +       asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
>  }
>
>  static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
> @@ -139,7 +139,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
>         bool negative;
>         asm volatile(LOCK_PREFIX "andb %2,%1"
>                 CC_SET(s)
> -               : CC_OUT(s) (negative), ADDR
> +               : CC_OUT(s) (negative), WBYTE_ADDR(addr)
>                 : "ir" ((char) ~(1 << nr)) : "memory");
>         return negative;
>  }
> @@ -155,13 +155,9 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
>   * __clear_bit() is non-atomic and implies release semantics before the memory
>   * operation. It can be used for an unlock if no other CPUs can concurrently
>   * modify other bits in the word.
> - *
> - * No memory barrier is required here, because x86 cannot reorder stores past
> - * older loads. Same principle as spin_unlock.
>   */
>  static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
>  {
> -       barrier();
Should have reflected this in the patch description.
>         __clear_bit(nr, addr);
>  }
>
> @@ -176,7 +172,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
>   */
>  static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
>  {
> -       asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
> +       asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
>  }
>
>  /**
> @@ -196,8 +192,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
>                         : "iq" ((u8)CONST_MASK(nr)));
>         } else {
>                 asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
> -                       : BITOP_ADDR(addr)
> -                       : "Ir" (nr));
> +                       : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
>         }
>  }
>
> @@ -242,8 +237,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
>
>         asm(__ASM_SIZE(bts) " %2,%1"
>             CC_SET(c)
> -           : CC_OUT(c) (oldbit), ADDR
> -           : "Ir" (nr));
> +           : CC_OUT(c) (oldbit)
> +           : ADDR, "Ir" (nr) : "memory");
>         return oldbit;
>  }
>
> @@ -282,8 +277,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
>
>         asm volatile(__ASM_SIZE(btr) " %2,%1"
>                      CC_SET(c)
> -                    : CC_OUT(c) (oldbit), ADDR
> -                    : "Ir" (nr));
> +                    : CC_OUT(c) (oldbit)
> +                    : ADDR, "Ir" (nr) : "memory");
>         return oldbit;
>  }
>
> @@ -294,8 +289,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
>
>         asm volatile(__ASM_SIZE(btc) " %2,%1"
>                      CC_SET(c)
> -                    : CC_OUT(c) (oldbit), ADDR
> -                    : "Ir" (nr) : "memory");
> +                    : CC_OUT(c) (oldbit)
> +                    : ADDR, "Ir" (nr) : "memory");
>
>         return oldbit;
>  }
> @@ -326,7 +321,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
>         asm volatile(__ASM_SIZE(bt) " %2,%1"
>                      CC_SET(c)
>                      : CC_OUT(c) (oldbit)
> -                    : "m" (*(unsigned long *)addr), "Ir" (nr));
> +                    : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
>
>         return oldbit;
>  }
> --
> 2.21.0.392.gf8f6787159e-goog
>


-- 
Alexander Potapenko
Software Engineer

Google Germany GmbH
Erika-Mann-Straße, 33
80636 München

Geschäftsführer: Paul Manicle, Halimah DeLaine Prado
Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ