Message-ID: <20251126151848.2f57d5d4@pumpkin>
Date: Wed, 26 Nov 2025 15:18:48 +0000
From: david laight <david.laight@...box.com>
To: Uros Bizjak <ubizjak@...il.com>
Cc: x86@...nel.org, linux-kernel@...r.kernel.org, Thomas Gleixner
 <tglx@...utronix.de>, Ingo Molnar <mingo@...nel.org>, Borislav Petkov
 <bp@...en8.de>, Dave Hansen <dave.hansen@...ux.intel.com>, "H. Peter Anvin"
 <hpa@...or.com>
Subject: Re: [PATCH] x86: Optimize __get_user_asm() assembly

On Wed, 26 Nov 2025 14:23:32 +0100
Uros Bizjak <ubizjak@...il.com> wrote:

> On x86, byte and word moves (MOVB, MOVW) write only to the low
> portion of a 32-bit register, leaving the upper bits unchanged.
> Modern compilers therefore prefer using MOVZBL and MOVZWL to load
> 8-bit and 16-bit values with zero-extension to the full register
> width.
> 
> Update __get_user_asm() to follow this convention by explicitly
> zero-extending 8-bit and 16-bit loads from memory.
> 
> An additional benefit of this change is that it enables the full
> integer register set to be used for 8-bit loads. Also, it
> eliminates the need for manual zero-extension of 8-bit values.
> 
> There is only a minimal increase in code size:

Interesting: where does that come from?
I'd have thought it would shrink the code.
The mov[sz]x instruction is one byte longer.
Perhaps a lot of the 8-bit values never needed zero-extending anyway?
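E.g. for a byte load (encodings from memory, so double-check):

	movb   (%rdi),%al	# 8a 07    - 2 bytes, bits 8-63 of %rax unchanged
	movzbl (%rdi),%eax	# 0f b6 07 - 3 bytes, bits 8-63 of %rax zeroed

So every converted load costs a byte, and only pays for itself where a
separate zero-extend used to follow.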

I'm trying to remember what the magic 'k' is for.
Does it make gcc substitute the 32-bit register name (e.g. %r10d for %r10)?
That would matter if 'x' is 64-bit when "=r" is used for a 32-bit read;
but in that case I think you need it on the 'movl' as well.
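If it's the gcc operand modifier I'm thinking of, 'k' prints the SImode
(32-bit) name of the register, so with something like (untested, made-up
variables):

	unsigned long x;
	asm ("movzbl %[umem],%k[output]"
	     : [output] "=r" (x) : [umem] "m" (*ptr));

gcc substitutes %eax rather than %rax when x lands in %rax, which is
what gas needs for a 32-bit destination.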

There is the slight problem (which affects much of the asm) that
gcc (I think) can't assume that the high 32 bits of a register result
from an inline asm are zero.
But if you return 'u64' then everything at the call site has to
be extended as well.
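To illustrate (untested, rd32() is just an example name):

	static inline unsigned int rd32(const unsigned int *p)
	{
		unsigned int v;
		asm ("movl %1,%0" : "=r" (v) : "m" (*p));
		return v;
	}

	unsigned long f(const unsigned int *p) { return rd32(p); }

I'd expect f() to get a redundant 'mov %eax,%eax' because gcc can't
see that the movl already zeroed the high bits.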
I wonder (I've not looked at the generated code yet) whether casting
the u64 result of the function to u32 avoids the zero-extend when the
value is then needed as a 64-bit one.
This may (or may not) affect get_user(), but it will affect the 'bit scan'
functions.

This means you might get better code if 'x' is always u64 (with the asm
using instructions that write the low 32 bits and so zero the high ones),
but with a cast to u32 before the value is used (especially when inlined).
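Roughly (untested):

	static inline unsigned long get8(const unsigned char *p)
	{
		unsigned long x;
		/* 32-bit write, so the cpu zeroes bits 32-63 of x */
		asm ("movzbl %1,%k0" : "=r" (x) : "m" (*p));
		return (unsigned int)x;
	}

with the cast (hopefully) letting gcc drop the extend at 64-bit call
sites.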

Of course, I might be smoking substances again...

	David

> 
>       text     data     bss      dec     hex filename
>   28258526  4810554  740932 33810012 203e65c vmlinux-old.o
>   28258550  4810554  740932 33810036 203e674 vmlinux-new.o
> 
> Signed-off-by: Uros Bizjak <ubizjak@...il.com>
> Cc: Thomas Gleixner <tglx@...utronix.de>
> Cc: Ingo Molnar <mingo@...nel.org>
> Cc: Borislav Petkov <bp@...en8.de>
> Cc: Dave Hansen <dave.hansen@...ux.intel.com>
> Cc: "H. Peter Anvin" <hpa@...or.com>
> -
> ---
>  arch/x86/include/asm/uaccess.h | 40 +++++++++++++++-------------------
>  1 file changed, 17 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
> index 367297b188c3..cb463c1301f6 100644
> --- a/arch/x86/include/asm/uaccess.h
> +++ b/arch/x86/include/asm/uaccess.h
> @@ -260,30 +260,27 @@ do {									\
>  	unsigned int __gu_low, __gu_high;				\
>  	const unsigned int __user *__gu_ptr;				\
>  	__gu_ptr = (const void __user *)(ptr);				\
> -	__get_user_asm(__gu_low, __gu_ptr, "l", "=r", label);		\
> -	__get_user_asm(__gu_high, __gu_ptr+1, "l", "=r", label);	\
> +	__get_user_asm(__gu_low, __gu_ptr, "movl", "", label);		\
> +	__get_user_asm(__gu_high, __gu_ptr+1, "movl", "", label);	\
>  	(x) = ((unsigned long long)__gu_high << 32) | __gu_low;		\
>  } while (0)
>  #else
>  #define __get_user_asm_u64(x, ptr, label)				\
> -	__get_user_asm(x, ptr, "q", "=r", label)
> +	__get_user_asm(x, ptr, "movq", "", label)
>  #endif
>  
>  #define __get_user_size(x, ptr, size, label)				\
>  do {									\
>  	__chk_user_ptr(ptr);						\
>  	switch (size) {							\
> -	case 1:	{							\
> -		unsigned char x_u8__;					\
> -		__get_user_asm(x_u8__, ptr, "b", "=q", label);		\
> -		(x) = x_u8__;						\
> +	case 1:								\
> +		__get_user_asm(x, ptr, "movzbl", "k", label);		\
>  		break;							\
> -	}								\
>  	case 2:								\
> -		__get_user_asm(x, ptr, "w", "=r", label);		\
> +		__get_user_asm(x, ptr, "movzwl", "k", label);		\
>  		break;							\
>  	case 4:								\
> -		__get_user_asm(x, ptr, "l", "=r", label);		\
> +		__get_user_asm(x, ptr, "movl", "", label);		\
>  		break;							\
>  	case 8:								\
>  		__get_user_asm_u64(x, ptr, label);			\
> @@ -294,11 +291,11 @@ do {									\
>  	instrument_get_user(x);						\
>  } while (0)
>  
> -#define __get_user_asm(x, addr, itype, ltype, label)			\
> +#define __get_user_asm(x, addr, insn, opmod, label)			\
>  	asm_goto_output("\n"						\
> -		     "1:	mov"itype" %[umem],%[output]\n"		\
> +		     "1:	" insn " %[umem],%" opmod "[output]\n"	\
>  		     _ASM_EXTABLE_UA(1b, %l2)				\
> -		     : [output] ltype(x)				\
> +		     : [output] "=r" (x)				\
>  		     : [umem] "m" (__m(addr))				\
>  		     : : label)
>  
> @@ -326,26 +323,23 @@ do {									\
>  })
>  
>  #else
> -#define __get_user_asm_u64(x, ptr, retval) \
> -	 __get_user_asm(x, ptr, retval, "q")
> +#define __get_user_asm_u64(x, ptr, retval)				\
> +	__get_user_asm(x, ptr, "movq", "", retval)
>  #endif
>  
>  #define __get_user_size(x, ptr, size, retval)				\
>  do {									\
> -	unsigned char x_u8__;						\
> -									\
>  	retval = 0;							\
>  	__chk_user_ptr(ptr);						\
>  	switch (size) {							\
>  	case 1:								\
> -		__get_user_asm(x_u8__, ptr, retval, "b");		\
> -		(x) = x_u8__;						\
> +		 __get_user_asm(x, ptr, "movzbl", "k", retval);		\
>  		break;							\
>  	case 2:								\
> -		__get_user_asm(x, ptr, retval, "w");			\
> +		__get_user_asm(x, ptr, "movzwl", "k", retval);		\
>  		break;							\
>  	case 4:								\
> -		__get_user_asm(x, ptr, retval, "l");			\
> +		__get_user_asm(x, ptr, "movl", "", retval);		\
>  		break;							\
>  	case 8:								\
>  		__get_user_asm_u64(x, ptr, retval);			\
> @@ -355,9 +349,9 @@ do {									\
>  	}								\
>  } while (0)
>  
> -#define __get_user_asm(x, addr, err, itype)				\
> +#define __get_user_asm(x, addr, insn, opmod, err)			\
>  	asm volatile("\n"						\
> -		     "1:	mov"itype" %[umem],%[output]\n"		\
> +		     "1:	" insn " %[umem],%" opmod "[output]\n"	\
>  		     "2:\n"						\
>  		     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG | \
>  					   EX_FLAG_CLEAR_AX,		\

