[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251126151848.2f57d5d4@pumpkin>
Date: Wed, 26 Nov 2025 15:18:48 +0000
From: david laight <david.laight@...box.com>
To: Uros Bizjak <ubizjak@...il.com>
Cc: x86@...nel.org, linux-kernel@...r.kernel.org, Thomas Gleixner
<tglx@...utronix.de>, Ingo Molnar <mingo@...nel.org>, Borislav Petkov
<bp@...en8.de>, Dave Hansen <dave.hansen@...ux.intel.com>, "H. Peter Anvin"
<hpa@...or.com>
Subject: Re: [PATCH] x86: Optimize __get_user_asm() assembly
On Wed, 26 Nov 2025 14:23:32 +0100
Uros Bizjak <ubizjak@...il.com> wrote:
> On x86, byte and word moves (MOVB, MOVW) write only to the low
> portion of a 32-bit register, leaving the upper bits unchanged.
> Modern compilers therefore prefer using MOVZBL and MOVZWL to load
> 8-bit and 16-bit values with zero-extension to the full register
> width.
>
> Update __get_user_asm() to follow this convention by explicitly
> zero-extending 8-bit and 16-bit loads from memory.
>
> An additional benefit of this change is that it enables the full
> integer register set to be used for 8-bit loads. Also, it
> eliminates the need for manual zero-extension of 8-bit values.
>
> There is only a minimal increase in code size:
Interesting — where does that come from?
I'd have thought it should shrink it.
The mov[sz]x is one byte longer.
Perhaps a lot of 8-bit values are never zero-extended?
I'm trying to remember what the magic 'k' is for.
Does gas treat %krxx as %dxx ?
Which would matter if 'x' is 64bit when using "=r" for 32bit reads.
But, in that case, I think you need it on the 'movl' as well.
There is the slight problem (which affects a lot of the asm) that
gcc (I think) can't assume that the high 32bits of a register result
from an inline asm function are zero.
But if you return 'u64' then everything at the call site has to
be extended as well.
I wonder (I've not looked at the generated code yet) whether casting
the u64 result from the function to u32 has the effect of avoiding
the zero extend if the value is needed as a 64bit one.
This may (or may not) affect get_user() but will affect the 'bit scan'
functions.
This means you might get better code if 'x' is always u64 (with gas
generating instructions that write to the low 32 bits - zero the high ones)
but with a cast to u32 before any value can be used (esp when inlined).
Of course, I might be smoking substances again...
David
>
> text data bss dec hex filename
> 28258526 4810554 740932 33810012 03e65c vmlinux-old.o
> 28258550 4810554 740932 33810036 03e674 vmlinux-new.o
>
> Signed-off-by: Uros Bizjak <ubizjak@...il.com>
> Cc: Thomas Gleixner <tglx@...utronix.de>
> Cc: Ingo Molnar <mingo@...nel.org>
> Cc: Borislav Petkov <bp@...en8.de>
> Cc: Dave Hansen <dave.hansen@...ux.intel.com>
> Cc: "H. Peter Anvin" <hpa@...or.com>
> -
> ---
> arch/x86/include/asm/uaccess.h | 40 +++++++++++++++-------------------
> 1 file changed, 17 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
> index 367297b188c3..cb463c1301f6 100644
> --- a/arch/x86/include/asm/uaccess.h
> +++ b/arch/x86/include/asm/uaccess.h
> @@ -260,30 +260,27 @@ do { \
> unsigned int __gu_low, __gu_high; \
> const unsigned int __user *__gu_ptr; \
> __gu_ptr = (const void __user *)(ptr); \
> - __get_user_asm(__gu_low, __gu_ptr, "l", "=r", label); \
> - __get_user_asm(__gu_high, __gu_ptr+1, "l", "=r", label); \
> + __get_user_asm(__gu_low, __gu_ptr, "movl", "", label); \
> + __get_user_asm(__gu_high, __gu_ptr+1, "movl", "", label); \
> (x) = ((unsigned long long)__gu_high << 32) | __gu_low; \
> } while (0)
> #else
> #define __get_user_asm_u64(x, ptr, label) \
> - __get_user_asm(x, ptr, "q", "=r", label)
> + __get_user_asm(x, ptr, "movq", "", label)
> #endif
>
> #define __get_user_size(x, ptr, size, label) \
> do { \
> __chk_user_ptr(ptr); \
> switch (size) { \
> - case 1: { \
> - unsigned char x_u8__; \
> - __get_user_asm(x_u8__, ptr, "b", "=q", label); \
> - (x) = x_u8__; \
> + case 1: \
> + __get_user_asm(x, ptr, "movzbl", "k", label); \
> break; \
> - } \
> case 2: \
> - __get_user_asm(x, ptr, "w", "=r", label); \
> + __get_user_asm(x, ptr, "movzwl", "k", label); \
> break; \
> case 4: \
> - __get_user_asm(x, ptr, "l", "=r", label); \
> + __get_user_asm(x, ptr, "movl", "", label); \
> break; \
> case 8: \
> __get_user_asm_u64(x, ptr, label); \
> @@ -294,11 +291,11 @@ do { \
> instrument_get_user(x); \
> } while (0)
>
> -#define __get_user_asm(x, addr, itype, ltype, label) \
> +#define __get_user_asm(x, addr, insn, opmod, label) \
> asm_goto_output("\n" \
> - "1: mov"itype" %[umem],%[output]\n" \
> + "1: " insn " %[umem],%" opmod "[output]\n" \
> _ASM_EXTABLE_UA(1b, %l2) \
> - : [output] ltype(x) \
> + : [output] "=r" (x) \
> : [umem] "m" (__m(addr)) \
> : : label)
>
> @@ -326,26 +323,23 @@ do { \
> })
>
> #else
> -#define __get_user_asm_u64(x, ptr, retval) \
> - __get_user_asm(x, ptr, retval, "q")
> +#define __get_user_asm_u64(x, ptr, retval) \
> + __get_user_asm(x, ptr, "movq", "", retval)
> #endif
>
> #define __get_user_size(x, ptr, size, retval) \
> do { \
> - unsigned char x_u8__; \
> - \
> retval = 0; \
> __chk_user_ptr(ptr); \
> switch (size) { \
> case 1: \
> - __get_user_asm(x_u8__, ptr, retval, "b"); \
> - (x) = x_u8__; \
> + __get_user_asm(x, ptr, "movzbl", "k", retval); \
> break; \
> case 2: \
> - __get_user_asm(x, ptr, retval, "w"); \
> + __get_user_asm(x, ptr, "movzwl", "k", retval); \
> break; \
> case 4: \
> - __get_user_asm(x, ptr, retval, "l"); \
> + __get_user_asm(x, ptr, "movl", "", retval); \
> break; \
> case 8: \
> __get_user_asm_u64(x, ptr, retval); \
> @@ -355,9 +349,9 @@ do { \
> } \
> } while (0)
>
> -#define __get_user_asm(x, addr, err, itype) \
> +#define __get_user_asm(x, addr, insn, opmod, err) \
> asm volatile("\n" \
> - "1: mov"itype" %[umem],%[output]\n" \
> + "1: " insn " %[umem],%" opmod "[output]\n" \
> "2:\n" \
> _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG | \
> EX_FLAG_CLEAR_AX, \
Powered by blists - more mailing lists