lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <417271c4-0297-41da-a39b-5d5b28dd73f9@zytor.com>
Date: Wed, 15 Jan 2025 23:27:11 -0800
From: Xin Li <xin@...or.com>
To: Ethan Zhao <haifeng.zhao@...ux.intel.com>, linux-kernel@...r.kernel.org,
        stable@...r.kernel.org
Cc: tglx@...utronix.de, dave.hansen@...ux.intel.com, x86@...nel.org,
        hpa@...or.com, andrew.cooper3@...rix.com, mingo@...hat.com,
        bp@...en8.de, etzhao@...look.com
Subject: Re: [PATCH] x86/fred: Optimize the FRED entry by prioritizing
 high-probability event dispatching

On 1/15/2025 10:51 PM, Ethan Zhao wrote:
> External interrupts (EVENT_TYPE_EXTINT) and system calls (EVENT_TYPE_OTHER)
> occur more frequently than other events in a typical system. Prioritizing
> these events saves CPU cycles and optimizes the efficiency of performance-
> critical paths.

We are deliberately holding off on sending performance improvement patches
at this point. But first of all, please read:
     https://lore.kernel.org/lkml/87fs766o3t.ffs@tglx/

Thanks!
     Xin

> 
> When examining the compiler-generated assembly code for event dispatching
> in the functions fred_entry_from_user() and fred_entry_from_kernel(), it
> was observed that the compiler intelligently uses a binary search to match
> all event type values (0-7) and perform dispatching. As a result, even if
> the following cases:
> 
> 	case EVENT_TYPE_EXTINT:
> 		return fred_extint(regs);
> 	case EVENT_TYPE_OTHER:
> 		return fred_other(regs);
> 
> are placed at the beginning of the switch() statement, the generated
> assembly code would remain the same, and the expected prioritization would
> not be achieved.
> 
> Command line to check the assembly code generated by the compiler for
> fred_entry_from_user():
> 
> $objdump -d vmlinux.o | awk '/<fred_entry_from_user>:/{c=65} c&&c--'
> 
> 00000000000015a0 <fred_entry_from_user>:
> 15a0:       0f b6 87 a6 00 00 00    movzbl 0xa6(%rdi),%eax
> 15a7:       48 8b 77 78             mov    0x78(%rdi),%rsi
> 15ab:       55                      push   %rbp
> 15ac:       48 c7 47 78 ff ff ff    movq   $0xffffffffffffffff,0x78(%rdi)
> 15b3:       ff
> 15b4:       83 e0 0f                and    $0xf,%eax
> 15b7:       48 89 e5                mov    %rsp,%rbp
> 15ba:       3c 04                   cmp    $0x4,%al
> -->>			            /* match 4(EVENT_TYPE_SWINT) first */
> 15bc:       74 78                   je     1636 <fred_entry_from_user+0x96>
> 15be:       77 15                   ja     15d5 <fred_entry_from_user+0x35>
> 15c0:       3c 02                   cmp    $0x2,%al
> 15c2:       74 53                   je     1617 <fred_entry_from_user+0x77>
> 15c4:       77 65                   ja     162b <fred_entry_from_user+0x8b>
> 15c6:       84 c0                   test   %al,%al
> 15c8:       75 42                   jne    160c <fred_entry_from_user+0x6c>
> 15ca:       e8 71 fc ff ff          callq  1240 <fred_extint>
> 15cf:       5d                      pop    %rbp
> 15d0:       e9 00 00 00 00          jmpq   15d5 <fred_entry_from_user+0x35>
> 15d5:       3c 06                   cmp    $0x6,%al
> 15d7:       74 7c                   je     1655 <fred_entry_from_user+0xb5>
> 15d9:       72 66                   jb     1641 <fred_entry_from_user+0xa1>
> 15db:       3c 07                   cmp    $0x7,%al
> 15dd:       75 2d                   jne    160c <fred_entry_from_user+0x6c>
> 15df:       8b 87 a4 00 00 00       mov    0xa4(%rdi),%eax
> 15e5:       25 ff 00 00 02          and    $0x20000ff,%eax
> 15ea:       3d 01 00 00 02          cmp    $0x2000001,%eax
> 15ef:       75 6f                   jne    1660 <fred_entry_from_user+0xc0>
> 15f1:       48 8b 77 50             mov    0x50(%rdi),%rsi
> 15f5:       48 c7 47 50 da ff ff    movq   $0xffffffffffffffda,0x50(%rdi)
> ... ...
> 
> Command line to check the assembly code generated by the compiler for
> fred_entry_from_kernel():
> 
> $objdump -d vmlinux.o | awk '/<fred_entry_from_kernel>:/{c=65} c&&c--'
> 
> 00000000000016b0 <fred_entry_from_kernel>:
> 16b0:       0f b6 87 a6 00 00 00    movzbl 0xa6(%rdi),%eax
> 16b7:       48 8b 77 78             mov    0x78(%rdi),%rsi
> 16bb:       55                      push   %rbp
> 16bc:       48 c7 47 78 ff ff ff    movq   $0xffffffffffffffff,0x78(%rdi)
> 16c3:       ff
> 16c4:       83 e0 0f                and    $0xf,%eax
> 16c7:       48 89 e5                mov    %rsp,%rbp
> 16ca:       3c 03                   cmp    $0x3,%al
> -->>                                /* match 3(EVENT_TYPE_HWEXC) first */
> 16cc:       74 3c                 je     170a <fred_entry_from_kernel+0x5a>
> 16ce:       76 13                 jbe    16e3 <fred_entry_from_kernel+0x33>
> 16d0:       3c 05                 cmp    $0x5,%al
> 16d2:       74 41                 je     1715 <fred_entry_from_kernel+0x65>
> 16d4:       3c 06                 cmp    $0x6,%al
> 16d6:       75 27                 jne    16ff <fred_entry_from_kernel+0x4f>
> 16d8:       e8 73 fe ff ff        callq  1550 <fred_swexc.isra.3>
> 16dd:       5d                    pop    %rbp
> ... ...
> 
> Therefore, it is necessary to handle EVENT_TYPE_EXTINT and EVENT_TYPE_OTHER
> before the switch statement using if-else syntax to ensure the compiler
> generates the desired code. After applying the patch, the verification
> results are as follows:
> 
> $objdump -d vmlinux.o | awk '/<fred_entry_from_user>:/{c=65} c&&c--'
> 
> 00000000000015a0 <fred_entry_from_user>:
> 15a0:       0f b6 87 a6 00 00 00    movzbl 0xa6(%rdi),%eax
> 15a7:       48 8b 77 78             mov    0x78(%rdi),%rsi
> 15ab:       55                      push   %rbp
> 15ac:       48 c7 47 78 ff ff ff    movq   $0xffffffffffffffff,0x78(%rdi)
> 15b3:       ff
> 15b4:       48 89 e5                mov    %rsp,%rbp
> 15b7:       83 e0 0f                and    $0xf,%eax
> 15ba:       74 34                   je     15f0 <fred_entry_from_user+0x50>
> -->>				    /* match 0(EVENT_TYPE_EXTINT) first */
> 15bc:       3c 07                   cmp    $0x7,%al
> -->>                                /* match 7(EVENT_TYPE_OTHER) second */
> 15be:       74 6e                   je     162e <fred_entry_from_user+0x8e>
> 15c0:       3c 04                   cmp    $0x4,%al
> 15c2:       0f 84 93 00 00 00       je     165b <fred_entry_from_user+0xbb>
> 15c8:       76 13                   jbe    15dd <fred_entry_from_user+0x3d>
> 15ca:       3c 05                   cmp    $0x5,%al
> 15cc:       74 41                   je     160f <fred_entry_from_user+0x6f>
> 15ce:       3c 06                   cmp    $0x6,%al
> 15d0:       75 51                   jne    1623 <fred_entry_from_user+0x83>
> 15d2:       e8 79 ff ff ff          callq  1550 <fred_swexc.isra.3>
> 15d7:       5d                      pop    %rbp
> 15d8:       e9 00 00 00 00          jmpq   15dd <fred_entry_from_user+0x3d>
> 15dd:       3c 02                   cmp    $0x2,%al
> 15df:       74 1a                   je     15fb <fred_entry_from_user+0x5b>
> 15e1:       3c 03                   cmp    $0x3,%al
> 15e3:       75 3e                   jne    1623 <fred_entry_from_user+0x83>
> ... ...
> 
> The same desired code is generated for fred_entry_from_kernel(), so its
> disassembly is not repeated here.
> 
> While the C code with if-else placed before switch() may appear ugly, it
> works. Additionally, using a jump table is not advisable; even if the jump
> table resides in the L1 cache, the cost of loading it is over 10 times the
> latency of a cmp instruction.
> 
> Signed-off-by: Ethan Zhao <haifeng.zhao@...ux.intel.com>
> ---
> base commit: 619f0b6fad524f08d493a98d55bac9ab8895e3a6
> ---
>   arch/x86/entry/entry_fred.c | 25 +++++++++++++++++++------
>   1 file changed, 19 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
> index f004a4dc74c2..591f47771ecf 100644
> --- a/arch/x86/entry/entry_fred.c
> +++ b/arch/x86/entry/entry_fred.c
> @@ -228,9 +228,18 @@ __visible noinstr void fred_entry_from_user(struct pt_regs *regs)
>   	/* Invalidate orig_ax so that syscall_get_nr() works correctly */
>   	regs->orig_ax = -1;
>   
> -	switch (regs->fred_ss.type) {
> -	case EVENT_TYPE_EXTINT:
> +	if (regs->fred_ss.type == EVENT_TYPE_EXTINT)
>   		return fred_extint(regs);
> +	else if (regs->fred_ss.type == EVENT_TYPE_OTHER)
> +		return fred_other(regs);
> +
> +	/*
> +	 * Dispatch EVENT_TYPE_EXTINT and EVENT_TYPE_OTHER(syscall) type events
> +	 * first due to their high probability and let the compiler create binary search
> +	 * dispatching for the remaining events
> +	 */
> +
> +	switch (regs->fred_ss.type) {
>   	case EVENT_TYPE_NMI:
>   		if (likely(regs->fred_ss.vector == X86_TRAP_NMI))
>   			return fred_exc_nmi(regs);
> @@ -245,8 +254,6 @@ __visible noinstr void fred_entry_from_user(struct pt_regs *regs)
>   		break;
>   	case EVENT_TYPE_SWEXC:
>   		return fred_swexc(regs, error_code);
> -	case EVENT_TYPE_OTHER:
> -		return fred_other(regs);
>   	default: break;
>   	}
>   
> @@ -260,9 +267,15 @@ __visible noinstr void fred_entry_from_kernel(struct pt_regs *regs)
>   	/* Invalidate orig_ax so that syscall_get_nr() works correctly */
>   	regs->orig_ax = -1;
>   
> -	switch (regs->fred_ss.type) {
> -	case EVENT_TYPE_EXTINT:
> +	if (regs->fred_ss.type == EVENT_TYPE_EXTINT)
>   		return fred_extint(regs);
> +
> +	/*
> +	 * Dispatch EVENT_TYPE_EXTINT type event first due to its high probability
> +	 * and let the compiler do binary search dispatching for the other events
> +	 */
> +
> +	switch (regs->fred_ss.type) {
>   	case EVENT_TYPE_NMI:
>   		if (likely(regs->fred_ss.vector == X86_TRAP_NMI))
>   			return fred_exc_nmi(regs);


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ