[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <417271c4-0297-41da-a39b-5d5b28dd73f9@zytor.com>
Date: Wed, 15 Jan 2025 23:27:11 -0800
From: Xin Li <xin@...or.com>
To: Ethan Zhao <haifeng.zhao@...ux.intel.com>, linux-kernel@...r.kernel.org,
stable@...r.kernel.org
Cc: tglx@...utronix.de, dave.hansen@...ux.intel.com, x86@...nel.org,
hpa@...or.com, andrew.cooper3@...rix.com, mingo@...hat.com,
bp@...en8.de, etzhao@...look.com
Subject: Re: [PATCH] x86/fred: Optimize the FRED entry by prioritizing
high-probability event dispatching
On 1/15/2025 10:51 PM, Ethan Zhao wrote:
> External interrupts (EVENT_TYPE_EXTINT) and system calls (EVENT_TYPE_OTHER)
> occur more frequently than other events in a typical system. Prioritizing
> these events saves CPU cycles and optimizes the efficiency of performance-
> critical paths.
We are deliberately holding off on sending performance-improvement patches
at this point. But first of all, please read:
https://lore.kernel.org/lkml/87fs766o3t.ffs@tglx/
Thanks!
Xin
>
> When examining the compiler-generated assembly code for event dispatching
> in the functions fred_entry_from_user() and fred_entry_from_kernel(), it
> was observed that the compiler intelligently uses a binary search to match
> all event type values (0-7) and perform dispatching. As a result, even if
> the following cases:
>
> case EVENT_TYPE_EXTINT:
> return fred_extint(regs);
> case EVENT_TYPE_OTHER:
> return fred_other(regs);
>
> are placed at the beginning of the switch() statement, the generated
> assembly code would remain the same, and the expected prioritization would
> not be achieved.
>
> Command line to check the assembly code generated by the compiler for
> fred_entry_from_user():
>
> $objdump -d vmlinux.o | awk '/<fred_entry_from_user>:/{c=65} c&&c--'
>
> 00000000000015a0 <fred_entry_from_user>:
> 15a0: 0f b6 87 a6 00 00 00 movzbl 0xa6(%rdi),%eax
> 15a7: 48 8b 77 78 mov 0x78(%rdi),%rsi
> 15ab: 55 push %rbp
> 15ac: 48 c7 47 78 ff ff ff movq $0xffffffffffffffff,0x78(%rdi)
> 15b3: ff
> 15b4: 83 e0 0f and $0xf,%eax
> 15b7: 48 89 e5 mov %rsp,%rbp
> 15ba: 3c 04 cmp $0x4,%al
> -->> /* match 4(EVENT_TYPE_SWINT) first */
> 15bc: 74 78 je 1636 <fred_entry_from_user+0x96>
> 15be: 77 15 ja 15d5 <fred_entry_from_user+0x35>
> 15c0: 3c 02 cmp $0x2,%al
> 15c2: 74 53 je 1617 <fred_entry_from_user+0x77>
> 15c4: 77 65 ja 162b <fred_entry_from_user+0x8b>
> 15c6: 84 c0 test %al,%al
> 15c8: 75 42 jne 160c <fred_entry_from_user+0x6c>
> 15ca: e8 71 fc ff ff callq 1240 <fred_extint>
> 15cf: 5d pop %rbp
> 15d0: e9 00 00 00 00 jmpq 15d5 <fred_entry_from_user+0x35>
> 15d5: 3c 06 cmp $0x6,%al
> 15d7: 74 7c je 1655 <fred_entry_from_user+0xb5>
> 15d9: 72 66 jb 1641 <fred_entry_from_user+0xa1>
> 15db: 3c 07 cmp $0x7,%al
> 15dd: 75 2d jne 160c <fred_entry_from_user+0x6c>
> 15df: 8b 87 a4 00 00 00 mov 0xa4(%rdi),%eax
> 15e5: 25 ff 00 00 02 and $0x20000ff,%eax
> 15ea: 3d 01 00 00 02 cmp $0x2000001,%eax
> 15ef: 75 6f jne 1660 <fred_entry_from_user+0xc0>
> 15f1: 48 8b 77 50 mov 0x50(%rdi),%rsi
> 15f5: 48 c7 47 50 da ff ff movq $0xffffffffffffffda,0x50(%rdi)
> ... ...
>
> Command line to check the assembly code generated by the compiler for
> fred_entry_from_kernel():
>
> $objdump -d vmlinux.o | awk '/<fred_entry_from_kernel>:/{c=65} c&&c--'
>
> 00000000000016b0 <fred_entry_from_kernel>:
> 16b0: 0f b6 87 a6 00 00 00 movzbl 0xa6(%rdi),%eax
> 16b7: 48 8b 77 78 mov 0x78(%rdi),%rsi
> 16bb: 55 push %rbp
> 16bc: 48 c7 47 78 ff ff ff movq $0xffffffffffffffff,0x78(%rdi)
> 16c3: ff
> 16c4: 83 e0 0f and $0xf,%eax
> 16c7: 48 89 e5 mov %rsp,%rbp
> 16ca: 3c 03 cmp $0x3,%al
> -->> /* match 3(EVENT_TYPE_HWEXC) first */
> 16cc: 74 3c je 170a <fred_entry_from_kernel+0x5a>
> 16ce: 76 13 jbe 16e3 <fred_entry_from_kernel+0x33>
> 16d0: 3c 05 cmp $0x5,%al
> 16d2: 74 41 je 1715 <fred_entry_from_kernel+0x65>
> 16d4: 3c 06 cmp $0x6,%al
> 16d6: 75 27 jne 16ff <fred_entry_from_kernel+0x4f>
> 16d8: e8 73 fe ff ff callq 1550 <fred_swexc.isra.3>
> 16dd: 5d pop %rbp
> ... ...
>
> Therefore, it is necessary to handle EVENT_TYPE_EXTINT and EVENT_TYPE_OTHER
> before the switch statement using if-else syntax to ensure the compiler
> generates the desired code. After applying the patch, the verification
> results are as follows:
>
> $objdump -d vmlinux.o | awk '/<fred_entry_from_user>:/{c=65} c&&c--'
>
> 00000000000015a0 <fred_entry_from_user>:
> 15a0: 0f b6 87 a6 00 00 00 movzbl 0xa6(%rdi),%eax
> 15a7: 48 8b 77 78 mov 0x78(%rdi),%rsi
> 15ab: 55 push %rbp
> 15ac: 48 c7 47 78 ff ff ff movq $0xffffffffffffffff,0x78(%rdi)
> 15b3: ff
> 15b4: 48 89 e5 mov %rsp,%rbp
> 15b7: 83 e0 0f and $0xf,%eax
> 15ba: 74 34 je 15f0 <fred_entry_from_user+0x50>
> -->> /* match 0(EVENT_TYPE_EXTINT) first */
> 15bc: 3c 07 cmp $0x7,%al
> -->> /* match 7(EVENT_TYPE_OTHER) second */
> 15be: 74 6e je 162e <fred_entry_from_user+0x8e>
> 15c0: 3c 04 cmp $0x4,%al
> 15c2: 0f 84 93 00 00 00 je 165b <fred_entry_from_user+0xbb>
> 15c8: 76 13 jbe 15dd <fred_entry_from_user+0x3d>
> 15ca: 3c 05 cmp $0x5,%al
> 15cc: 74 41 je 160f <fred_entry_from_user+0x6f>
> 15ce: 3c 06 cmp $0x6,%al
> 15d0: 75 51 jne 1623 <fred_entry_from_user+0x83>
> 15d2: e8 79 ff ff ff callq 1550 <fred_swexc.isra.3>
> 15d7: 5d pop %rbp
> 15d8: e9 00 00 00 00 jmpq 15dd <fred_entry_from_user+0x3d>
> 15dd: 3c 02 cmp $0x2,%al
> 15df: 74 1a je 15fb <fred_entry_from_user+0x5b>
> 15e1: 3c 03 cmp $0x3,%al
> 15e3: 75 3e jne 1623 <fred_entry_from_user+0x83>
> ... ...
>
> The same desired code is generated for fred_entry_from_kernel(), so its
> disassembly is not repeated here.
>
> While the C code with if-else placed before switch() may appear ugly, it
> works. Additionally, using a jump table is not advisable; even if the jump
> table resides in the L1 cache, the cost of loading it is over 10 times the
> latency of a cmp instruction.
>
> Signed-off-by: Ethan Zhao <haifeng.zhao@...ux.intel.com>
> ---
> base commit: 619f0b6fad524f08d493a98d55bac9ab8895e3a6
> ---
> arch/x86/entry/entry_fred.c | 25 +++++++++++++++++++------
> 1 file changed, 19 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
> index f004a4dc74c2..591f47771ecf 100644
> --- a/arch/x86/entry/entry_fred.c
> +++ b/arch/x86/entry/entry_fred.c
> @@ -228,9 +228,18 @@ __visible noinstr void fred_entry_from_user(struct pt_regs *regs)
> /* Invalidate orig_ax so that syscall_get_nr() works correctly */
> regs->orig_ax = -1;
>
> - switch (regs->fred_ss.type) {
> - case EVENT_TYPE_EXTINT:
> + if (regs->fred_ss.type == EVENT_TYPE_EXTINT)
> return fred_extint(regs);
> + else if (regs->fred_ss.type == EVENT_TYPE_OTHER)
> + return fred_other(regs);
> +
> + /*
> + * Dispatch EVENT_TYPE_EXTINT and EVENT_TYPE_OTHER(syscall) type events
> + * first due to their high probability and let the compiler create binary search
> + * dispatching for the remaining events
> + */
> +
> + switch (regs->fred_ss.type) {
> case EVENT_TYPE_NMI:
> if (likely(regs->fred_ss.vector == X86_TRAP_NMI))
> return fred_exc_nmi(regs);
> @@ -245,8 +254,6 @@ __visible noinstr void fred_entry_from_user(struct pt_regs *regs)
> break;
> case EVENT_TYPE_SWEXC:
> return fred_swexc(regs, error_code);
> - case EVENT_TYPE_OTHER:
> - return fred_other(regs);
> default: break;
> }
>
> @@ -260,9 +267,15 @@ __visible noinstr void fred_entry_from_kernel(struct pt_regs *regs)
> /* Invalidate orig_ax so that syscall_get_nr() works correctly */
> regs->orig_ax = -1;
>
> - switch (regs->fred_ss.type) {
> - case EVENT_TYPE_EXTINT:
> + if (regs->fred_ss.type == EVENT_TYPE_EXTINT)
> return fred_extint(regs);
> +
> + /*
> + * Dispatch EVENT_TYPE_EXTINT type event first due to its high probability
> + * and let the compiler do binary search dispatching for the other events
> + */
> +
> + switch (regs->fred_ss.type) {
> case EVENT_TYPE_NMI:
> if (likely(regs->fred_ss.vector == X86_TRAP_NMI))
> return fred_exc_nmi(regs);
Powered by blists - more mailing lists