lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <33b89995-b638-4a6b-a75f-8278562237c4@linux.intel.com>
Date: Fri, 17 Jan 2025 13:18:01 +0800
From: Ethan Zhao <haifeng.zhao@...ux.intel.com>
To: Xin Li <xin@...or.com>, "H. Peter Anvin" <hpa@...or.com>,
 Ethan Zhao <etzhao@...look.com>, linux-kernel@...r.kernel.org,
 stable@...r.kernel.org
Cc: tglx@...utronix.de, dave.hansen@...ux.intel.com, x86@...nel.org,
 andrew.cooper3@...rix.com, mingo@...hat.com, bp@...en8.de
Subject: Re: [PATCH] x86/fred: Optimize the FRED entry by prioritizing
 high-probability event dispatching


在 2025/1/17 12:43, Xin Li 写道:
> On 1/16/2025 8:19 PM, Ethan Zhao wrote:
>>
>> 在 2025/1/17 9:21, H. Peter Anvin 写道:
>>> On 1/16/25 16:37, Ethan Zhao wrote:
>>>>>
>>>>> hpa suggested to introduce "switch_likely" for this kind of 
>>>>> optimization
>>>>> on a switch statement, which is also easier to read.  I measured 
>>>>> it with
>>>>> a user space focus test, it does improve performance a lot. But 
>>>>> obviously there are still a lot of work to do.
>>>>
>>>> Find a way to instruct compiler to pick the right hot branch 
>>>> meanwhile make folks
>>>> reading happy... yup, a lot of work.
>>>>
>>>
>>> It's not that complicated, believe it or not.
>>>
>>> /*
>>>  * switch(v) biased for speed in the case v == l
>>>  *
>>>  * Note: gcc is quite sensitive to the exact form of this
>>>  * expression.
>>>  */
>>> #define switch_likely(v,l) \
>>>     switch((__typeof__(v))__builtin_expect((v),(l)))
>>
>> I tried this macro as following, but got something really *weird* 
>> from gcc.
>>
>> +#define switch_likely(v,l) \
>> +        switch((__typeof__(v))__builtin_expect((v),(l)))
>> +
>>   __visible noinstr void fred_entry_from_user(struct pt_regs *regs)
>>   {
>>          unsigned long error_code = regs->orig_ax;
>> +       unsigned short etype = regs->fred_ss.type & 0xf;
>>
>>          /* Invalidate orig_ax so that syscall_get_nr() works 
>> correctly */
>>          regs->orig_ax = -1;
>>
>> -       switch (regs->fred_ss.type) {
>> +       switch_likely ((etype == EVENT_TYPE_EXTINT || etype == 
>> EVENT_TYPE_OTHER), etype) {
>
> Just swap the 2 arguments, and it should be:
> +    switch_likely (etype, EVENT_TYPE_OTHER) {
>
>
after swapped the parameters as following:
+#define switch_likely(v,l) \
+ switch((__typeof__(v))__builtin_expect((v),(l)))
+
  __visible noinstr void fred_entry_from_user(struct pt_regs *regs)
  {
         unsigned long error_code = regs->orig_ax;
+       unsigned short etype = regs->fred_ss.type & 0xf;

         /* Invalidate orig_ax so that syscall_get_nr() works correctly */
         regs->orig_ax = -1;

-       switch (regs->fred_ss.type) {
+       switch_likely (etype, (EVENT_TYPE_EXTINT == etype || 
EVENT_TYPE_OTHER == etype)) {
         case EVENT_TYPE_EXTINT:
                 return fred_extint(regs);
         case EVENT_TYPE_NMI:
@@ -256,11 +260,12 @@ __visible noinstr void fred_entry_from_user(struct 
pt_regs *regs)
  __visible noinstr void fred_entry_from_kernel(struct pt_regs *regs)
  {
         unsigned long error_code = regs->orig_ax;
+       unsigned short etype = regs->fred_ss.type & 0xf;

         /* Invalidate orig_ax so that syscall_get_nr() works correctly */
         regs->orig_ax = -1;

-       switch (regs->fred_ss.type) {
+       switch_likely (etype, EVENT_TYPE_EXTINT == etype) {
         case EVENT_TYPE_EXTINT:
                 return fred_extint(regs);

         case EVENT_TYPE_NMI:


Good news -- the gcc works normal, generated right asm code,
Bad news -- it fails back to binary search method to do matching, 
ignored our hints.


$objdump -d vmlinux.o | awk '/<fred_entry_from_kernel>:/{c=65} c&&c--'
00000000000016b0 <fred_entry_from_kernel>:
     16b0:       0f b6 87 a6 00 00 00 movzbl 0xa6(%rdi),%eax
     16b7:       48 8b 77 78 mov    0x78(%rdi),%rsi
     16bb:       55 push   %rbp
     16bc:       48 c7 47 78 ff ff ff movq   $0xffffffffffffffff,0x78(%rdi)
     16c3:       ff
     16c4:       83 e0 0f and    $0xf,%eax
     16c7:       48 89 e5 mov    %rsp,%rbp
     16ca:       3c 03 cmp    $0x3,%al
     16cc:       74 3c je     170a <fred_entry_from_kernel+0x5a>
     16ce:       76 13 jbe    16e3 <fred_entry_from_kernel+0x33>
     16d0:       3c 05 cmp    $0x5,%al
     16d2:       74 41 je     1715 <fred_entry_from_kernel+0x65>
     16d4:       3c 06 cmp    $0x6,%al
     16d6:       75 27 jne    16ff <fred_entry_from_kernel+0x4f>
     16d8:       e8 73 fe ff ff callq  1550 <fred_swexc>
     16dd:       5d pop    %rbp
     16de:       e9 00 00 00 00 jmpq   16e3 <fred_entry_from_kernel+0x33>
     16e3:       84 c0 test   %al,%al
     16e5:       74 42 je     1729 <fred_entry_from_kernel+0x79>
     16e7:       3c 02 cmp    $0x2,%al
     16e9:       75 14 jne    16ff <fred_entry_from_kernel+0x4f>
     16eb:       80 bf a4 00 00 00 02 cmpb   $0x2,0xa4(%rdi)
     16f2:       75 0b jne    16ff <fred_entry_from_kernel+0x4f>
     16f4:       e8 00 00 00 00 callq  16f9 <fred_entry_from_kernel+0x49>
     16f9:       5d pop    %rbp
     16fa:       e9 00 00 00 00 jmpq   16ff <fred_entry_from_kernel+0x4f>
     16ff:       e8 1c fc ff ff callq  1320 <fred_bad_type>
     1704:       5d pop    %rbp
     1705:       e9 00 00 00 00 jmpq   170a <fred_entry_from_kernel+0x5a>
     170a:       e8 21 fd ff ff callq  1430 <fred_hwexc>
     170f:       5d pop    %rbp
     1710:       e9 00 00 00 00 jmpq   1715 <fred_entry_from_kernel+0x65>
     1715:       80 bf a4 00 00 00 01 cmpb   $0x1,0xa4(%rdi)
     171c:       75 e1 jne    16ff <fred_entry_from_kernel+0x4f>
     171e:       e8 00 00 00 00 callq  1723 <fred_entry_from_kernel+0x73>
     1723:       5d pop    %rbp
     1724:       e9 00 00 00 00 jmpq   1729 <fred_entry_from_kernel+0x79>
     1729:       e8 12 fb ff ff callq  1240 <fred_extint>
     172e:       5d pop    %rbp
     172f:       e9 00 00 00 00 jmpq   1734 <fred_entry_from_kernel+0x84>
     1734:       66 66 2e 0f 1f 84 00 data16 nopw %cs:0x0(%rax,%rax,1)
     173b:       00 00 00 00
     173f:       90                      nop

$ objdump -d vmlinux.o | awk '/<fred_entry_from_user>:/{c=65} c&&c--'
00000000000015a0 <fred_entry_from_user>:
     15a0:       0f b6 87 a6 00 00 00 movzbl 0xa6(%rdi),%eax
     15a7:       48 8b 77 78 mov    0x78(%rdi),%rsi
     15ab:       55 push   %rbp
     15ac:       48 c7 47 78 ff ff ff movq   $0xffffffffffffffff,0x78(%rdi)
     15b3:       ff
     15b4:       83 e0 0f and    $0xf,%eax
     15b7:       48 89 e5 mov    %rsp,%rbp
     15ba:       3c 04 cmp    $0x4,%al
     15bc:       74 78 je     1636 <fred_entry_from_user+0x96>
     15be:       77 15 ja     15d5 <fred_entry_from_user+0x35>
     15c0:       3c 02 cmp    $0x2,%al
     15c2:       74 53 je     1617 <fred_entry_from_user+0x77>
     15c4:       77 65 ja     162b <fred_entry_from_user+0x8b>
     15c6:       84 c0 test   %al,%al
     15c8:       75 42 jne    160c <fred_entry_from_user+0x6c>
     15ca:       e8 71 fc ff ff callq  1240 <fred_extint>
     15cf:       5d pop    %rbp
     15d0:       e9 00 00 00 00 jmpq   15d5 <fred_entry_from_user+0x35>
     15d5:       3c 06 cmp    $0x6,%al
     15d7:       74 7c je     1655 <fred_entry_from_user+0xb5>
     15d9:       72 66 jb     1641 <fred_entry_from_user+0xa1>
     15db:       3c 07 cmp    $0x7,%al
     15dd:       75 2d jne    160c <fred_entry_from_user+0x6c>
     15df:       8b 87 a4 00 00 00 mov    0xa4(%rdi),%eax
     15e5:       25 ff 00 00 02 and    $0x20000ff,%eax
     15ea:       3d 01 00 00 02 cmp    $0x2000001,%eax
     15ef:       75 6f jne    1660 <fred_entry_from_user+0xc0>
     15f1:       48 8b 77 50 mov    0x50(%rdi),%rsi
     15f5:       48 c7 47 50 da ff ff movq   $0xffffffffffffffda,0x50(%rdi)
     15fc:       ff
     15fd:       48 89 77 78 mov    %rsi,0x78(%rdi)
     1601:       e8 00 00 00 00 callq  1606 <fred_entry_from_user+0x66>
     1606:       5d pop    %rbp
     1607:       e9 00 00 00 00 jmpq   160c <fred_entry_from_user+0x6c>
     160c:       e8 0f fd ff ff callq  1320 <fred_bad_type>
     1611:       5d pop    %rbp
     1612:       e9 00 00 00 00 jmpq   1617 <fred_entry_from_user+0x77>
     1617:       80 bf a4 00 00 00 02 cmpb   $0x2,0xa4(%rdi)
     161e:       75 ec jne    160c <fred_entry_from_user+0x6c>
     1620:       e8 00 00 00 00 callq  1625 <fred_entry_from_user+0x85>
     1625:       5d pop    %rbp
     1626:       e9 00 00 00 00 jmpq   162b <fred_entry_from_user+0x8b>
     162b:       e8 00 fe ff ff callq  1430 <fred_hwexc>
     1630:       5d pop    %rbp
     1631:       e9 00 00 00 00 jmpq   1636 <fred_entry_from_user+0x96>
     1636:       e8 85 fc ff ff callq  12c0 <fred_intx>
     163b:       5d pop    %rbp
     163c:       e9 00 00 00 00 jmpq   1641 <fred_entry_from_user+0xa1>
     1641:       80 bf a4 00 00 00 01 cmpb   $0x1,0xa4(%rdi)
     1648:       75 c2 jne    160c <fred_entry_from_user+0x6c>
     164a:       e8 00 00 00 00 callq  164f <fred_entry_from_user+0xaf>
     164f:       5d pop    %rbp
     1650:       e9 00 00 00 00 jmpq   1655 <fred_entry_from_user+0xb5>
     1655:       e8 f6 fe ff ff callq  1550 <fred_swexc>
     165a:       5d pop    %rbp
     165b:       e9 00 00 00 00 jmpq   1660 <fred_entry_from_user+0xc0>
     1660:       83 f8 02 cmp    $0x2,%eax
     1663:       75 24 jne    1689 <fred_entry_from_user+0xe9>
     1665:       80 3d 00 00 00 00 00 cmpb   $0x0,0x0(%rip)        # 
166c <fred_entry_from_user+0xcc>
     166c:       74 1b je     1689 <fred_entry_from_user+0xe9>
     166e:       48 8b 47 50 mov    0x50(%rdi),%rax
     1672:       48 c7 47 50 da ff ff movq   $0xffffffffffffffda,0x50(%rdi)
     1679:       ff
     167a:       48 89 47 78 mov    %rax,0x78(%rdi)

In short, seems that __builtin_expect not work with switch(), at least for
  gcc version 8.5.0 20210514(RHEL).

Thanks,
Ethan



> Probably also check __builtin_expect on 
> https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
>
>>          case EVENT_TYPE_EXTINT:
>>                  return fred_extint(regs);
>>          case EVENT_TYPE_NMI:
>> @@ -256,11 +260,12 @@ __visible noinstr void 
>> fred_entry_from_user(struct pt_regs *regs)
>>   __visible noinstr void fred_entry_from_kernel(struct pt_regs *regs)
>>   {
>>          unsigned long error_code = regs->orig_ax;
>> +       unsigned short etype = regs->fred_ss.type & 0xf;
>>
>>          /* Invalidate orig_ax so that syscall_get_nr() works 
>> correctly */
>>          regs->orig_ax = -1;
>>
>> -       switch (regs->fred_ss.type) {
>> +       switch_likely (etype == EVENT_TYPE_EXTINT, etype) {
>>          case EVENT_TYPE_EXTINT:
>>                  return fred_extint(regs);
>>          case EVENT_TYPE_NMI:
>>
>> Got the asm code as following:
>>
>>   objdump -d vmlinux.o | awk '/<fred_entry_from_user>:/{c=65} c&&c--'
>> 00000000000015a0 <fred_entry_from_user>:
>>      15a0:       0f b6 87 a6 00 00 00 movzbl 0xa6(%rdi),%eax
>>      15a7:       48 8b 77 78 mov    0x78(%rdi),%rsi
>>      15ab:       55 push   %rbp
>>      15ac:       48 c7 47 78 ff ff ff movq 
>> $0xffffffffffffffff,0x78(%rdi)
>>      15b3:       ff
>>      15b4:       48 89 e5 mov    %rsp,%rbp
>>      15b7:       66 83 e0 0f and    $0xf,%ax
>>      15bb:       74 11 je     15ce <fred_entry_from_user+0x2e>
>>      15bd:       66 83 f8 07 cmp    $0x7,%ax
>>      15c1:       74 0b je     15ce <fred_entry_from_user+0x2e>
>>      15c3:       e8 78 fc ff ff callq  1240 <fred_extint>
>>      15c8:       5d pop    %rbp
>>      15c9:       e9 00 00 00 00 jmpq   15ce <fred_entry_from_user+0x2e>
>>      15ce:       e8 4d fd ff ff callq  1320 <fred_bad_type>
>>      15d3:       5d pop    %rbp
>>      15d4:       e9 00 00 00 00 jmpq   15d9 <fred_entry_from_user+0x39>
>>      15d9:       0f 1f 80 00 00 00 00 nopl   0x0(%rax)
>>
>> 00000000000015e0 <__pfx_fred_entry_from_kernel>:
>>      15e0:       90                      nop
>>      15e1:       90                      nop
>>
>> 00000000000015f0 <fred_entry_from_kernel>:
>>      15f0:       55 push   %rbp
>>      15f1:       48 8b 77 78 mov    0x78(%rdi),%rsi
>>      15f5:       48 c7 47 78 ff ff ff movq 
>> $0xffffffffffffffff,0x78(%rdi)
>>      15fc:       ff
>>      15fd:       48 89 e5 mov    %rsp,%rbp
>>      1600:       f6 87 a6 00 00 00 0f testb  $0xf,0xa6(%rdi)
>>      1607:       75 0b jne    1614 <fred_entry_from_kernel+0x24>
>>      1609:       e8 12 fd ff ff callq  1320 <fred_bad_type>
>>      160e:       5d pop    %rbp
>>      160f:       e9 00 00 00 00 jmpq   1614 
>> <fred_entry_from_kernel+0x24>
>>      1614:       e8 27 fc ff ff callq  1240 <fred_extint>
>>      1619:       5d pop    %rbp
>>      161a:       e9 00 00 00 00 jmpq   161f 
>> <fred_entry_from_kernel+0x2f>
>>      161f:       90                      nop
>>
>> 0000000000001620 <__pfx___fred_entry_from_kvm>:
>>      1620:       90                      nop
>>      1621:       90                      nop
>>
>>
>> Even the fred_entry_from_kernel() asm code doesn't look right.
>> *gcc version 8.5.0 20210514 (Red Hat 8.5.0-10) (GCC)*
>> **
>> *Did I screw up something ?*
>> **
>> *Thanks,*
>> *Ethan*
>>>
>>>     -hpa
>>>
>>
>
>
-- 
"firm, enduring, strong, and long-lived"


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ