Date:   Fri, 16 Apr 2021 22:11:26 +0200
From:   Eric Dumazet <edumazet@...gle.com>
To:     Al Viro <viro@...iv.linux.org.uk>
Cc:     Eric Dumazet <eric.dumazet@...il.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        linux-kernel <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

On Fri, Apr 16, 2021 at 9:44 PM Al Viro <viro@...iv.linux.org.uk> wrote:
>
> On Fri, Apr 16, 2021 at 12:24:13PM -0700, Eric Dumazet wrote:
> > From: Eric Dumazet <edumazet@...gle.com>
> >
> > We have to loop only to copy u64 values.
> > After this first loop, we copy at most one u32, one u16 and one byte.
>
> Does it actually yield better code?
>

Yes, my patch gives better code on an actual kernel use case

(net-next tree, look at put_cmsg())
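
For reference, a minimal sketch of the structure the commit message
describes (copy_tail() is a hypothetical user-space stand-in, not the
kernel's unsafe_copy_to_user() macros): one real loop for the u64
chunks, then at most one u32, one u16 and one byte:

#include <string.h>

static void copy_tail(char *dst, const char *src, size_t len)
{
	while (len >= 8) {		/* the only real loop */
		memcpy(dst, src, 8);
		dst += 8; src += 8; len -= 8;
	}
	if (len >= 4) {			/* runs at most once */
		memcpy(dst, src, 4);
		dst += 4; src += 4; len -= 4;
	}
	if (len >= 2) {			/* runs at most once */
		memcpy(dst, src, 2);
		dst += 2; src += 2; len -= 2;
	}
	if (len)			/* at most one trailing byte */
		*dst = *src;
}

With that shape, each tail copy executes at most once; here is what
put_cmsg() compiles to: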

 5ca: 48 89 0f              mov    %rcx,(%rdi)
 5cd: 89 77 08              mov    %esi,0x8(%rdi)
 5d0: 89 57 0c              mov    %edx,0xc(%rdi)
 5d3: 48 83 c7 10          add    $0x10,%rdi
 5d7: 48 83 c1 f0          add    $0xfffffffffffffff0,%rcx
 5db: 48 83 f9 07          cmp    $0x7,%rcx
 5df: 76 40                jbe    621 <put_cmsg+0x111>
 5e1: 66 66 66 66 66 66 2e data16 data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
 5e8: 0f 1f 84 00 00 00 00
 5ef: 00
 5f0: 49 8b 10              mov    (%r8),%rdx
 5f3: 48 89 17              mov    %rdx,(%rdi)
 5f6: 48 83 c7 08          add    $0x8,%rdi
 5fa: 49 83 c0 08          add    $0x8,%r8
 5fe: 48 83 c1 f8          add    $0xfffffffffffffff8,%rcx
 602: 48 83 f9 07          cmp    $0x7,%rcx
 606: 77 e8                ja     5f0 <put_cmsg+0xe0>
 608: eb 17                jmp    621 <put_cmsg+0x111>
 60a: 66 0f 1f 44 00 00    nopw   0x0(%rax,%rax,1)
 610: 41 8b 10              mov    (%r8),%edx
 613: 89 17                mov    %edx,(%rdi)
 615: 48 83 c7 04          add    $0x4,%rdi
 619: 49 83 c0 04          add    $0x4,%r8
 61d: 48 83 c1 fc          add    $0xfffffffffffffffc,%rcx
 621: 48 83 f9 03          cmp    $0x3,%rcx
 625: 77 e9                ja     610 <put_cmsg+0x100>
 627: eb 1a                jmp    643 <put_cmsg+0x133>
 629: 0f 1f 80 00 00 00 00 nopl   0x0(%rax)
 630: 41 0f b7 10          movzwl (%r8),%edx
 634: 66 89 17              mov    %dx,(%rdi)
 637: 48 83 c7 02          add    $0x2,%rdi
 63b: 49 83 c0 02          add    $0x2,%r8
 63f: 48 83 c1 fe          add    $0xfffffffffffffffe,%rcx
 643: 48 83 f9 01          cmp    $0x1,%rcx
 647: 77 e7                ja     630 <put_cmsg+0x120>
 649: eb 15                jmp    660 <put_cmsg+0x150>
 64b: 0f 1f 44 00 00        nopl   0x0(%rax,%rax,1)
 650: 41 0f b6 08          movzbl (%r8),%ecx
 654: 88 0f                mov    %cl,(%rdi)
 656: 48 83 c7 01          add    $0x1,%rdi
 65a: 49 83 c0 01          add    $0x1,%r8
 65e: 31 c9                xor    %ecx,%ecx
 660: 48 85 c9              test   %rcx,%rcx
 663: 75 eb                jne    650 <put_cmsg+0x140>
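
(Reading the listing: the backward branch at 606 closes the only real
loop, copying u64 chunks; the u32 copy at 610, the u16 copy at 630 and
the byte copy at 650 each execute at most once, since fewer than 8, 4
and 2 bytes respectively can remain when they are reached.)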


> FWIW, this
> void bar(unsigned);
> void foo(unsigned n)
> {
>         while (n >= 8) {
>                 bar(n);
>                 n -= 8;
>         }
>         while (n >= 4) {
>                 bar(n);
>                 n -= 4;
>         }
>         while (n >= 2) {
>                 bar(n);
>                 n -= 2;
>         }
>         while (n >= 1) {
>                 bar(n);
>                 n -= 1;
>         }
> }
>
> will compile (with -O2) to
>         pushq   %rbp
>         pushq   %rbx
>         movl    %edi, %ebx
>         subq    $8, %rsp
>         cmpl    $7, %edi
>         jbe     .L2
>         movl    %edi, %ebp
> .L3:
>         movl    %ebp, %edi
>         subl    $8, %ebp
>         call    bar@PLT
>         cmpl    $7, %ebp
>         ja      .L3
>         andl    $7, %ebx
> .L2:
>         cmpl    $3, %ebx
>         jbe     .L4
>         movl    %ebx, %edi
>         andl    $3, %ebx
>         call    bar@PLT
> .L4:
>         cmpl    $1, %ebx
>         jbe     .L5
>         movl    %ebx, %edi
>         andl    $1, %ebx
>         call    bar@PLT
> .L5:
>         testl   %ebx, %ebx
>         je      .L1
>         addq    $8, %rsp
>         movl    $1, %edi
>         popq    %rbx
>         popq    %rbp
>         jmp     bar@PLT
> .L1:
>         addq    $8, %rsp
>         popq    %rbx
>         popq    %rbp
>         ret
>
> i.e. loop + if + if + if...
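
That is, after the first loop the compiler can already prove n < 8, so
each of the remaining while loops can run at most once and is lowered
to a plain if. A source-level equivalent of what gcc emits here would
be roughly:

	while (n >= 8) { bar(n); n -= 8; }
	if (n >= 4) { bar(n); n -= 4; }
	if (n >= 2) { bar(n); n -= 2; }
	if (n >= 1) bar(n);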
