lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <cd08e02d-e6c7-237a-f188-d53a2cd5e512@windriver.com>
Date:   Sat, 10 Jul 2021 19:31:47 +0800
From:   "Xu, Yanfei" <yanfei.xu@...driver.com>
To:     Mel Gorman <mgorman@...hsingularity.net>
Cc:     akpm@...ux-foundation.org, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] mm/page_alloc: avoid counting event if no successful
 allocation



On 7/9/21 8:26 PM, Mel Gorman wrote:
> [Please note: This e-mail is from an EXTERNAL e-mail address]
> 
> On Fri, Jul 09, 2021 at 06:28:55PM +0800, Yanfei Xu wrote:
>> While the nr_populated is non-zero, however the nr_account might be
>> zero if allocating fails. In this case, not to count event can save
>> some cycles.
>>
> 
> The much more likely path is that nr_account is positive so we avoid a
> branch in the common case.
> 

Got it.

>> And this commit extract the check of "page_array" from a while
>> statement to avoid unnecessary checks for it.
>>
> 
> I'm surprised the compiler does not catch that page_array is invariant
> for the loop. Did you check if gcc generates different code is page_array
> is explicitly checked once instead of putting it in the loop?
> 

Hm.. In fact, the page_array always doesn't appear in the loop due to 
the compiler's optimization. And I just confirmed the assembly.

not apply my patch:

ffffffff81267100 <__alloc_pages_bulk>:
ffffffff81267100:       e8 0b 73 df ff          callq  ffffffff8105e410 
<__fentry__>
ffffffff81267105:       55                      push   %rbp
ffffffff81267106:       48 89 e5                mov    %rsp,%rbp
ffffffff81267109:       41 57                   push   %r15
ffffffff8126710b:       41 56                   push   %r14
ffffffff8126710d:       41 55                   push   %r13
ffffffff8126710f:       41 54                   push   %r12
ffffffff81267111:       53                      push   %rbx
ffffffff81267112:       48 83 ec 58             sub    $0x58,%rsp
ffffffff81267116:       85 c9                   test   %ecx,%ecx
ffffffff81267118:       89 7d c4                mov    %edi,-0x3c(%rbp)
ffffffff8126711b:       89 75 9c                mov    %esi,-0x64(%rbp)
ffffffff8126711e:       48 89 55 a0             mov    %rdx,-0x60(%rbp)
ffffffff81267122:       89 4d d4                mov    %ecx,-0x2c(%rbp)
ffffffff81267125:       4c 89 45 a8             mov    %r8,-0x58(%rbp)
ffffffff81267129:       4c 89 4d c8             mov    %r9,-0x38(%rbp)
ffffffff8126712d:       0f 8e 7c 05 00 00       jle    ffffffff812676af 
<__alloc_pages_bulk+0x5af>
ffffffff81267133:       4d 85 c9                test   %r9,%r9
ffffffff81267136:       0f 84 84 05 00 00       je     ffffffff812676c0 
<__alloc_pages_bulk+0x5c0>
ffffffff8126713c:       49 83 39 00             cmpq   $0x0,(%r9)
ffffffff81267140:       0f 84 7a 05 00 00       je     ffffffff812676c0 
<__alloc_pages_bulk+0x5c0>
ffffffff81267146:       49 8d 41 08             lea    0x8(%r9),%rax
ffffffff8126714a:       89 ca                   mov    %ecx,%edx
ffffffff8126714c:       45 31 e4                xor    %r12d,%r12d
ffffffff8126714f:       eb 0b                   jmp    ffffffff8126715c 
<__alloc_pages_bulk+0x5c>
ffffffff81267151:       48 83 c0 08             add    $0x8,%rax
ffffffff81267155:       48 83 78 f8 00          cmpq   $0x0,-0x8(%rax)
ffffffff8126715a:       74 1c                   je     ffffffff81267178 
<__alloc_pages_bulk+0x78>
ffffffff8126715c:       41 83 c4 01             add    $0x1,%r12d
ffffffff81267160:       44 39 e2                cmp    %r12d,%edx
ffffffff81267163:       75 ec                   jne    ffffffff81267151 
<__alloc_pages_bulk+0x51>
ffffffff81267165:       48 63 45 d4             movslq -0x2c(%rbp),%rax
ffffffff81267169:       48 83 c4 58             add    $0x58,%rsp
ffffffff8126716d:       5b                      pop    %rbx
ffffffff8126716e:       41 5c                   pop    %r12
ffffffff81267170:       41 5d                   pop    %r13
ffffffff81267172:       41 5e                   pop    %r14
ffffffff81267174:       41 5f                   pop    %r15
ffffffff81267176:       5d                      pop    %rbp
ffffffff81267177:       c3                      retq
ffffffff81267178:       8b 45 d4                mov    -0x2c(%rbp),%eax
ffffffff8126717b:       44 29 e0                sub    %r12d,%eax
ffffffff8126717e:       83 f8 01                cmp    $0x1,%eax
ffffffff81267181:       0f 84 47 04 00 00       je     ffffffff812675ce 
<__alloc_pages_bulk+0x4ce>
ffffffff81267187:       8b 0d 53 22 a4 01       mov 
0x1a42253(%rip),%ecx        # ffffffff82ca93e0 
<page_group_by_mobility_disabled>
ffffffff8126718d:       48 63 45 9c             movslq -0x64(%rbp),%rax
ffffffff81267191:       44 8b 7d c4             mov    -0x3c(%rbp),%r15d
ffffffff81267195:       44 23 3d 58 22 a4 01    and 
0x1a42258(%rip),%r15d        # ffffffff82ca93f4 <gfp_allowed_mask>


applied my patch:

ffffffff81267100 <__alloc_pages_bulk>:
ffffffff81267100:       e8 0b 73 df ff          callq  ffffffff8105e410 
<__fentry__>
ffffffff81267105:       55                      push   %rbp
ffffffff81267106:       48 89 e5                mov    %rsp,%rbp
ffffffff81267109:       41 57                   push   %r15
ffffffff8126710b:       41 56                   push   %r14
ffffffff8126710d:       41 55                   push   %r13
ffffffff8126710f:       41 54                   push   %r12
ffffffff81267111:       53                      push   %rbx
ffffffff81267112:       48 83 ec 60             sub    $0x60,%rsp
ffffffff81267116:       85 c9                   test   %ecx,%ecx
ffffffff81267118:       89 7d c8                mov    %edi,-0x38(%rbp)
ffffffff8126711b:       89 75 9c                mov    %esi,-0x64(%rbp)
ffffffff8126711e:       48 89 55 a0             mov    %rdx,-0x60(%rbp)
ffffffff81267122:       89 4d d0                mov    %ecx,-0x30(%rbp)
ffffffff81267125:       4c 89 45 a8             mov    %r8,-0x58(%rbp)
ffffffff81267129:       0f 8e 91 05 00 00       jle    ffffffff812676c0 
<__alloc_pages_bulk+0x5c0>
ffffffff8126712f:       4d 85 c9                test   %r9,%r9
ffffffff81267132:       4d 89 cf                mov    %r9,%r15
ffffffff81267135:       0f 84 4d 05 00 00       je     ffffffff81267688 
<__alloc_pages_bulk+0x588>
ffffffff8126713b:       8d 49 ff                lea    -0x1(%rcx),%ecx
ffffffff8126713e:       31 c0                   xor    %eax,%eax
ffffffff81267140:       eb 03                   jmp    ffffffff81267145 
<__alloc_pages_bulk+0x45>
ffffffff81267142:       48 89 d0                mov    %rdx,%rax
ffffffff81267145:       49 83 3c c7 00          cmpq   $0x0,(%r15,%rax,8)
ffffffff8126714a:       41 89 c4                mov    %eax,%r12d
ffffffff8126714d:       74 0d                   je     ffffffff8126715c 
<__alloc_pages_bulk+0x5c>
ffffffff8126714f:       44 8d 60 01             lea    0x1(%rax),%r12d
ffffffff81267153:       48 39 c1                cmp    %rax,%rcx
ffffffff81267156:       48 8d 50 01             lea    0x1(%rax),%rdx
ffffffff8126715a:       75 e6                   jne    ffffffff81267142 
<__alloc_pages_bulk+0x42>
ffffffff8126715c:       8b 45 d0                mov    -0x30(%rbp),%eax
ffffffff8126715f:       41 39 c4                cmp    %eax,%r12d
ffffffff81267162:       0f 84 19 04 00 00       je     ffffffff81267581 
<__alloc_pages_bulk+0x481>
ffffffff81267168:       44 29 e0                sub    %r12d,%eax
ffffffff8126716b:       83 f8 01                cmp    $0x1,%eax
ffffffff8126716e:       0f 84 66 04 00 00       je     ffffffff812675da 
<__alloc_pages_bulk+0x4da>
ffffffff81267174:       48 63 45 9c             movslq -0x64(%rbp),%rax
ffffffff81267178:       8b 0d 62 22 a4 01       mov 
0x1a42262(%rip),%ecx        # ffffffff82ca93e0 
<page_group_by_mobility_disabled>
ffffffff8126717e:       8b 5d c8                mov    -0x38(%rbp),%ebx
ffffffff81267181:       23 1d 6d 22 a4 01       and 
0x1a4226d(%rip),%ebx        # ffffffff82ca93f4 <gfp_allowed_mask>


Thanks,
Yanfei

> --
> Mel Gorman
> SUSE Labs
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ