lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:	Fri, 22 Jun 2012 15:54:56 -0400
From:	Steven Rostedt <rostedt@...dmis.org>
To:	Hagen Paul Pfeifer <hagen@...u.net>
Cc:	Linus Torvalds <torvalds@...ux-foundation.org>,
	Ingo Molnar <mingo@...nel.org>, linux-kernel@...r.kernel.org,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Arnaldo Carvalho de Melo <acme@...radead.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Andrew Morton <akpm@...ux-foundation.org>
Subject: Re: [GIT PULL] perf fixes

On Fri, 2012-06-22 at 21:06 +0200, Hagen Paul Pfeifer wrote:

> >I may be more sensitive to this than most, because I look at profiles
> >and the function prologue just looks very ugly with the call mcount
> >thing. Ugh.
> 
> Yes, ugh. Even Stevens -mfentry replacement do not change things here.

Why doesn't -mfentry help here? The link I showed still had frame
pointers enabled. With -mfentry, frame pointers do not need to be
enabled. And my latest patches do not automatically enable frame
pointers when enabling function tracing if -mfentry is supported.

I just ran a bunch of compiles against kernel/sched/core.c:

no pg, no mfentry, no fp:

0000000000000882 <schedule>:
 882:   65 48 8b 04 25 00 00    mov    %gs:0x0,%rax
 889:   00 00 
                        887: R_X86_64_32S       current_task
 88b:   57                      push   %rdi
 88c:   48 8b 10                mov    (%rax),%rdx
 88f:   48 85 d2                test   %rdx,%rdx
 892:   74 45                   je     8d9 <schedule+0x57>
 894:   48 83 b8 60 06 00 00    cmpq   $0x0,0x660(%rax)
 89b:   00 
 89c:   75 3b                   jne    8d9 <schedule+0x57>
 89e:   48 8b b8 60 0e 00 00    mov    0xe60(%rax),%rdi
 8a5:   31 c0                   xor    %eax,%eax
 8a7:   48 85 ff                test   %rdi,%rdi
 8aa:   74 1a                   je     8c6 <schedule+0x44>
 8ac:   48 8d 57 08             lea    0x8(%rdi),%rdx
 8b0:   48 39 57 08             cmp    %rdx,0x8(%rdi)
 8b4:   b0 01                   mov    $0x1,%al
 8b6:   75 0e                   jne    8c6 <schedule+0x44>
 8b8:   48 8d 47 18             lea    0x18(%rdi),%rax
 8bc:   48 39 47 18             cmp    %rax,0x18(%rdi)
 8c0:   0f 95 c0                setne  %al
 8c3:   0f b6 c0                movzbl %al,%eax
 8c6:   85 c0                   test   %eax,%eax
 8c8:   74 0f                   je     8d9 <schedule+0x57>
 8ca:   48 85 ff                test   %rdi,%rdi
 8cd:   74 0a                   je     8d9 <schedule+0x57>
 8cf:   be 01 00 00 00          mov    $0x1,%esi
 8d4:   e8 00 00 00 00          callq  8d9 <schedule+0x57>
                        8d5: R_X86_64_PC32      blk_flush_plug_list-0x4
 8d9:   e8 70 f9 ff ff          callq  24e <__schedule>
 8de:   5e                      pop    %rsi
 8df:   c3                      retq   


no pg, no mfentry, with fp:

00000000000008cb <schedule>:
 8cb:   55                      push   %rbp
 8cc:   65 48 8b 04 25 00 00    mov    %gs:0x0,%rax
 8d3:   00 00 
                        8d1: R_X86_64_32S       current_task
 8d5:   48 8b 10                mov    (%rax),%rdx
 8d8:   48 89 e5                mov    %rsp,%rbp
 8db:   48 85 d2                test   %rdx,%rdx
 8de:   74 45                   je     925 <schedule+0x5a>
 8e0:   48 83 b8 60 06 00 00    cmpq   $0x0,0x660(%rax)
 8e7:   00 
 8e8:   75 3b                   jne    925 <schedule+0x5a>
 8ea:   48 8b b8 60 0e 00 00    mov    0xe60(%rax),%rdi
 8f1:   31 c0                   xor    %eax,%eax
 8f3:   48 85 ff                test   %rdi,%rdi
 8f6:   74 1a                   je     912 <schedule+0x47>
 8f8:   48 8d 57 08             lea    0x8(%rdi),%rdx
 8fc:   48 39 57 08             cmp    %rdx,0x8(%rdi)
 900:   b0 01                   mov    $0x1,%al
 902:   75 0e                   jne    912 <schedule+0x47>
 904:   48 8d 47 18             lea    0x18(%rdi),%rax
 908:   48 39 47 18             cmp    %rax,0x18(%rdi)
 90c:   0f 95 c0                setne  %al
 90f:   0f b6 c0                movzbl %al,%eax
 912:   85 c0                   test   %eax,%eax
 914:   74 0f                   je     925 <schedule+0x5a>
 916:   48 85 ff                test   %rdi,%rdi
 919:   74 0a                   je     925 <schedule+0x5a>
 91b:   be 01 00 00 00          mov    $0x1,%esi
 920:   e8 00 00 00 00          callq  925 <schedule+0x5a>
                        921: R_X86_64_PC32      blk_flush_plug_list-0x4
 925:   e8 41 f9 ff ff          callq  26b <__schedule>
 92a:   5d                      pop    %rbp
 92b:   c3                      retq   

The above is our basis. Now let's look at the current -pg:

with pg, no mfentry, with fp:

000000000000090c <schedule>:
 90c:   55                      push   %rbp
 90d:   48 89 e5                mov    %rsp,%rbp
 910:   e8 00 00 00 00          callq  915 <schedule+0x9>
                        911: R_X86_64_PC32      mcount-0x4
 915:   65 48 8b 04 25 00 00    mov    %gs:0x0,%rax
 91c:   00 00 
                        91a: R_X86_64_32S       current_task
 91e:   48 8b 10                mov    (%rax),%rdx
 921:   48 85 d2                test   %rdx,%rdx
 924:   74 45                   je     96b <schedule+0x5f>
 926:   48 83 b8 60 06 00 00    cmpq   $0x0,0x660(%rax)
 92d:   00 
 92e:   75 3b                   jne    96b <schedule+0x5f>
 930:   48 8b b8 60 0e 00 00    mov    0xe60(%rax),%rdi
 937:   31 c0                   xor    %eax,%eax
 939:   48 85 ff                test   %rdi,%rdi
 93c:   74 1a                   je     958 <schedule+0x4c>
 93e:   48 8d 57 08             lea    0x8(%rdi),%rdx
 942:   48 39 57 08             cmp    %rdx,0x8(%rdi)
 946:   b0 01                   mov    $0x1,%al
 948:   75 0e                   jne    958 <schedule+0x4c>
 94a:   48 8d 47 18             lea    0x18(%rdi),%rax
 94e:   48 39 47 18             cmp    %rax,0x18(%rdi)
 952:   0f 95 c0                setne  %al
 955:   0f b6 c0                movzbl %al,%eax
 958:   85 c0                   test   %eax,%eax
 95a:   74 0f                   je     96b <schedule+0x5f>
 95c:   48 85 ff                test   %rdi,%rdi
 95f:   74 0a                   je     96b <schedule+0x5f>
 961:   be 01 00 00 00          mov    $0x1,%esi
 966:   e8 00 00 00 00          callq  96b <schedule+0x5f>
                        967: R_X86_64_PC32      blk_flush_plug_list-0x4
 96b:   e8 37 f9 ff ff          callq  2a7 <__schedule>
 970:   5d                      pop    %rbp
 971:   c3                      retq   

Looks like %rsp is saved in %rbp here, in addition to the call to mcount.

-pg must have frame pointers when -mfentry is not used, so there is
no 'with pg, no mfentry, no fp'. Now let's look at mfentry:

with pg, with mfentry, with fp:

000000000000090c <schedule>:
 90c:   e8 00 00 00 00          callq  911 <schedule+0x5>
                        90d: R_X86_64_PC32      __fentry__-0x4
 911:   55                      push   %rbp
 912:   65 48 8b 04 25 00 00    mov    %gs:0x0,%rax
 919:   00 00 
                        917: R_X86_64_32S       current_task
 91b:   48 8b 10                mov    (%rax),%rdx
 91e:   48 89 e5                mov    %rsp,%rbp
 921:   48 85 d2                test   %rdx,%rdx
 924:   74 45                   je     96b <schedule+0x5f>
 926:   48 83 b8 60 06 00 00    cmpq   $0x0,0x660(%rax)
 92d:   00 
 92e:   75 3b                   jne    96b <schedule+0x5f>
 930:   48 8b b8 60 0e 00 00    mov    0xe60(%rax),%rdi
 937:   31 c0                   xor    %eax,%eax
 939:   48 85 ff                test   %rdi,%rdi
 93c:   74 1a                   je     958 <schedule+0x4c>
 93e:   48 8d 57 08             lea    0x8(%rdi),%rdx
 942:   48 39 57 08             cmp    %rdx,0x8(%rdi)
 946:   b0 01                   mov    $0x1,%al
 948:   75 0e                   jne    958 <schedule+0x4c>
 94a:   48 8d 47 18             lea    0x18(%rdi),%rax
 94e:   48 39 47 18             cmp    %rax,0x18(%rdi)
 952:   0f 95 c0                setne  %al
 955:   0f b6 c0                movzbl %al,%eax
 958:   85 c0                   test   %eax,%eax
 95a:   74 0f                   je     96b <schedule+0x5f>
 95c:   48 85 ff                test   %rdi,%rdi
 95f:   74 0a                   je     96b <schedule+0x5f>
 961:   be 01 00 00 00          mov    $0x1,%esi
 966:   e8 00 00 00 00          callq  96b <schedule+0x5f>
                        967: R_X86_64_PC32      blk_flush_plug_list-0x4
 96b:   e8 37 f9 ff ff          callq  2a7 <__schedule>
 970:   5d                      pop    %rbp
 971:   c3                      retq   


It is identical to the non -pg build with frame pointers, except that we
added a call to fentry at the start of the function.

with pg, with mfentry, no fp:

00000000000008c3 <schedule>:
 8c3:   e8 00 00 00 00          callq  8c8 <schedule+0x5>
                        8c4: R_X86_64_PC32      __fentry__-0x4
 8c8:   65 48 8b 04 25 00 00    mov    %gs:0x0,%rax
 8cf:   00 00 
                        8cd: R_X86_64_32S       current_task
 8d1:   57                      push   %rdi
 8d2:   48 8b 10                mov    (%rax),%rdx
 8d5:   48 85 d2                test   %rdx,%rdx
 8d8:   74 45                   je     91f <schedule+0x5c>
 8da:   48 83 b8 60 06 00 00    cmpq   $0x0,0x660(%rax)
 8e1:   00 
 8e2:   75 3b                   jne    91f <schedule+0x5c>
 8e4:   48 8b b8 60 0e 00 00    mov    0xe60(%rax),%rdi
 8eb:   31 c0                   xor    %eax,%eax
 8ed:   48 85 ff                test   %rdi,%rdi
 8f0:   74 1a                   je     90c <schedule+0x49>
 8f2:   48 8d 57 08             lea    0x8(%rdi),%rdx
 8f6:   48 39 57 08             cmp    %rdx,0x8(%rdi)
 8fa:   b0 01                   mov    $0x1,%al
 8fc:   75 0e                   jne    90c <schedule+0x49>
 8fe:   48 8d 47 18             lea    0x18(%rdi),%rax
 902:   48 39 47 18             cmp    %rax,0x18(%rdi)
 906:   0f 95 c0                setne  %al
 909:   0f b6 c0                movzbl %al,%eax
 90c:   85 c0                   test   %eax,%eax
 90e:   74 0f                   je     91f <schedule+0x5c>
 910:   48 85 ff                test   %rdi,%rdi
 913:   74 0a                   je     91f <schedule+0x5c>
 915:   be 01 00 00 00          mov    $0x1,%esi
 91a:   e8 00 00 00 00          callq  91f <schedule+0x5c>
                        91b: R_X86_64_PC32      blk_flush_plug_list-0x4
 91f:   e8 66 f9 ff ff          callq  28a <__schedule>
 924:   5e                      pop    %rsi
 925:   c3                      retq   

Now here's the big difference from -pg. This is identical to compiling
without -pg and without frame pointers, with the exception of the fentry
call at the start of the function.

Now what's the issue with function prologues with -mfentry?

-- Steve


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ