linux-kernel - Re: Does perf-annotate work correctly?

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170912143350.GA3452@kernel.org>
Date:   Tue, 12 Sep 2017 11:33:50 -0300
From:   Arnaldo Carvalho de Melo <acme@...nel.org>
To:     "Du, Changbin" <changbin.du@...el.com>
Cc:     peterz@...radead.org, mingo@...hat.com,
        alexander.shishkin@...ux.intel.com, linux-kernel@...r.kernel.org
Subject: Re: Does perf-annotate work correctly?

Em Tue, Sep 12, 2017 at 06:10:35PM +0800, Du, Changbin escreveu:
> When I annotate a symbol, I find the annotated C source code doesn't match the assembly code.
> So I cannot determine which line of C code has much overhead without gdb's help.
> 
> Here is an example result of function apic_has_interrupt_for_ppr() in kvm module.

Ok, was this using the module .ko file or /proc/kcore? You forgot to
cut'n'paste the first line on the screen.

Also, how did you use gdb?

perf uses objdump to do the disassembly, and depending on how it is used
(live system, post processing on a different machine, permissions) it
may use different files to do the disassembly.

Please provide more detailed information on the exact command line
arguments and usage scenario.
 
- Arnaldo

>        │580         __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);    ▒
>        │581 }                                                                            ▒
>        │                                                                                 ▒
>        │583 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)       ▒
>        │584 {                                                                            ▒
>   0.88 │30:   cmpb   $0x0,0x91(%rdi)                                                     ▒
>   2.54 │    ↓ je     63                                                                  ▒
>   0.20 │      mov    0xa0(%rdi),%rcx                                                     ▒
>        │581         int highest_irr;                                                     ▒
>        │582         if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active)   ▒
>   4.91 │      mov    $0xe0,%eax                       x                                   ▒
>   1.46 │45:   mov    %eax,%edx                        x                                   ▒
>   0.02 │      sar    $0x5,%edx                        x                                   ▒
>   3.57 │      shl    $0x4,%edx                        x                                   ▒
>   3.34 │      movslq %edx,%rdx                        x                                   ▒
>   1.25 │      mov    0x200(%rcx,%rdx,1),%edx          x                                   ▒
>  42.44 │      test   %edx,%edx                        x                                   ▒
>   0.01 │   ┌──jne    88                               x                                   ▒
>   3.48 │   │  sub    $0x20,%eax                       x                                   ▒
>   2.24 │   │  cmp    $0xffffffe0,%eax                 x                                   ▒
>        │586│apic_find_highest_irr():                                                     ▒
>        │   │                                                                             ▒
>        │407│        /*                                                                   ▒
>        │408│         * Note that irr_pending is just a hint. It will be always           ▒
>        │409│         * true with virtual interrupt delivery enabled.                     ▒
>        │410│         */                                                                  ▒
>        │411│        if (!apic->irr_pending)                                              ▒
>        │   │↑ jne    45                                                                  ▒
>   0.62 │63:│  mov    $0xffffffff,%eax                                                    ◆
>   0.83 │   │  leaveq                                                                     ▒
>  13.52 │   │← retq                                                                       ▒
>        │6a:│  mov    %esi,-0x4(%rbp)                                                     ▒
>        │   │  mov    %rdx,%rdi                                                           ▒
>        │418│find_highest_vector():                                                       ▒
>        │340│static int find_highest_vector(void *bitmap)                                 ▒
>        │341│{                                                                            ▒
>        │342│        int vec;                                                             ▒
>        │343│        u32 *reg;                                                            ▒
>        │   │                                                                             ▒
>        │345│        for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;                   ▒
>        │   │→ callq  *%rax                                                               ▒
>        │   │  mov    -0x4(%rbp),%esi                                                     ▒
>        │343│             vec >= 0; vec -= APIC_VECTORS_PER_REG) {                        ▒
>        │344│                reg = bitmap + REG_POS(vec);                                 ▒
>        │345│                if (*reg)                                                    ▒
>   0.05 │75:│  cmp    $0xffffffff,%eax                                                    ▒
>        │   │↑ je     63                                                                  ▒
>   1.95 │   │  mov    %eax,%edx                                                           ▒
>   1.45 │   │  and    $0xf0,%edx                                                          
> 
> 
> Look at the assembly code block where I have put an 'x' on the right. Apparently the
> assembly code doesn't match the surrounding C source code. Let's look at the correct disassembly
> result from gdb:
> 
> 340		for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
>    0x000000000003b4e0 <+64>:	mov    $0xe0,%eax
> 
> 342			reg = bitmap + REG_POS(vec);
> 343			if (*reg)
>    0x000000000003b4e5 <+69>:	mov    %eax,%edx
>    0x000000000003b4e7 <+71>:	sar    $0x5,%edx
>    0x000000000003b4ea <+74>:	shl    $0x4,%edx
>    0x000000000003b4ed <+77>:	movslq %edx,%rdx
>    0x000000000003b4f0 <+80>:	mov    0x200(%rcx,%rdx,1),%edx
>    0x000000000003b4f7 <+87>:	test   %edx,%edx
>    0x000000000003b4f9 <+89>:	jne    0x3b528 <apic_has_interrupt_for_ppr+136>
> 
> 341		     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
>    0x000000000003b4fb <+91>:	sub    $0x20,%eax
> 
> 340		for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
>    0x000000000003b4fe <+94>:	cmp    $0xffffffe0,%eax
>    0x000000000003b501 <+97>:	jne    0x3b4e5 <apic_has_interrupt_for_ppr+69>
> 
> 
> Compared to gdb, perf-annotate has messed up. Is it a bug, or is perf just not as perfect as gdb?
> 
> -- 
> Thanks,
> Changbin Du