linux-kernel - Re: [patch] measurements, numbers about CONFIG_OPTIMIZE_INLINING=y impact

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Fri, 9 Jan 2009 16:25:37 +0100
From:	Ingo Molnar <mingo@...e.hu>
To:	Linus Torvalds <torvalds@...ux-foundation.org>
Cc:	"H. Peter Anvin" <hpa@...or.com>,
	Chris Mason <chris.mason@...cle.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Steven Rostedt <rostedt@...dmis.org>,
	paulmck@...ux.vnet.ibm.com, Gregory Haskins <ghaskins@...ell.com>,
	Matthew Wilcox <matthew@....cx>,
	Andi Kleen <andi@...stfloor.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
	linux-fsdevel <linux-fsdevel@...r.kernel.org>,
	linux-btrfs <linux-btrfs@...r.kernel.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Nick Piggin <npiggin@...e.de>,
	Peter Morreale <pmorreale@...ell.com>,
	Sven Dietrich <SDietrich@...ell.com>
Subject: Re: [patch] measurements, numbers about CONFIG_OPTIMIZE_INLINING=y
	impact


* Ingo Molnar <mingo@...e.hu> wrote:

> > I suspect gcc has some pre-inlining heuristics that don't take 
> > constant folding and simplification into account - if you look at just 
> > the raw tree of the function without taking the optimization into 
> > account, it will look big.
> 
> Yeah. In my tests GCC 4.3.2 does properly inline that particular asm.

here is what that function ended up looking like on my side (with Peter's 
v7, which should be close to what Chris tried, judging by his crash dump):

ffffffff81491347 <__mutex_lock_common>:
ffffffff81491347:	55                   	push   %rbp
ffffffff81491348:	48 89 e5             	mov    %rsp,%rbp
ffffffff8149134b:	41 57                	push   %r15
ffffffff8149134d:	4c 8d 7f 08          	lea    0x8(%rdi),%r15
ffffffff81491351:	41 56                	push   %r14
ffffffff81491353:	49 89 f6             	mov    %rsi,%r14
ffffffff81491356:	41 55                	push   %r13
ffffffff81491358:	41 54                	push   %r12
ffffffff8149135a:	4c 8d 67 18          	lea    0x18(%rdi),%r12
ffffffff8149135e:	53                   	push   %rbx
ffffffff8149135f:	48 89 fb             	mov    %rdi,%rbx
ffffffff81491362:	48 83 ec 38          	sub    $0x38,%rsp
ffffffff81491366:	65 4c 8b 2c 25 00 00 	mov    %gs:0x0,%r13
ffffffff8149136d:	00 00 
ffffffff8149136f:	b8 01 00 00 00       	mov    $0x1,%eax
ffffffff81491374:	31 d2                	xor    %edx,%edx
ffffffff81491376:	f0 0f b1 13          	lock cmpxchg %edx,(%rbx)
ffffffff8149137a:	83 f8 01             	cmp    $0x1,%eax
ffffffff8149137d:	75 18                	jne    ffffffff81491397 <__mutex_lock_common+0x50>
ffffffff8149137f:	65 48 8b 04 25 10 00 	mov    %gs:0x10,%rax
ffffffff81491386:	00 00 
ffffffff81491388:	48 2d d8 1f 00 00    	sub    $0x1fd8,%rax
ffffffff8149138e:	48 89 43 18          	mov    %rax,0x18(%rbx)
ffffffff81491392:	e9 dd 00 00 00       	jmpq   ffffffff81491474 <__mutex_lock_common+0x12d>
ffffffff81491397:	85 c0                	test   %eax,%eax
ffffffff81491399:	79 06                	jns    ffffffff814913a1 <__mutex_lock_common+0x5a>
ffffffff8149139b:	4c 39 7b 08          	cmp    %r15,0x8(%rbx)
ffffffff8149139f:	75 19                	jne    ffffffff814913ba <__mutex_lock_common+0x73>
ffffffff814913a1:	49 8b 34 24          	mov    (%r12),%rsi
ffffffff814913a5:	48 85 f6             	test   %rsi,%rsi
ffffffff814913a8:	74 0c                	je     ffffffff814913b6 <__mutex_lock_common+0x6f>
ffffffff814913aa:	48 89 df             	mov    %rbx,%rdi
ffffffff814913ad:	e8 6c dd b9 ff       	callq  ffffffff8102f11e <spin_on_owner>
ffffffff814913b2:	85 c0                	test   %eax,%eax
ffffffff814913b4:	74 04                	je     ffffffff814913ba <__mutex_lock_common+0x73>
ffffffff814913b6:	f3 90                	pause  
ffffffff814913b8:	eb b5                	jmp    ffffffff8149136f <__mutex_lock_common+0x28>
ffffffff814913ba:	4c 8d 63 04          	lea    0x4(%rbx),%r12
ffffffff814913be:	4c 89 e7             	mov    %r12,%rdi
ffffffff814913c1:	e8 d2 0f 00 00       	callq  ffffffff81492398 <_spin_lock>
ffffffff814913c6:	48 8b 53 10          	mov    0x10(%rbx),%rdx
ffffffff814913ca:	48 8d 45 b0          	lea    -0x50(%rbp),%rax
ffffffff814913ce:	4c 89 7d b0          	mov    %r15,-0x50(%rbp)
ffffffff814913d2:	48 89 43 10          	mov    %rax,0x10(%rbx)
ffffffff814913d6:	48 89 02             	mov    %rax,(%rdx)
ffffffff814913d9:	48 89 55 b8          	mov    %rdx,-0x48(%rbp)
ffffffff814913dd:	48 83 ca ff          	or     $0xffffffffffffffff,%rdx
ffffffff814913e1:	4c 89 6d c0          	mov    %r13,-0x40(%rbp)
ffffffff814913e5:	48 89 d0             	mov    %rdx,%rax
ffffffff814913e8:	87 03                	xchg   %eax,(%rbx)
ffffffff814913ea:	ff c8                	dec    %eax
ffffffff814913ec:	74 55                	je     ffffffff81491443 <__mutex_lock_common+0xfc>
ffffffff814913ee:	44 88 f0             	mov    %r14b,%al
ffffffff814913f1:	44 89 f2             	mov    %r14d,%edx
ffffffff814913f4:	83 e0 81             	and    $0xffffffffffffff81,%eax
ffffffff814913f7:	83 e2 01             	and    $0x1,%edx
ffffffff814913fa:	88 45 af             	mov    %al,-0x51(%rbp)
ffffffff814913fd:	89 55 a8             	mov    %edx,-0x58(%rbp)
ffffffff81491400:	48 83 c8 ff          	or     $0xffffffffffffffff,%rax
ffffffff81491404:	87 03                	xchg   %eax,(%rbx)
ffffffff81491406:	ff c8                	dec    %eax
ffffffff81491408:	74 39                	je     ffffffff81491443 <__mutex_lock_common+0xfc>
ffffffff8149140a:	80 7d af 00          	cmpb   $0x0,-0x51(%rbp)
ffffffff8149140e:	74 1c                	je     ffffffff8149142c <__mutex_lock_common+0xe5>
ffffffff81491410:	49 8b 45 08          	mov    0x8(%r13),%rax
ffffffff81491414:	f6 40 10 04          	testb  $0x4,0x10(%rax)
ffffffff81491418:	74 12                	je     ffffffff8149142c <__mutex_lock_common+0xe5>
ffffffff8149141a:	83 7d a8 00          	cmpl   $0x0,-0x58(%rbp)
ffffffff8149141e:	75 65                	jne    ffffffff81491485 <__mutex_lock_common+0x13e>
ffffffff81491420:	4c 89 ef             	mov    %r13,%rdi
ffffffff81491423:	e8 e1 2e bb ff       	callq  ffffffff81044309 <__fatal_signal_pending>
ffffffff81491428:	85 c0                	test   %eax,%eax
ffffffff8149142a:	75 59                	jne    ffffffff81491485 <__mutex_lock_common+0x13e>
ffffffff8149142c:	4d 89 75 00          	mov    %r14,0x0(%r13)
ffffffff81491430:	41 fe 04 24          	incb   (%r12)
ffffffff81491434:	e8 e9 ef ff ff       	callq  ffffffff81490422 <schedule>
ffffffff81491439:	4c 89 e7             	mov    %r12,%rdi
ffffffff8149143c:	e8 57 0f 00 00       	callq  ffffffff81492398 <_spin_lock>
ffffffff81491441:	eb bd                	jmp    ffffffff81491400 <__mutex_lock_common+0xb9>
ffffffff81491443:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
ffffffff81491447:	48 8b 55 b0          	mov    -0x50(%rbp),%rdx
ffffffff8149144b:	48 89 10             	mov    %rdx,(%rax)
ffffffff8149144e:	48 89 42 08          	mov    %rax,0x8(%rdx)
ffffffff81491452:	65 48 8b 04 25 10 00 	mov    %gs:0x10,%rax
ffffffff81491459:	00 00 
ffffffff8149145b:	48 2d d8 1f 00 00    	sub    $0x1fd8,%rax
ffffffff81491461:	4c 39 7b 08          	cmp    %r15,0x8(%rbx)
ffffffff81491465:	48 89 43 18          	mov    %rax,0x18(%rbx)
ffffffff81491469:	75 06                	jne    ffffffff81491471 <__mutex_lock_common+0x12a>
ffffffff8149146b:	c7 03 00 00 00 00    	movl   $0x0,(%rbx)
ffffffff81491471:	fe 43 04             	incb   0x4(%rbx)
ffffffff81491474:	31 c0                	xor    %eax,%eax
ffffffff81491476:	48 83 c4 38          	add    $0x38,%rsp
ffffffff8149147a:	5b                   	pop    %rbx
ffffffff8149147b:	41 5c                	pop    %r12
ffffffff8149147d:	41 5d                	pop    %r13
ffffffff8149147f:	41 5e                	pop    %r14
ffffffff81491481:	41 5f                	pop    %r15
ffffffff81491483:	c9                   	leaveq 
ffffffff81491484:	c3                   	retq   
ffffffff81491485:	48 8b 55 b0          	mov    -0x50(%rbp),%rdx
ffffffff81491489:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
ffffffff8149148d:	48 89 42 08          	mov    %rax,0x8(%rdx)
ffffffff81491491:	48 89 10             	mov    %rdx,(%rax)
ffffffff81491494:	fe 43 04             	incb   0x4(%rbx)
ffffffff81491497:	b8 fc ff ff ff       	mov    $0xfffffffc,%eax
ffffffff8149149c:	eb d8                	jmp    ffffffff81491476 <__mutex_lock_common+0x12f>

the "lock cmpxchg" ended up being inlined properly at ffffffff81491376, 
and the whole assembly sequence looks pretty compact to me.

That does not let GCC off the hook, though, and I'd be the last one to 
defend it when it messes up.

	Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/