lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 2 Dec 2009 15:21:08 +0100
From:	Ingo Molnar <mingo@...e.hu>
To:	Jan Beulich <JBeulich@...ell.com>
Cc:	a.p.zijlstra@...llo.nl, tglx@...utronix.de,
	torvalds@...ux-foundation.org, mingo@...hat.com, npiggin@...e.de,
	linux-kernel@...r.kernel.org, linux-tip-commits@...r.kernel.org,
	hpa@...or.com
Subject: Re: [tip:core/locking] locking, x86: Slightly shorten
 __ticket_spin_trylock()


* Jan Beulich <JBeulich@...ell.com> wrote:

> >>> Ingo Molnar <mingo@...e.hu> 02.12.09 14:29 >>>
> >At first glance, this bit looks odd:
> >
> >+       union { int i; bool b; } new;
> >
> >+       return new.b;
> >
> >shouldn't that be short-based, to work correctly in the 0-255 CPUs case?
> 
> No, I can't see why. In both instances, we're using (and had been 
> using previously, just with the added movzbl) the outcome of a setCC 
> instruction, which produces valid bool (single-byte) values. It is 
> precisely for that reason that I needed to introduce these unions, since 
> the upper bytes of the register aren't valid (and shouldn't be looked 
> at by the caller).
> 
> Do you happen to have the vmlinux binary still around, to look at the 
> code your compiler generated (I went through the code I got in quite a 
> bit of detail to make sure it got translated correctly)?

I think the smoking gun is these config options:

 CONFIG_PARAVIRT_GUEST=y
 CONFIG_PARAVIRT=y
 CONFIG_PARAVIRT_SPINLOCKS=y
 CONFIG_PARAVIRT_CLOCK=y
 # CONFIG_PARAVIRT_DEBUG is not set

I disassembled the relevant bits for you:

792a9933 <_raw_spin_unlock>:
792a9933:	56                   	push   %esi
792a9934:	53                   	push   %ebx
792a9935:	89 c3                	mov    %eax,%ebx
792a9937:	81 78 04 ad 4e ad de 	cmpl   $0xdead4ead,0x4(%eax)
792a993e:	74 0a                	je     792a994a <_raw_spin_unlock+0x17>
792a9940:	ba 30 00 dd 79       	mov    $0x79dd0030,%edx
792a9945:	e8 66 ff ff ff       	call   792a98b0 <spin_bug>
792a994a:	89 d8                	mov    %ebx,%eax
792a994c:	ff 15 c0 6c f2 79    	call   *0x79f26cc0
792a9952:	85 c0                	test   %eax,%eax
792a9954:	75 0c                	jne    792a9962 <_raw_spin_unlock+0x2f>
792a9956:	ba bf 00 dd 79       	mov    $0x79dd00bf,%edx
792a995b:	89 d8                	mov    %ebx,%eax
792a995d:	e8 4e ff ff ff       	call   792a98b0 <spin_bug>
792a9962:	64 a1 84 a4 0a 7a    	mov    %fs:0x7a0aa484,%eax
792a9968:	39 43 0c             	cmp    %eax,0xc(%ebx)
792a996b:	74 0c                	je     792a9979 <_raw_spin_unlock+0x46>
792a996d:	ba 3a 00 dd 79       	mov    $0x79dd003a,%edx
792a9972:	89 d8                	mov    %ebx,%eax
792a9974:	e8 37 ff ff ff       	call   792a98b0 <spin_bug>
792a9979:	64 a1 e0 c0 0a 7a    	mov    %fs:0x7a0ac0e0,%eax
792a997f:	39 43 08             	cmp    %eax,0x8(%ebx)
792a9982:	74 0c                	je     792a9990 <_raw_spin_unlock+0x5d>
792a9984:	ba 46 00 dd 79       	mov    $0x79dd0046,%edx
792a9989:	89 d8                	mov    %ebx,%eax
792a998b:	e8 20 ff ff ff       	call   792a98b0 <spin_bug>
792a9990:	c7 43 0c ff ff ff ff 	movl   $0xffffffff,0xc(%ebx)
792a9997:	89 d8                	mov    %ebx,%eax
792a9999:	c7 43 08 ff ff ff ff 	movl   $0xffffffff,0x8(%ebx)
792a99a0:	ff 15 d4 6c f2 79    	call   *0x79f26cd4
792a99a6:	5b                   	pop    %ebx
792a99a7:	5e                   	pop    %esi
792a99a8:	c3                   	ret    

792a99a9 <_raw_spin_lock>:
792a99a9:	55                   	push   %ebp
792a99aa:	57                   	push   %edi
792a99ab:	56                   	push   %esi
792a99ac:	53                   	push   %ebx
792a99ad:	89 c3                	mov    %eax,%ebx
792a99af:	83 ec 10             	sub    $0x10,%esp
792a99b2:	81 78 04 ad 4e ad de 	cmpl   $0xdead4ead,0x4(%eax)
792a99b9:	74 0a                	je     792a99c5 <_raw_spin_lock+0x1c>
792a99bb:	ba 30 00 dd 79       	mov    $0x79dd0030,%edx
792a99c0:	e8 eb fe ff ff       	call   792a98b0 <spin_bug>
792a99c5:	64 a1 84 a4 0a 7a    	mov    %fs:0x7a0aa484,%eax
792a99cb:	39 43 0c             	cmp    %eax,0xc(%ebx)
792a99ce:	75 0c                	jne    792a99dc <_raw_spin_lock+0x33>
792a99d0:	ba a1 3e e0 79       	mov    $0x79e03ea1,%edx
792a99d5:	89 d8                	mov    %ebx,%eax
792a99d7:	e8 d4 fe ff ff       	call   792a98b0 <spin_bug>
792a99dc:	64 a1 e0 c0 0a 7a    	mov    %fs:0x7a0ac0e0,%eax
792a99e2:	39 43 08             	cmp    %eax,0x8(%ebx)
792a99e5:	75 0c                	jne    792a99f3 <_raw_spin_lock+0x4a>
792a99e7:	ba 50 00 dd 79       	mov    $0x79dd0050,%edx
792a99ec:	89 d8                	mov    %ebx,%eax
792a99ee:	e8 bd fe ff ff       	call   792a98b0 <spin_bug>
792a99f3:	89 d8                	mov    %ebx,%eax
792a99f5:	ff 15 d0 6c f2 79    	call   *0x79f26cd0
792a99fb:	85 c0                	test   %eax,%eax
792a99fd:	0f 85 8c 00 00 00    	jne    792a9a8f <_raw_spin_lock+0xe6>
792a9a03:	6b 15 78 55 f2 79 64 	imul   $0x64,0x79f25578,%edx
792a9a0a:	bd 01 00 00 00       	mov    $0x1,%ebp
792a9a0f:	64 a1 84 a4 0a 7a    	mov    %fs:0x7a0aa484,%eax
792a9a15:	89 44 24 04          	mov    %eax,0x4(%esp)
792a9a19:	05 0c 03 00 00       	add    $0x30c,%eax
792a9a1e:	89 df                	mov    %ebx,%edi
792a9a20:	89 44 24 0c          	mov    %eax,0xc(%esp)
792a9a24:	89 54 24 08          	mov    %edx,0x8(%esp)
792a9a28:	31 db                	xor    %ebx,%ebx
792a9a2a:	31 f6                	xor    %esi,%esi
792a9a2c:	eb 19                	jmp    792a9a47 <_raw_spin_lock+0x9e>
792a9a2e:	89 f8                	mov    %edi,%eax
792a9a30:	ff 15 d0 6c f2 79    	call   *0x79f26cd0
792a9a36:	85 c0                	test   %eax,%eax
792a9a38:	75 53                	jne    792a9a8d <_raw_spin_lock+0xe4>
792a9a3a:	b0 01                	mov    $0x1,%al
792a9a3c:	e8 2f 5a ff ff       	call   7929f470 <__delay>
792a9a41:	83 c3 01             	add    $0x1,%ebx
792a9a44:	83 d6 00             	adc    $0x0,%esi
792a9a47:	83 fe 00             	cmp    $0x0,%esi
792a9a4a:	77 06                	ja     792a9a52 <_raw_spin_lock+0xa9>
792a9a4c:	3b 5c 24 08          	cmp    0x8(%esp),%ebx
792a9a50:	72 dc                	jb     792a9a2e <_raw_spin_lock+0x85>
792a9a52:	85 ed                	test   %ebp,%ebp
792a9a54:	75 06                	jne    792a9a5c <_raw_spin_lock+0xb3>
792a9a56:	31 db                	xor    %ebx,%ebx
792a9a58:	31 f6                	xor    %esi,%esi
792a9a5a:	eb f0                	jmp    792a9a4c <_raw_spin_lock+0xa3>
792a9a5c:	57                   	push   %edi
792a9a5d:	8b 54 24 08          	mov    0x8(%esp),%edx
792a9a61:	31 ed                	xor    %ebp,%ebp
792a9a63:	64 a1 e0 c0 0a 7a    	mov    %fs:0x7a0ac0e0,%eax
792a9a69:	ff b2 00 02 00 00    	pushl  0x200(%edx)
792a9a6f:	ff 74 24 14          	pushl  0x14(%esp)
792a9a73:	50                   	push   %eax
792a9a74:	68 d0 00 dd 79       	push   $0x79dd00d0
792a9a79:	e8 8a 26 7d 00       	call   79a7c108 <printk>
792a9a7e:	e8 56 25 7d 00       	call   79a7bfd9 <dump_stack>
792a9a83:	e8 6e 81 d6 ff       	call   79011bf6 <arch_trigger_all_cpu_backtrace>
792a9a88:	83 c4 14             	add    $0x14,%esp
792a9a8b:	eb 9b                	jmp    792a9a28 <_raw_spin_lock+0x7f>
792a9a8d:	89 fb                	mov    %edi,%ebx
792a9a8f:	64 a1 e0 c0 0a 7a    	mov    %fs:0x7a0ac0e0,%eax
792a9a95:	89 43 08             	mov    %eax,0x8(%ebx)
792a9a98:	64 a1 84 a4 0a 7a    	mov    %fs:0x7a0aa484,%eax
792a9a9e:	89 43 0c             	mov    %eax,0xc(%ebx)
792a9aa1:	83 c4 10             	add    $0x10,%esp
792a9aa4:	5b                   	pop    %ebx
792a9aa5:	5e                   	pop    %esi
792a9aa6:	5f                   	pop    %edi
792a9aa7:	5d                   	pop    %ebp
792a9aa8:	c3                   	ret    
792a9aa9:	90                   	nop
792a9aaa:	90                   	nop
792a9aab:	90                   	nop

79016a04 <__ticket_spin_lock>:
79016a04:	ba 00 01 00 00       	mov    $0x100,%edx
79016a09:	f0 66 0f c1 10       	lock xadd %dx,(%eax)
79016a0e:	38 f2                	cmp    %dh,%dl
79016a10:	74 06                	je     79016a18 <__ticket_spin_lock+0x14>
79016a12:	f3 90                	pause  
79016a14:	8a 10                	mov    (%eax),%dl
79016a16:	eb f6                	jmp    79016a0e <__ticket_spin_lock+0xa>
79016a18:	c3                   	ret    

79016a19 <__ticket_spin_trylock>:
79016a19:	89 c2                	mov    %eax,%edx
79016a1b:	0f b7 02             	movzwl (%edx),%eax
79016a1e:	38 e0                	cmp    %ah,%al
79016a20:	8d 88 00 01 00 00    	lea    0x100(%eax),%ecx
79016a26:	75 05                	jne    79016a2d <__ticket_spin_trylock+0x14>
79016a28:	f0 66 0f b1 0a       	lock cmpxchg %cx,(%edx)
79016a2d:	0f 94 c1             	sete   %cl
79016a30:	88 c8                	mov    %cl,%al
79016a32:	c3                   	ret    

79016a33 <__ticket_spin_unlock>:
79016a33:	fe 00                	incb   (%eax)
79016a35:	c3                   	ret    

79016a36 <__ticket_spin_is_locked>:
79016a36:	8b 10                	mov    (%eax),%edx
79016a38:	89 d0                	mov    %edx,%eax
79016a3a:	c1 f8 08             	sar    $0x8,%eax
79016a3d:	31 d0                	xor    %edx,%eax
79016a3f:	84 c0                	test   %al,%al
79016a41:	0f 95 c0             	setne  %al
79016a44:	0f b6 c0             	movzbl %al,%eax
79016a47:	c3                   	ret    

79016a48 <__ticket_spin_is_contended>:
79016a48:	8b 10                	mov    (%eax),%edx
79016a4a:	89 d0                	mov    %edx,%eax
79016a4c:	c1 f8 08             	sar    $0x8,%eax
79016a4f:	29 d0                	sub    %edx,%eax
79016a51:	25 ff 00 00 00       	and    $0xff,%eax
79016a56:	48                   	dec    %eax
79016a57:	0f 9f c0             	setg   %al
79016a5a:	0f b6 c0             	movzbl %al,%eax
79016a5d:	c3                   	ret    

79016a5e <default_spin_lock_flags>:
79016a5e:	ff 15 c8 6c f2 79    	call   *0x79f26cc8
79016a64:	c3                   	ret    
79016a65:	90                   	nop
79016a66:	90                   	nop
79016a67:	90                   	nop

I've also attached the config.

	Ingo

View attachment "config" of type "text/plain" (76656 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ