Message-ID: <20150410112748.GB30477@gmail.com>
Date:	Fri, 10 Apr 2015 13:27:48 +0200
From:	Ingo Molnar <mingo@...nel.org>
To:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Cc:	Linus Torvalds <torvalds@...ux-foundation.org>,
	Jason Low <jason.low2@...com>,
	Peter Zijlstra <peterz@...radead.org>,
	Davidlohr Bueso <dave@...olabs.net>,
	Tim Chen <tim.c.chen@...ux.intel.com>,
	Aswin Chandramouleeswaran <aswin@...com>,
	LKML <linux-kernel@...r.kernel.org>
Subject: [PATCH] mutex: Improve mutex_spin_on_owner() code generation


* Ingo Molnar <mingo@...nel.org> wrote:

> although the double unrolled need_resched() check looks silly:
> 
>   4d:	48 8b 80 10 c0 ff ff 	mov    -0x3ff0(%rax),%rax
>   54:	a8 08                	test   $0x8,%al
> 
>   75:	48 8b 81 10 c0 ff ff 	mov    -0x3ff0(%rcx),%rax
>   7c:	a8 08                	test   $0x8,%al

The patch below fixes that and shaves 40 bytes off 
mutex_spin_on_owner()'s code size.

Thanks,

	Ingo

===================================>
From 065e46b7398e38f2e4be98c453e797ee511170e2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@...nel.org>
Date: Fri, 10 Apr 2015 13:21:24 +0200
Subject: [PATCH] mutex: Improve mutex_spin_on_owner() code generation

GCC somewhat stupidly decides that the loop within mutex_spin_on_owner()
needs unrolling:

0000000000000030 <mutex_spin_on_owner.isra.4>:
  30:	48 3b 37             	cmp    (%rdi),%rsi
  33:	55                   	push   %rbp
  34:	48 89 e5             	mov    %rsp,%rbp
  37:	75 4f                	jne    88 <mutex_spin_on_owner.isra.4+0x58>
  39:	48 8d 56 28          	lea    0x28(%rsi),%rdx
  3d:	8b 46 28             	mov    0x28(%rsi),%eax
  40:	85 c0                	test   %eax,%eax
  42:	74 3c                	je     80 <mutex_spin_on_owner.isra.4+0x50>
  44:	65 48 8b 04 25 00 00 	mov    %gs:0x0,%rax
  4b:	00 00
  4d:	48 8b 80 10 c0 ff ff 	mov    -0x3ff0(%rax),%rax
  54:	a8 08                	test   $0x8,%al
  56:	75 28                	jne    80 <mutex_spin_on_owner.isra.4+0x50>
  58:	65 48 8b 0c 25 00 00 	mov    %gs:0x0,%rcx
  5f:	00 00
  61:	0f 1f 80 00 00 00 00 	nopl   0x0(%rax)
  68:	f3 90                	pause
  6a:	48 3b 37             	cmp    (%rdi),%rsi
  6d:	75 19                	jne    88 <mutex_spin_on_owner.isra.4+0x58>
  6f:	8b 02                	mov    (%rdx),%eax
  71:	85 c0                	test   %eax,%eax
  73:	74 0b                	je     80 <mutex_spin_on_owner.isra.4+0x50>
  75:	48 8b 81 10 c0 ff ff 	mov    -0x3ff0(%rcx),%rax
  7c:	a8 08                	test   $0x8,%al
  7e:	74 e8                	je     68 <mutex_spin_on_owner.isra.4+0x38>
  80:	31 c0                	xor    %eax,%eax
  82:	5d                   	pop    %rbp
  83:	c3                   	retq
  84:	0f 1f 40 00          	nopl   0x0(%rax)
  88:	b8 01 00 00 00       	mov    $0x1,%eax
  8d:	5d                   	pop    %rbp
  8e:	c3                   	retq

The need_resched() check is duplicated:

  4d:	48 8b 80 10 c0 ff ff 	mov    -0x3ff0(%rax),%rax
  54:	a8 08                	test   $0x8,%al
  56:	75 28                	jne    80 <mutex_spin_on_owner.isra.4+0x50>

  75:	48 8b 81 10 c0 ff ff 	mov    -0x3ff0(%rcx),%rax
  7c:	a8 08                	test   $0x8,%al
  7e:	74 e8                	je     68 <mutex_spin_on_owner.isra.4+0x38>
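
This duplication is a side effect of GCC rotating the loop: the
original source (the lines this patch removes, quoted here for
reference) has the shape:

	while (lock->owner == owner) {
		get_kernel(on_cpu, &owner->on_cpu);

		if (!on_cpu || need_resched())
			return false;

		cpu_relax_lowlatency();
	}

	return true;

and the rotated form peels the first iteration's owner/on_cpu/
need_resched tests out of the loop body, so the need_resched() load
and test are emitted once for loop entry and once for the loop proper.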

So restructure the loop a bit, to get much tighter code:

0000000000000030 <mutex_spin_on_owner.isra.5>:
  30:	55                   	push   %rbp
  31:	65 48 8b 14 25 00 00 	mov    %gs:0x0,%rdx
  38:	00 00
  3a:	48 89 e5             	mov    %rsp,%rbp
  3d:	48 39 37             	cmp    %rsi,(%rdi)
  40:	75 1e                	jne    60 <mutex_spin_on_owner.isra.5+0x30>
  42:	8b 46 28             	mov    0x28(%rsi),%eax
  45:	85 c0                	test   %eax,%eax
  47:	74 0d                	je     56 <mutex_spin_on_owner.isra.5+0x26>
  49:	f3 90                	pause
  4b:	48 8b 82 10 c0 ff ff 	mov    -0x3ff0(%rdx),%rax
  52:	a8 08                	test   $0x8,%al
  54:	74 e7                	je     3d <mutex_spin_on_owner.isra.5+0xd>
  56:	31 c0                	xor    %eax,%eax
  58:	5d                   	pop    %rbp
  59:	c3                   	retq
  5a:	66 0f 1f 44 00 00    	nopw   0x0(%rax,%rax,1)
  60:	b8 01 00 00 00       	mov    $0x1,%eax
  65:	5d                   	pop    %rbp
  66:	c3                   	retq
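
That is also where the 40 bytes went: the old function runs from 0x30
through the final retq at 0x8e, i.e. 0x5f (95) bytes, while the new one
runs from 0x30 through the retq at 0x66, i.e. 0x37 (55) bytes, for a
saving of exactly 40 bytes.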

What changed relative to the previous loop is that cpu_relax_lowlatency()
is now executed before the need_resched() check. This in fact makes sense:
only after waiting a bit (or a lot, on virtualized platforms) should we
expect 'need_resched' to have changed.
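
For the curious, the codegen difference can be reproduced outside the
kernel with a standalone sketch. Everything below (the struct layout,
the need_resched()/cpu_relax() stand-ins, the spin_old()/spin_new()
names) is made up for illustration and only mimics the kernel loop;
whether a particular GCC version rotates spin_old() the same way will
of course vary:

/* codegen-demo.c: user-space stand-in, not kernel code. Build and
 * inspect the generated code with something like:
 *
 *	gcc -O2 -c codegen-demo.c && objdump -d codegen-demo.o
 */
#include <stdbool.h>

struct task  { int on_cpu; };
struct mutex { struct task *owner; };

extern bool resched_flag;		/* stand-in for TIF_NEED_RESCHED */

static inline bool need_resched(void)
{
	return __atomic_load_n(&resched_flag, __ATOMIC_RELAXED);
}

static inline void cpu_relax(void)
{
	__asm__ __volatile__ ("pause");
}

/* Old shape: combined exit condition in a while () loop, which GCC
 * may rotate, duplicating the need_resched() load+test: */
bool spin_old(struct mutex *lock, struct task *owner)
{
	while (__atomic_load_n(&lock->owner, __ATOMIC_RELAXED) == owner) {
		if (!__atomic_load_n(&owner->on_cpu, __ATOMIC_RELAXED) ||
		    need_resched())
			return false;
		cpu_relax();
	}
	return true;
}

/* New shape: for (;;) with early exits and cpu_relax() ahead of the
 * need_resched() test, mirroring the patch below: */
bool spin_new(struct mutex *lock, struct task *owner)
{
	for (;;) {
		if (__atomic_load_n(&lock->owner, __ATOMIC_RELAXED) != owner)
			return true;
		if (!__atomic_load_n(&owner->on_cpu, __ATOMIC_RELAXED))
			return false;
		cpu_relax();
		if (need_resched())
			return false;
	}
}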

Not-Yet-Signed-off-by: Ingo Molnar <mingo@...nel.org>
---
 kernel/locking/mutex.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 68c750f4e8e8..45d2445e457a 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -225,9 +225,12 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock,
 static noinline
 bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 {
-	int on_cpu;
+	int owner_on_cpu;
+
+	for (;;) {
+		if (unlikely(lock->owner != owner))
+			return true;
 
-	while (lock->owner == owner) {
 		/*
 		 * Use the non-faulting copy-user primitive to get the owner->on_cpu
 		 * value that works even on CONFIG_DEBUG_PAGEALLOC=y instrumented
@@ -251,15 +254,16 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 		 * NOTE2: We ignore failed copies, as the next iteration will clean
 		 *        up after us. This saves an extra branch in the common case.
 		 */
-		get_kernel(on_cpu, &owner->on_cpu);
-
-		if (!on_cpu || need_resched())
+		get_kernel(owner_on_cpu, &owner->on_cpu);
+		if (unlikely(!owner_on_cpu))
 			return false;
 
 		cpu_relax_lowlatency();
-	}
 
-	return true;
+		/* Stop spinning if we are to be preempted: */
+		if (need_resched())
+			return false;
+	}
 }
 
 /*
--