Message-ID: <20150410112748.GB30477@gmail.com>
Date: Fri, 10 Apr 2015 13:27:48 +0200
From: Ingo Molnar <mingo@...nel.org>
To: "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>,
Jason Low <jason.low2@...com>,
Peter Zijlstra <peterz@...radead.org>,
Davidlohr Bueso <dave@...olabs.net>,
Tim Chen <tim.c.chen@...ux.intel.com>,
Aswin Chandramouleeswaran <aswin@...com>,
LKML <linux-kernel@...r.kernel.org>
Subject: [PATCH] mutex: Improve mutex_spin_on_owner() code generation
* Ingo Molnar <mingo@...nel.org> wrote:
> although the double unrolled need_resched() check looks silly:
>
> 4d: 48 8b 80 10 c0 ff ff mov -0x3ff0(%rax),%rax
> 54: a8 08 test $0x8,%al
>
> 75: 48 8b 81 10 c0 ff ff mov -0x3ff0(%rcx),%rax
> 7c: a8 08 test $0x8,%al
The patch below fixes that and shaves 40 bytes off
mutex_spin_on_owner()'s code size.
Thanks,
Ingo
===================================>
From 065e46b7398e38f2e4be98c453e797ee511170e2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@...nel.org>
Date: Fri, 10 Apr 2015 13:21:24 +0200
Subject: [PATCH] mutex: Improve mutex_spin_on_owner() code generation
GCC somewhat stupidly decides that the loop within mutex_spin_on_owner()
needs unrolling:
0000000000000030 <mutex_spin_on_owner.isra.4>:
30: 48 3b 37 cmp (%rdi),%rsi
33: 55 push %rbp
34: 48 89 e5 mov %rsp,%rbp
37: 75 4f jne 88 <mutex_spin_on_owner.isra.4+0x58>
39: 48 8d 56 28 lea 0x28(%rsi),%rdx
3d: 8b 46 28 mov 0x28(%rsi),%eax
40: 85 c0 test %eax,%eax
42: 74 3c je 80 <mutex_spin_on_owner.isra.4+0x50>
44: 65 48 8b 04 25 00 00 mov %gs:0x0,%rax
4b: 00 00
4d: 48 8b 80 10 c0 ff ff mov -0x3ff0(%rax),%rax
54: a8 08 test $0x8,%al
56: 75 28 jne 80 <mutex_spin_on_owner.isra.4+0x50>
58: 65 48 8b 0c 25 00 00 mov %gs:0x0,%rcx
5f: 00 00
61: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
68: f3 90 pause
6a: 48 3b 37 cmp (%rdi),%rsi
6d: 75 19 jne 88 <mutex_spin_on_owner.isra.4+0x58>
6f: 8b 02 mov (%rdx),%eax
71: 85 c0 test %eax,%eax
73: 74 0b je 80 <mutex_spin_on_owner.isra.4+0x50>
75: 48 8b 81 10 c0 ff ff mov -0x3ff0(%rcx),%rax
7c: a8 08 test $0x8,%al
7e: 74 e8 je 68 <mutex_spin_on_owner.isra.4+0x38>
80: 31 c0 xor %eax,%eax
82: 5d pop %rbp
83: c3 retq
84: 0f 1f 40 00 nopl 0x0(%rax)
88: b8 01 00 00 00 mov $0x1,%eax
8d: 5d pop %rbp
8e: c3 retq
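For reference, this is roughly the C shape of the pre-patch loop being
compiled (reconstructed from the '-' lines and context of the diff below;
the long comment is abbreviated here):

static noinline
bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
{
	int on_cpu;

	while (lock->owner == owner) {
		/* Non-faulting read of owner->on_cpu (see the patch below): */
		get_kernel(on_cpu, &owner->on_cpu);

		if (!on_cpu || need_resched())
			return false;

		cpu_relax_lowlatency();
	}

	return true;
}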
The need_resched() check is duplicated:
4d: 48 8b 80 10 c0 ff ff mov -0x3ff0(%rax),%rax
54: a8 08 test $0x8,%al
56: 75 28 jne 80 <mutex_spin_on_owner.isra.4+0x50>
75: 48 8b 81 10 c0 ff ff mov -0x3ff0(%rcx),%rax
7c: a8 08 test $0x8,%al
7e: 74 e8 je 68 <mutex_spin_on_owner.isra.4+0x38>
So restructure the loop a bit, to get much tighter code:
0000000000000030 <mutex_spin_on_owner.isra.5>:
30: 55 push %rbp
31: 65 48 8b 14 25 00 00 mov %gs:0x0,%rdx
38: 00 00
3a: 48 89 e5 mov %rsp,%rbp
3d: 48 39 37 cmp %rsi,(%rdi)
40: 75 1e jne 60 <mutex_spin_on_owner.isra.5+0x30>
42: 8b 46 28 mov 0x28(%rsi),%eax
45: 85 c0 test %eax,%eax
47: 74 0d je 56 <mutex_spin_on_owner.isra.5+0x26>
49: f3 90 pause
4b: 48 8b 82 10 c0 ff ff mov -0x3ff0(%rdx),%rax
52: a8 08 test $0x8,%al
54: 74 e7 je 3d <mutex_spin_on_owner.isra.5+0xd>
56: 31 c0 xor %eax,%eax
58: 5d pop %rbp
59: c3 retq
5a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
60: b8 01 00 00 00 mov $0x1,%eax
65: 5d pop %rbp
66: c3 retq
What changed relative to the previous loop is that cpu_relax()
is done before the need_resched() check. This in fact makes sense:
only after waiting a bit (or a lot, on virtualized platforms) should
we expect 'need_resched' to have changed.
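In plain C, the restructured loop from the patch below looks roughly like
this; the need_resched() test now sits at the bottom of the loop body,
evaluated only after cpu_relax_lowlatency():

static noinline
bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
{
	int owner_on_cpu;

	for (;;) {
		/* Owner changed: go back and try to acquire the lock: */
		if (unlikely(lock->owner != owner))
			return true;

		/* Non-faulting read of owner->on_cpu (see the patch below): */
		get_kernel(owner_on_cpu, &owner->on_cpu);

		/* Owner is no longer running on a CPU: stop spinning: */
		if (unlikely(!owner_on_cpu))
			return false;

		cpu_relax_lowlatency();

		/* Stop spinning if we are to be preempted: */
		if (need_resched())
			return false;
	}
}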
Not-Yet-Signed-off-by: Ingo Molnar <mingo@...nel.org>
---
kernel/locking/mutex.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 68c750f4e8e8..45d2445e457a 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -225,9 +225,12 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock,
 static noinline
 bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 {
-	int on_cpu;
+	int owner_on_cpu;
+
+	for (;;) {
+		if (unlikely(lock->owner != owner))
+			return true;
 
-	while (lock->owner == owner) {
 		/*
 		 * Use the non-faulting copy-user primitive to get the owner->on_cpu
 		 * value that works even on CONFIG_DEBUG_PAGEALLOC=y instrumented
@@ -251,15 +254,16 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 		 * NOTE2: We ignore failed copies, as the next iteration will clean
 		 * up after us. This saves an extra branch in the common case.
 		 */
-		get_kernel(on_cpu, &owner->on_cpu);
-
-		if (!on_cpu || need_resched())
+		get_kernel(owner_on_cpu, &owner->on_cpu);
+		if (unlikely(!owner_on_cpu))
 			return false;
 
 		cpu_relax_lowlatency();
-	}
 
-	return true;
+		/* Stop spinning if we are to be preempted: */
+		if (need_resched())
+			return false;
+	}
 }
 
 /*
--