[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150410161135.GF3057@linutronix.de>
Date: Fri, 10 Apr 2015 18:11:35 +0200
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: Thomas Gleixner <tglx@...utronix.de>
Cc: linux-kernel@...r.kernel.org,
Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
Darren Hart <darren@...art.com>,
Steven Rostedt <rostedt@...dmis.org>,
fredrik.markstrom@...driver.com,
Davidlohr Bueso <dave@...olabs.net>,
Manfred Spraul <manfred@...orfullife.com>
Subject: [PATCH 2/3 v2] futex: avoid double wake up in futex_wake() on -RT
futex_wake() wakes the waiter while holding the hb->lock. The waiter
does not take the hb->lock and can leave the kernel. However the next
operation the same futex operation will point to the same hb->lock and
we will see a small dance around the lock including prio-boosting and
context switch:
low prio task FUTEX_WAKE on high prio
| ft-1489 [000] ....1.. 81.167501: sys_enter: sys_futex (8049f60, 1, 1, 0, 0, 0)
| ft-1489 [000] dN..311 81.167504: sched_wakeup: pid=1490 prio=94
| ft-1489 [000] d...311 81.167520: sched_switch: prev_pid=1489 prev_prio=120 prev_state=R+ ==> next_pid=1490 next_prio=94
| ft-1490 [000] ....1.. 81.167522: sys_exit: sys_futex = 0
prio task FUTEX_WAKE on low prio
| ft-1490 [000] ....1.. 81.167528: sys_enter: sys_futex (8049f5c, 1, 1, 0, 0, 0)
| ft-1490 [000] ....1.. 81.167530: sys_exit: sys_futex = 0
prio task waits FUTEX_WAIT, hb->lock still owned by low prio task
| ft-1490 [000] ....1.. 81.167534: sys_enter: sys_futex (8049f60, 0, 1, 0, 0, 0)
| ft-1490 [000] d...411 81.167895: sched_pi_setprio: pid=1489 oldprio=120 newprio=94
| ft-1490 [000] d...311 81.167901: sched_switch: prev_pid=1490 prev_prio=94 prev_state=D ==> next_pid=1489 next_prio=94
| ft-1489 [000] d...411 81.167910: sched_wakeup: pid=1490 prio=94
| ft-1489 [000] d...311 81.167912: sched_pi_setprio: pid=1489 oldprio=94 newprio=120
| ft-1489 [000] d...311 81.167915: sched_switch: prev_pid=1489 prev_prio=120 prev_state=R+ ==> next_pid=1490 next_prio=94
| ft-1490 [000] d...3.. 81.167922: sched_switch: prev_pid=1490 prev_prio=94 prev_state=S ==> next_pid=1489 next_prio=120
| ft-1489 [000] ....1.. 81.167924: sys_exit: sys_futex = 1
This patch delays the wakeup of the process untill the hb->lock is
dropped to avoid boosting + context switch to obtain the lock.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
v1…v2:
- update patch description
- move the comment to __wake_futex()
- move the wakeup before the out_put_key label in futex_wake()
kernel/futex.c | 24 +++++++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index b38abe3573a8..658f4d05cd6f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1092,12 +1092,12 @@ static void __unqueue_futex(struct futex_q *q)
* The hash bucket lock must be held when this is called.
* Afterwards, the futex_q must not be accessed.
*/
-static void wake_futex(struct futex_q *q)
+static struct task_struct *__wake_futex(struct futex_q *q)
{
struct task_struct *p = q->task;
if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
- return;
+ return NULL;
/*
* We set q->lock_ptr = NULL _before_ we wake up the task. If
@@ -1117,6 +1117,15 @@ static void wake_futex(struct futex_q *q)
*/
smp_wmb();
q->lock_ptr = NULL;
+ return p;
+}
+
+static void wake_futex(struct futex_q *q)
+{
+ struct task_struct *p = __wake_futex(q);
+
+ if (!p)
+ return;
wake_up_state(p, TASK_NORMAL);
put_task_struct(p);
@@ -1228,6 +1237,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
union futex_key key = FUTEX_KEY_INIT;
+ struct task_struct *waiter = NULL;
int ret;
if (!bitset)
@@ -1256,13 +1266,21 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
if (!(this->bitset & bitset))
continue;
- wake_futex(this);
+ if (nr_wake == 1)
+ waiter = __wake_futex(this);
+ else
+ wake_futex(this);
if (++ret >= nr_wake)
break;
}
}
spin_unlock(&hb->lock);
+ if (waiter) {
+ wake_up_state(waiter, TASK_NORMAL);
+ put_task_struct(waiter);
+ }
+
out_put_key:
put_futex_key(&key);
out:
--
2.1.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists