lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Fri, 10 Apr 2015 18:11:35 +0200
From:	Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To:	Thomas Gleixner <tglx@...utronix.de>
Cc:	linux-kernel@...r.kernel.org,
	Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...hat.com>,
	Darren Hart <darren@...art.com>,
	Steven Rostedt <rostedt@...dmis.org>,
	fredrik.markstrom@...driver.com,
	Davidlohr Bueso <dave@...olabs.net>,
	Manfred Spraul <manfred@...orfullife.com>
Subject: [PATCH 2/3 v2] futex: avoid double wake up in futex_wake() on -RT

futex_wake() wakes the waiter while holding the hb->lock. The waiter
does not take the hb->lock and can leave the kernel. However the next
operation the same futex operation will point to the same hb->lock and
we will see a small dance around the lock including prio-boosting and
context switch:

low prio task FUTEX_WAKE on high prio
| ft-1489  [000] ....1..    81.167501: sys_enter: sys_futex (8049f60, 1, 1, 0, 0, 0)
| ft-1489  [000] dN..311    81.167504: sched_wakeup: pid=1490 prio=94
| ft-1489  [000] d...311    81.167520: sched_switch: prev_pid=1489 prev_prio=120 prev_state=R+ ==> next_pid=1490 next_prio=94
| ft-1490  [000] ....1..    81.167522: sys_exit: sys_futex = 0

prio task FUTEX_WAKE on low prio
| ft-1490  [000] ....1..    81.167528: sys_enter: sys_futex (8049f5c, 1, 1, 0, 0, 0)
| ft-1490  [000] ....1..    81.167530: sys_exit: sys_futex = 0

prio task waits FUTEX_WAIT, hb->lock still owned by low prio task
| ft-1490  [000] ....1..    81.167534: sys_enter: sys_futex (8049f60, 0, 1, 0, 0, 0)
| ft-1490  [000] d...411    81.167895: sched_pi_setprio: pid=1489 oldprio=120 newprio=94
| ft-1490  [000] d...311    81.167901: sched_switch: prev_pid=1490 prev_prio=94 prev_state=D ==> next_pid=1489 next_prio=94
| ft-1489  [000] d...411    81.167910: sched_wakeup: pid=1490 prio=94
| ft-1489  [000] d...311    81.167912: sched_pi_setprio: pid=1489 oldprio=94 newprio=120
| ft-1489  [000] d...311    81.167915: sched_switch: prev_pid=1489 prev_prio=120 prev_state=R+ ==> next_pid=1490 next_prio=94
| ft-1490  [000] d...3..    81.167922: sched_switch: prev_pid=1490 prev_prio=94 prev_state=S ==> next_pid=1489 next_prio=120
| ft-1489  [000] ....1..    81.167924: sys_exit: sys_futex = 1

This patch delays the wakeup of the process untill the hb->lock is
dropped to avoid boosting + context switch to obtain the lock.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
v1…v2:
    - update patch description
    - move the comment to __wake_futex()
    - move the wakeup before the out_put_key label in futex_wake()

 kernel/futex.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index b38abe3573a8..658f4d05cd6f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1092,12 +1092,12 @@ static void __unqueue_futex(struct futex_q *q)
  * The hash bucket lock must be held when this is called.
  * Afterwards, the futex_q must not be accessed.
  */
-static void wake_futex(struct futex_q *q)
+static struct task_struct *__wake_futex(struct futex_q *q)
 {
 	struct task_struct *p = q->task;
 
 	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
-		return;
+		return NULL;
 
 	/*
 	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
@@ -1117,6 +1117,15 @@ static void wake_futex(struct futex_q *q)
 	 */
 	smp_wmb();
 	q->lock_ptr = NULL;
+	return p;
+}
+
+static void wake_futex(struct futex_q *q)
+{
+	struct task_struct *p = __wake_futex(q);
+
+	if (!p)
+		return;
 
 	wake_up_state(p, TASK_NORMAL);
 	put_task_struct(p);
@@ -1228,6 +1237,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
 	union futex_key key = FUTEX_KEY_INIT;
+	struct task_struct *waiter = NULL;
 	int ret;
 
 	if (!bitset)
@@ -1256,13 +1266,21 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 			if (!(this->bitset & bitset))
 				continue;
 
-			wake_futex(this);
+			if (nr_wake == 1)
+				waiter = __wake_futex(this);
+			else
+				wake_futex(this);
 			if (++ret >= nr_wake)
 				break;
 		}
 	}
 
 	spin_unlock(&hb->lock);
+	if (waiter) {
+		wake_up_state(waiter, TASK_NORMAL);
+		put_task_struct(waiter);
+	}
+
 out_put_key:
 	put_futex_key(&key);
 out:
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ