lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241215230642.104118-9-bigeasy@linutronix.de>
Date: Mon, 16 Dec 2024 00:00:12 +0100
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-kernel@...r.kernel.org
Cc: André Almeida <andrealmeid@...lia.com>,
	Darren Hart <dvhart@...radead.org>,
	Davidlohr Bueso <dave@...olabs.net>,
	Ingo Molnar <mingo@...hat.com>,
	Juri Lelli <juri.lelli@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Valentin Schneider <vschneid@...hat.com>,
	Waiman Long <longman@...hat.com>,
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Subject: [PATCH v5 08/14] futex: Introduce futex_get_locked_hb().

futex_lock_pi() and __fixup_pi_state_owner() acquire the
futex_q::lock_ptr without holding a reference, assuming the previously
obtained hb and the assigned lock_ptr are still valid. This is no longer
the case once the hb can be resized: the old hb becomes invalid after
the reference is dropped.

Introduce futex_get_locked_hb() to lock the hb recorded in
futex_q::lock_ptr. The lock pointer is read in an RCU section to ensure
that it does not go away if the hb has been replaced after the old
pointer was observed. After locking, the pointer needs to be compared
to check if it changed. If so, the hb has been replaced, the user has
been moved to the new one, and lock_ptr has been updated. The lock
operation needs to be redone in this case.
Once the lock_ptr is unchanged, we can return the futex_hash_bucket it
belongs to as the locked hb for the caller. This is important because we
don't own a reference, so the hb is valid only as long as we hold the
lock. This means that if the hb is resized, this (old) hb remains valid
as long as we hold the lock, because all users need to be moved to the
new hb first. So the task performing the resize will block.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
 kernel/futex/core.c    | 27 +++++++++++++++++++++++++++
 kernel/futex/futex.h   |  2 +-
 kernel/futex/pi.c      |  9 +++++++--
 kernel/futex/requeue.c |  8 +++++---
 4 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 3cfdd4c02f261..6bccf48cdb049 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -639,6 +639,33 @@ int futex_unqueue(struct futex_q *q)
 	return ret;
 }
 
+struct futex_hash_bucket *futex_get_locked_hb(struct futex_q *q)
+{
+	struct futex_hash_bucket *hb;
+	spinlock_t *lock_ptr;
+
+	/*
+	 * See futex_unqueue() why lock_ptr can change.
+	 */
+	guard(rcu)();
+retry:
+	lock_ptr = READ_ONCE(q->lock_ptr);
+	spin_lock(lock_ptr);
+
+	if (unlikely(lock_ptr != q->lock_ptr)) {
+		spin_unlock(lock_ptr);
+		goto retry;
+	}
+
+	hb = container_of(lock_ptr, struct futex_hash_bucket, lock);
+	/*
+	 * We don't acquire a reference on the hb because we don't get it
+	 * if a resize is in progress and we got the old hb->lock before the
+	 * other task got it which meant to move us to the new hb.
+	 */
+	return hb;
+}
+
 /*
  * PI futexes can not be requeued and must remove themselves from the hash
  * bucket. The hash bucket lock (i.e. lock_ptr) is held.
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 5793546a48ebf..143bf1523fa4a 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -196,7 +196,7 @@ enum futex_access {
 
 extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
 			 enum futex_access rw);
-
+extern struct futex_hash_bucket *futex_get_locked_hb(struct futex_q *q);
 extern struct hrtimer_sleeper *
 futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
 		  int flags, u64 range_ns);
diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
index 8561f94f21ed9..506ba1ad8ff23 100644
--- a/kernel/futex/pi.c
+++ b/kernel/futex/pi.c
@@ -806,7 +806,7 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 		break;
 	}
 
-	spin_lock(q->lock_ptr);
+	futex_get_locked_hb(q);
 	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
 
 	/*
@@ -922,6 +922,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
 	struct rt_mutex_waiter rt_waiter;
 	struct futex_hash_bucket *hb;
 	struct futex_q q = futex_q_init;
+	bool no_block_fp = false;
 	DEFINE_WAKE_Q(wake_q);
 	int res, ret;
 
@@ -988,6 +989,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
 		ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
 		/* Fixup the trylock return value: */
 		ret = ret ? 0 : -EWOULDBLOCK;
+		no_block_fp = true;
 		goto no_block;
 	}
 
@@ -1024,6 +1026,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
 	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
 	wake_up_q(&wake_q);
 	preempt_enable();
+	futex_hash_put(hb);
 
 	if (ret) {
 		if (ret == 1)
@@ -1063,7 +1066,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
 	 * spinlock/rtlock (which might enqueue its own rt_waiter) and fix up
 	 * the
 	 */
-	spin_lock(q.lock_ptr);
+	hb = futex_get_locked_hb(&q);
 	/*
 	 * Waiter is unqueued.
 	 */
@@ -1083,6 +1086,8 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
 
 	futex_unqueue_pi(&q);
 	spin_unlock(q.lock_ptr);
+	if (no_block_fp)
+		futex_hash_put(hb);
 	goto out;
 
 out_unlock_put_key:
diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c
index 0395740ce5e71..1f3ac76ce1229 100644
--- a/kernel/futex/requeue.c
+++ b/kernel/futex/requeue.c
@@ -826,15 +826,17 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 	switch (futex_requeue_pi_wakeup_sync(&q)) {
 	case Q_REQUEUE_PI_IGNORE:
 		/* The waiter is still on uaddr1 */
-		spin_lock(&hb->lock);
+		hb = futex_get_locked_hb(&q);
+
 		ret = handle_early_requeue_pi_wakeup(hb, &q, to);
 		spin_unlock(&hb->lock);
+
 		break;
 
 	case Q_REQUEUE_PI_LOCKED:
 		/* The requeue acquired the lock */
 		if (q.pi_state && (q.pi_state->owner != current)) {
-			spin_lock(q.lock_ptr);
+			futex_get_locked_hb(&q);
 			ret = fixup_pi_owner(uaddr2, &q, true);
 			/*
 			 * Drop the reference to the pi state which the
@@ -861,7 +863,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 		if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
 			ret = 0;
 
-		spin_lock(q.lock_ptr);
+		futex_get_locked_hb(&q);
 		debug_rt_mutex_free_waiter(&rt_waiter);
 		/*
 		 * Fixup the pi_state owner and possibly acquire the lock if we
-- 
2.45.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ