lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241203164335.1125381-6-bigeasy@linutronix.de>
Date: Tue,  3 Dec 2024 17:42:13 +0100
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-kernel@...r.kernel.org
Cc: André Almeida <andrealmeid@...lia.com>,
	Darren Hart <dvhart@...radead.org>,
	Davidlohr Bueso <dave@...olabs.net>,
	Ingo Molnar <mingo@...hat.com>,
	Juri Lelli <juri.lelli@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Valentin Schneider <vschneid@...hat.com>,
	Waiman Long <longman@...hat.com>,
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Subject: [PATCH v4 05/11] futex: Track the futex hash bucket.

Add futex_hash_get/put() to keep the assigned hash_bucket around while a
futex operation is performed. Have RCU lifetime guarantee for
futex_hash_bucket_private.

This is should have the right amount of gets/ puts so that the private
hash bucket is released on exit. This is preparatory work to allow
change the hash bucket at runtime.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
 include/linux/futex.h    |   2 +-
 include/linux/mm_types.h |   5 +-
 kernel/futex/core.c      | 104 +++++++++++++++++++++++++++++++++------
 kernel/futex/futex.h     |   8 +++
 kernel/futex/pi.c        |   7 +++
 kernel/futex/requeue.c   |  16 ++++++
 kernel/futex/waitwake.c  |  15 +++++-
 7 files changed, 136 insertions(+), 21 deletions(-)

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 61e81b866d34e..359fc24eb37ff 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -84,7 +84,7 @@ void futex_hash_free(struct mm_struct *mm);
 
 static inline void futex_mm_init(struct mm_struct *mm)
 {
-	mm->futex_hash_bucket = NULL;
+	rcu_assign_pointer(mm->futex_hash_bucket, NULL);
 }
 
 #else
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b16b97ab8fb2a..4f39928631042 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -30,7 +30,7 @@
 #define INIT_PASID	0
 
 struct address_space;
-struct futex_hash_bucket;
+struct futex_hash_bucket_private;
 struct mem_cgroup;
 
 /*
@@ -903,8 +903,7 @@ struct mm_struct {
 		int mm_lock_seq;
 #endif
 
-		unsigned int			futex_hash_mask;
-		struct futex_hash_bucket	*futex_hash_bucket;
+		struct futex_hash_bucket_private	__rcu *futex_hash_bucket;
 
 		unsigned long hiwater_rss; /* High-watermark of RSS usage */
 		unsigned long hiwater_vm;  /* High-water virtual memory usage */
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 14251bbafaffb..464918d85395e 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -40,6 +40,7 @@
 #include <linux/fault-inject.h>
 #include <linux/slab.h>
 #include <linux/prctl.h>
+#include <linux/rcuref.h>
 
 #include "futex.h"
 #include "../locking/rtmutex_common.h"
@@ -56,6 +57,12 @@ static struct {
 #define futex_queues   (__futex_data.queues)
 #define futex_hashsize (__futex_data.hashsize)
 
+struct futex_hash_bucket_private {
+	rcuref_t	users;
+	unsigned int	hash_mask;
+	struct rcu_head rcu;
+	struct futex_hash_bucket queues[];
+};
 
 /*
  * Fault injections for futexes.
@@ -127,17 +134,24 @@ static inline bool futex_key_is_private(union futex_key *key)
  */
 struct futex_hash_bucket *futex_hash(union futex_key *key)
 {
-	struct futex_hash_bucket *fhb;
+	struct futex_hash_bucket_private *hb_p = NULL;
 	u32 hash;
 
-	fhb = current->mm->futex_hash_bucket;
-	if (fhb && futex_key_is_private(key)) {
-		u32 hash_mask = current->mm->futex_hash_mask;
+	if (futex_key_is_private(key)) {
+		guard(rcu)();
+
+		do {
+			hb_p = rcu_dereference(current->mm->futex_hash_bucket);
+		} while (hb_p && !rcuref_get(&hb_p->users));
+	}
+
+	if (hb_p) {
+		u32 hash_mask = hb_p->hash_mask;
 
 		hash = jhash2((void *)&key->private.address,
 			      sizeof(key->private.address) / 4,
 			      key->both.offset);
-		return &fhb[hash & hash_mask];
+		return &hb_p->queues[hash & hash_mask];
 	}
 	hash = jhash2((u32 *)key,
 		      offsetof(typeof(*key), both.offset) / 4,
@@ -145,6 +159,35 @@ struct futex_hash_bucket *futex_hash(union futex_key *key)
 	return &futex_queues[hash & (futex_hashsize - 1)];
 }
 
+static void futex_hash_priv_put(struct futex_hash_bucket_private *hb_p)
+{
+	if (rcuref_put(&hb_p->users))
+		kvfree_rcu(hb_p, rcu);
+}
+
+void futex_hash_put(struct futex_hash_bucket *hb)
+{
+	struct futex_hash_bucket_private *hb_p;
+
+	if (hb->hb_slot == 0)
+		return;
+	hb_p = container_of(hb, struct futex_hash_bucket_private,
+			    queues[hb->hb_slot - 1]);
+	futex_hash_priv_put(hb_p);
+}
+
+void futex_hash_get(struct futex_hash_bucket *hb)
+{
+	struct futex_hash_bucket_private *hb_p;
+
+	if (hb->hb_slot == 0)
+		return;
+
+	hb_p = container_of(hb, struct futex_hash_bucket_private,
+			    queues[hb->hb_slot - 1]);
+	/* The ref needs to be owned by the caller so this can't fail */
+	WARN_ON_ONCE(!rcuref_get(&hb_p->users));
+}
 
 /**
  * futex_setup_timer - set up the sleeping hrtimer.
@@ -599,7 +642,10 @@ int futex_unqueue(struct futex_q *q)
 	 */
 	lock_ptr = READ_ONCE(q->lock_ptr);
 	if (lock_ptr != NULL) {
+		struct futex_hash_bucket *hb;
+
 		spin_lock(lock_ptr);
+		hb = futex_hb_from_futex_q(q);
 		/*
 		 * q->lock_ptr can change between reading it and
 		 * spin_lock(), causing us to take the wrong lock.  This
@@ -622,6 +668,7 @@ int futex_unqueue(struct futex_q *q)
 		BUG_ON(q->pi_state);
 
 		spin_unlock(lock_ptr);
+		futex_hash_put(hb);
 		ret = 1;
 	}
 
@@ -999,6 +1046,7 @@ static void exit_pi_state_list(struct task_struct *curr)
 		if (!refcount_inc_not_zero(&pi_state->refcount)) {
 			raw_spin_unlock_irq(&curr->pi_lock);
 			cpu_relax();
+			futex_hash_put(hb);
 			raw_spin_lock_irq(&curr->pi_lock);
 			continue;
 		}
@@ -1015,6 +1063,7 @@ static void exit_pi_state_list(struct task_struct *curr)
 			/* retain curr->pi_lock for the loop invariant */
 			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 			spin_unlock(&hb->lock);
+			futex_hash_put(hb);
 			put_pi_state(pi_state);
 			continue;
 		}
@@ -1027,6 +1076,7 @@ static void exit_pi_state_list(struct task_struct *curr)
 		raw_spin_unlock(&curr->pi_lock);
 		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 		spin_unlock(&hb->lock);
+		futex_hash_put(hb);
 
 		rt_mutex_futex_unlock(&pi_state->pi_mutex);
 		put_pi_state(pi_state);
@@ -1147,8 +1197,9 @@ void futex_exit_release(struct task_struct *tsk)
 	futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
 }
 
-static void futex_hash_bucket_init(struct futex_hash_bucket *fhb)
+static void futex_hash_bucket_init(struct futex_hash_bucket *fhb, unsigned int slot)
 {
+	fhb->hb_slot = slot;
 	atomic_set(&fhb->waiters, 0);
 	plist_head_init(&fhb->chain);
 	spin_lock_init(&fhb->lock);
@@ -1156,12 +1207,20 @@ static void futex_hash_bucket_init(struct futex_hash_bucket *fhb)
 
 void futex_hash_free(struct mm_struct *mm)
 {
-	kvfree(mm->futex_hash_bucket);
+	struct futex_hash_bucket_private *hb_p;
+
+	/* own a reference */
+	hb_p = rcu_dereference_check(mm->futex_hash_bucket, true);
+	if (!hb_p)
+		return;
+	WARN_ON(rcuref_read(&hb_p->users) != 1);
+	futex_hash_priv_put(hb_p);
 }
 
 static int futex_hash_allocate(unsigned int hash_slots)
 {
-	struct futex_hash_bucket *fhb;
+	struct futex_hash_bucket_private *hb_p;
+	size_t alloc_size;
 	int i;
 
 	if (current->mm->futex_hash_bucket)
@@ -1179,16 +1238,25 @@ static int futex_hash_allocate(unsigned int hash_slots)
 	if (!is_power_of_2(hash_slots))
 		hash_slots = rounddown_pow_of_two(hash_slots);
 
-	fhb = kvmalloc_array(hash_slots, sizeof(struct futex_hash_bucket), GFP_KERNEL_ACCOUNT);
-	if (!fhb)
+	if (unlikely(check_mul_overflow(hash_slots, sizeof(struct futex_hash_bucket),
+					&alloc_size)))
 		return -ENOMEM;
 
-	current->mm->futex_hash_mask = hash_slots - 1;
+	if (unlikely(check_add_overflow(alloc_size, sizeof(struct futex_hash_bucket_private),
+					&alloc_size)))
+		return -ENOMEM;
+
+	hb_p = kvmalloc(alloc_size, GFP_KERNEL_ACCOUNT);
+	if (!hb_p)
+		return -ENOMEM;
+
+	rcuref_init(&hb_p->users, 1);
+	hb_p->hash_mask = hash_slots - 1;
 
 	for (i = 0; i < hash_slots; i++)
-		futex_hash_bucket_init(&fhb[i]);
+		futex_hash_bucket_init(&hb_p->queues[i], i + 1);
 
-	current->mm->futex_hash_bucket = fhb;
+	rcu_assign_pointer(current->mm->futex_hash_bucket, hb_p);
 	return 0;
 }
 
@@ -1199,8 +1267,12 @@ int futex_hash_allocate_default(void)
 
 static int futex_hash_get_slots(void)
 {
-	if (current->mm->futex_hash_bucket)
-		return current->mm->futex_hash_mask + 1;
+	struct futex_hash_bucket_private *hb_p;
+
+	guard(rcu)();
+	hb_p = rcu_dereference(current->mm->futex_hash_bucket);
+	if (hb_p)
+		return hb_p->hash_mask + 1;
 	return 0;
 }
 
@@ -1243,7 +1315,7 @@ static int __init futex_init(void)
 	futex_hashsize = 1UL << futex_shift;
 
 	for (i = 0; i < futex_hashsize; i++)
-		futex_hash_bucket_init(&futex_queues[i]);
+		futex_hash_bucket_init(&futex_queues[i], 0);
 
 	return 0;
 }
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 618ce1fe870e9..ceea260ad9e80 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -115,6 +115,7 @@ static inline bool should_fail_futex(bool fshared)
  */
 struct futex_hash_bucket {
 	atomic_t waiters;
+	unsigned int hb_slot;
 	spinlock_t lock;
 	struct plist_head chain;
 } ____cacheline_aligned_in_smp;
@@ -202,6 +203,13 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
 		  int flags, u64 range_ns);
 
 extern struct futex_hash_bucket *futex_hash(union futex_key *key);
+extern void futex_hash_put(struct futex_hash_bucket *hb);
+extern void futex_hash_get(struct futex_hash_bucket *hb);
+
+static inline struct futex_hash_bucket *futex_hb_from_futex_q(struct futex_q *q)
+{
+	return container_of(q->lock_ptr, struct futex_hash_bucket, lock);
+}
 
 /**
  * futex_match - Check whether two futex keys are equal
diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
index d62cca5ed8f4c..60a62ab250b08 100644
--- a/kernel/futex/pi.c
+++ b/kernel/futex/pi.c
@@ -964,6 +964,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
 			 * - EAGAIN: The user space value changed.
 			 */
 			futex_q_unlock(hb);
+			futex_hash_put(hb);
 			/*
 			 * Handle the case where the owner is in the middle of
 			 * exiting. Wait for the exit to complete otherwise
@@ -1083,10 +1084,12 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
 
 	futex_unqueue_pi(&q);
 	spin_unlock(q.lock_ptr);
+	futex_hash_put(hb);
 	goto out;
 
 out_unlock_put_key:
 	futex_q_unlock(hb);
+	futex_hash_put(hb);
 
 out:
 	if (to) {
@@ -1097,6 +1100,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
 
 uaddr_faulted:
 	futex_q_unlock(hb);
+	futex_hash_put(hb);
 
 	ret = fault_in_user_writeable(uaddr);
 	if (ret)
@@ -1197,6 +1201,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 
 		get_pi_state(pi_state);
 		spin_unlock(&hb->lock);
+		futex_hash_put(hb);
 
 		/* drops pi_state->pi_mutex.wait_lock */
 		ret = wake_futex_pi(uaddr, uval, pi_state, rt_waiter);
@@ -1236,6 +1241,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 	 */
 	if ((ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, 0))) {
 		spin_unlock(&hb->lock);
+		futex_hash_put(hb);
 		switch (ret) {
 		case -EFAULT:
 			goto pi_faulted;
@@ -1256,6 +1262,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 
 out_unlock:
 	spin_unlock(&hb->lock);
+	futex_hash_put(hb);
 	return ret;
 
 pi_retry:
diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c
index b47bb764b3520..39e96f1bef8ce 100644
--- a/kernel/futex/requeue.c
+++ b/kernel/futex/requeue.c
@@ -87,6 +87,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 		futex_hb_waiters_inc(hb2);
 		plist_add(&q->list, &hb2->chain);
 		q->lock_ptr = &hb2->lock;
+		futex_hash_put(hb1);
+		futex_hash_get(hb2);
 	}
 	q->key = *key2;
 }
@@ -231,8 +233,10 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 
 	WARN_ON(!q->rt_waiter);
 	q->rt_waiter = NULL;
+	futex_hash_put(futex_hb_from_futex_q(q));
 
 	q->lock_ptr = &hb->lock;
+	futex_hash_get(hb);
 
 	/* Signal locked state to the waiter */
 	futex_requeue_pi_complete(q, 1);
@@ -458,6 +462,8 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
 		if (unlikely(ret)) {
 			double_unlock_hb(hb1, hb2);
 			futex_hb_waiters_dec(hb2);
+			futex_hash_put(hb1);
+			futex_hash_put(hb2);
 
 			ret = get_user(curval, uaddr1);
 			if (ret)
@@ -544,6 +550,8 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
 		case -EFAULT:
 			double_unlock_hb(hb1, hb2);
 			futex_hb_waiters_dec(hb2);
+			futex_hash_put(hb1);
+			futex_hash_put(hb2);
 			ret = fault_in_user_writeable(uaddr2);
 			if (!ret)
 				goto retry;
@@ -558,6 +566,8 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
 			 */
 			double_unlock_hb(hb1, hb2);
 			futex_hb_waiters_dec(hb2);
+			futex_hash_put(hb1);
+			futex_hash_put(hb2);
 			/*
 			 * Handle the case where the owner is in the middle of
 			 * exiting. Wait for the exit to complete otherwise
@@ -677,6 +687,8 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
 	double_unlock_hb(hb1, hb2);
 	wake_up_q(&wake_q);
 	futex_hb_waiters_dec(hb2);
+	futex_hash_put(hb1);
+	futex_hash_put(hb2);
 	return ret ? ret : task_count;
 }
 
@@ -815,6 +827,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 	 */
 	if (futex_match(&q.key, &key2)) {
 		futex_q_unlock(hb);
+		futex_hash_put(hb);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -828,6 +841,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 		spin_lock(&hb->lock);
 		ret = handle_early_requeue_pi_wakeup(hb, &q, to);
 		spin_unlock(&hb->lock);
+		futex_hash_put(hb);
 		break;
 
 	case Q_REQUEUE_PI_LOCKED:
@@ -847,6 +861,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 			 */
 			ret = ret < 0 ? ret : 0;
 		}
+		futex_hash_put(futex_hb_from_futex_q(&q));
 		break;
 
 	case Q_REQUEUE_PI_DONE:
@@ -876,6 +891,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 
 		futex_unqueue_pi(&q);
 		spin_unlock(q.lock_ptr);
+		futex_hash_put(futex_hb_from_futex_q(&q));
 
 		if (ret == -EINTR) {
 			/*
diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
index 3a10375d95218..1f2d11eb7f89f 100644
--- a/kernel/futex/waitwake.c
+++ b/kernel/futex/waitwake.c
@@ -113,6 +113,8 @@ bool __futex_wake_mark(struct futex_q *q)
 		return false;
 
 	__futex_unqueue(q);
+	/* Waiters reference */
+	futex_hash_put(futex_hb_from_futex_q(q));
 	/*
 	 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
 	 * is written, without taking any locks. This is possible in the event
@@ -173,8 +175,10 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 	hb = futex_hash(&key);
 
 	/* Make sure we really have tasks to wakeup */
-	if (!futex_hb_waiters_pending(hb))
+	if (!futex_hb_waiters_pending(hb)) {
+		futex_hash_put(hb);
 		return ret;
+	}
 
 	spin_lock(&hb->lock);
 
@@ -196,6 +200,7 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 	}
 
 	spin_unlock(&hb->lock);
+	futex_hash_put(hb);
 	wake_up_q(&wake_q);
 	return ret;
 }
@@ -275,6 +280,8 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
 	op_ret = futex_atomic_op_inuser(op, uaddr2);
 	if (unlikely(op_ret < 0)) {
 		double_unlock_hb(hb1, hb2);
+		futex_hash_put(hb1);
+		futex_hash_put(hb2);
 
 		if (!IS_ENABLED(CONFIG_MMU) ||
 		    unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
@@ -329,6 +336,8 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
 out_unlock:
 	double_unlock_hb(hb1, hb2);
 	wake_up_q(&wake_q);
+	futex_hash_put(hb1);
+	futex_hash_put(hb2);
 	return ret;
 }
 
@@ -466,6 +475,8 @@ int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
 		}
 
 		futex_q_unlock(hb);
+		futex_hash_put(hb);
+
 		__set_current_state(TASK_RUNNING);
 
 		/*
@@ -625,6 +636,7 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
 
 	if (ret) {
 		futex_q_unlock(*hb);
+		futex_hash_put(*hb);
 
 		ret = get_user(uval, uaddr);
 		if (ret)
@@ -638,6 +650,7 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
 
 	if (uval != val) {
 		futex_q_unlock(*hb);
+		futex_hash_put(*hb);
 		ret = -EWOULDBLOCK;
 	}
 
-- 
2.45.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ