Message-ID: <20241115172035.795842-7-bigeasy@linutronix.de>
Date: Fri, 15 Nov 2024 17:58:47 +0100
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-kernel@...r.kernel.org
Cc: André Almeida <andrealmeid@...lia.com>,
Darren Hart <dvhart@...radead.org>,
Davidlohr Bueso <dave@...olabs.net>,
Ingo Molnar <mingo@...hat.com>,
Juri Lelli <juri.lelli@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>,
Valentin Schneider <vschneid@...hat.com>,
Waiman Long <longman@...hat.com>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Subject: [RFC PATCH v3 6/9] futex: Allow re-allocation of the private hash bucket.
The mm_struct::futex_hash_lock guards assignment and replacement of the
futex_hash_bucket. The futex_hash_allocate()/PR_FUTEX_HASH_SET_SLOTS
operation can now be invoked at runtime to resize the internal private
futex_hash_bucket.
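From userspace the resize is simply another invocation of the prctl()
operation added earlier in the series. A minimal sketch, assuming the
PR_FUTEX_HASH_SET_SLOTS calling convention from that patch (the real
constant comes from the series' uapi headers, the value below is a
placeholder only):

	#include <sys/prctl.h>
	#include <stdio.h>

	#ifndef PR_FUTEX_HASH_SET_SLOTS
	#define PR_FUTEX_HASH_SET_SLOTS	0	/* placeholder only */
	#endif

	int main(void)
	{
		/*
		 * Resize the process private futex hash to 16 slots at
		 * runtime. Requested values below 2 are rounded up, see
		 * futex_hash_allocate().
		 */
		if (prctl(PR_FUTEX_HASH_SET_SLOTS, 16, 0, 0, 0))
			perror("PR_FUTEX_HASH_SET_SLOTS");
		return 0;
	}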
The idea is to use the recently introduced reference counting to keep a
valid HB around. On resize/replacement the new HB is assigned and all
users currently queued on the old HB are poked so they can requeue
themselves on the new one.
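On the waiter side this results in the following pattern, here a
condensed sketch of the futex_q_lock() change from the diff below
(futex_hb_waiters_inc() happens in context lines not shown):

	for (;;) {
		hb = futex_hash(&q->key);	/* takes a reference on the HB */
		futex_hb_waiters_inc(hb);
		q->lock_ptr = &hb->lock;
		spin_lock(&hb->lock);
		if (futex_check_hb_valid(hb))
			break;		/* still the mm's current private HB */
		/* replaced under us: drop lock + reference, look it up again */
		futex_hb_waiters_dec(hb);
		spin_unlock(&hb->lock);
		futex_hash_put(hb);
	}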
So far this has only been tested with FUTEX_LOCK_PI.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
 include/linux/futex.h           |  1 +
 include/linux/mm_types.h        |  1 +
 kernel/futex/core.c             | 64 ++++++++++++++++++++++++++++-----
 kernel/futex/futex.h            |  1 +
 kernel/futex/pi.c               | 25 +++++++++++++
 kernel/locking/rtmutex.c        | 26 ++++++++++++++
 kernel/locking/rtmutex_common.h |  2 ++
 7 files changed, 111 insertions(+), 9 deletions(-)
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 359fc24eb37ff..838a5a6be0444 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -85,6 +85,7 @@ void futex_hash_free(struct mm_struct *mm);
static inline void futex_mm_init(struct mm_struct *mm)
{
rcu_assign_pointer(mm->futex_hash_bucket, NULL);
+ mutex_init(&mm->futex_hash_lock);
}
#else
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 057ad1de59ca0..5bf86ea363780 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -899,6 +899,7 @@ struct mm_struct {
int mm_lock_seq;
#endif
+ struct mutex futex_hash_lock;
struct futex_hash_bucket_private __rcu *futex_hash_bucket;
unsigned long hiwater_rss; /* High-watermark of RSS usage */
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index cff5652a29917..70d4b1d93bbb8 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -595,6 +595,7 @@ struct futex_hash_bucket *futex_q_lock(struct futex_q *q)
{
struct futex_hash_bucket *hb;
+try_again:
hb = futex_hash(&q->key);
/*
@@ -610,7 +611,13 @@ struct futex_hash_bucket *futex_q_lock(struct futex_q *q)
q->lock_ptr = &hb->lock;
spin_lock(&hb->lock);
- return hb;
+ if (futex_check_hb_valid(hb))
+ return hb;
+
+ futex_hb_waiters_dec(hb);
+ spin_unlock(&hb->lock);
+ futex_hash_put(hb);
+ goto try_again;
}
void futex_q_unlock(struct futex_hash_bucket *hb)
@@ -1238,18 +1245,50 @@ void futex_hash_free(struct mm_struct *mm)
futex_hash_priv_put(hb_p);
}
+static void futex_put_old_hb_p(struct futex_hash_bucket_private *hb_p)
+{
+ unsigned int slots = hb_p->hash_mask + 1;
+ struct futex_hash_bucket *hb;
+ DEFINE_WAKE_Q(wake_q);
+ unsigned int i;
+
+ for (i = 0; i < slots; i++) {
+ struct futex_q *this;
+
+ hb = &hb_p->queues[i];
+
+ spin_lock(&hb->lock);
+ plist_for_each_entry(this, &hb->chain, list)
+ wake_q_add(&wake_q, this->task);
+ spin_unlock(&hb->lock);
+ }
+ futex_hash_priv_put(hb_p);
+
+ wake_up_q(&wake_q);
+}
+
+bool futex_check_hb_valid(struct futex_hash_bucket *hb)
+{
+ struct futex_hash_bucket_private *hb_p_now;
+ struct futex_hash_bucket_private *hb_p;
+
+ if (hb->hb_slot == 0)
+ return true;
+ guard(rcu)();
+ hb_p_now = rcu_dereference(current->mm->futex_hash_bucket);
+ hb_p = container_of(hb, struct futex_hash_bucket_private,
+ queues[hb->hb_slot - 1]);
+
+ return hb_p_now == hb_p;
+}
+
static int futex_hash_allocate(unsigned int hash_slots)
{
- struct futex_hash_bucket_private *hb_p;
+ struct futex_hash_bucket_private *hb_p, *hb_p_old = NULL;
+ struct mm_struct *mm;
size_t alloc_size;
int i;
- if (current->mm->futex_hash_bucket)
- return -EALREADY;
-
- if (!thread_group_leader(current))
- return -EINVAL;
-
if (hash_slots < 2)
hash_slots = 2;
if (hash_slots > 131072)
@@ -1277,7 +1316,14 @@ static int futex_hash_allocate(unsigned int hash_slots)
hb_p->queues[i].hb_slot = i + 1;
}
- rcu_assign_pointer(current->mm->futex_hash_bucket, hb_p);
+ mm = current->mm;
+ scoped_guard(mutex, &mm->futex_hash_lock) {
+ hb_p_old = rcu_dereference_check(mm->futex_hash_bucket,
+ lockdep_is_held(&mm->futex_hash_lock));
+ rcu_assign_pointer(mm->futex_hash_bucket, hb_p);
+ }
+ if (hb_p_old)
+ futex_put_old_hb_p(hb_p_old);
return 0;
}
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index c6d59949766d2..b974d675730e4 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -204,6 +204,7 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
extern struct futex_hash_bucket *futex_hash(union futex_key *key);
extern void futex_hash_put(struct futex_hash_bucket *hb);
extern void futex_hash_get(struct futex_hash_bucket *hb);
+extern bool futex_check_hb_valid(struct futex_hash_bucket *hb);
static inline struct futex_hash_bucket *futex_hb_from_futex_q(struct futex_q *q)
{
diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
index 399ac712f1fd6..1a0a9cd31f911 100644
--- a/kernel/futex/pi.c
+++ b/kernel/futex/pi.c
@@ -998,6 +998,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
rt_mutex_pre_schedule();
rt_mutex_init_waiter(&rt_waiter);
+ rt_waiter.hb = hb;
/*
* On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not
@@ -1066,6 +1067,23 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
*/
rt_mutex_post_schedule();
no_block:
+ if (!futex_check_hb_valid(hb)) {
+ /*
+ * We might have got the lock, we might not. If the HB changed
+ * under us it was all for nothing. Try again from scratch.
+ */
+ futex_unqueue_pi(&q);
+ spin_unlock(q.lock_ptr);
+ futex_hash_put(hb);
+
+ if (to) {
+ hrtimer_cancel(&to->timer);
+ destroy_hrtimer_on_stack(&to->timer);
+ }
+ if (refill_pi_state_cache())
+ return -ENOMEM;
+ goto retry_private;
+ }
/*
* Fixup the pi_state owner and possibly acquire the lock if we
* haven't already.
@@ -1226,6 +1244,12 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
* space.
*/
return ret;
+ } else {
+ if (!futex_check_hb_valid(hb)) {
+ spin_unlock(&hb->lock);
+ futex_hash_put(hb);
+ goto retry;
+ }
}
/*
@@ -1250,6 +1274,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
return ret;
}
}
+ /* XXX if the HB changed but uval did not, we might need to check if there is a waiter pending */
/*
* If uval has changed, let user space handle it.
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index ebebd0eec7f63..188a9b16412df 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -56,10 +56,29 @@ static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
return 0;
}
+extern bool futex_check_hb_valid(struct futex_hash_bucket *hb);
+
+static inline bool __internal_retry_reason(struct rt_mutex_waiter *waiter)
+{
+ if (!IS_ENABLED(CONFIG_FUTEX))
+ return false;
+
+ if (!waiter->hb)
+ return false;
+ if (futex_check_hb_valid(waiter->hb))
+ return false;
+ return true;
+}
+
#else
# define build_ww_mutex() (true)
# define ww_container_of(rtm) container_of(rtm, struct ww_mutex, base)
# include "ww_mutex.h"
+
+static inline bool __internal_retry_reason(struct rt_mutex_waiter *waiter)
+{
+ return false;
+}
#endif
/*
@@ -1626,6 +1645,13 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
break;
}
+ if (!build_ww_mutex()) {
+ if (__internal_retry_reason(waiter)) {
+ ret = -EAGAIN;
+ break;
+ }
+ }
+
if (waiter == rt_mutex_top_waiter(lock))
owner = rt_mutex_owner(lock);
else
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 1162e07cdaea1..fb26ad08f259a 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -56,6 +56,7 @@ struct rt_mutex_waiter {
struct rt_mutex_base *lock;
unsigned int wake_state;
struct ww_acquire_ctx *ww_ctx;
+ struct futex_hash_bucket *hb;
};
/**
@@ -215,6 +216,7 @@ static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
RB_CLEAR_NODE(&waiter->tree.entry);
waiter->wake_state = TASK_NORMAL;
waiter->task = NULL;
+ waiter->hb = NULL;
}
static inline void rt_mutex_init_rtlock_waiter(struct rt_mutex_waiter *waiter)
--
2.45.2