[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <337g63unj3bdmblohoue2m66i5mciykmngdxzjcn2ark7uvywv@cy5pn5c75prd>
Date: Mon, 25 Mar 2024 11:56:51 -0700
From: Davidlohr Bueso <dave@...olabs.net>
To: John Stultz <jstultz@...gle.com>
Cc: LKML <linux-kernel@...r.kernel.org>,
Peter Zijlstra <peterz@...radead.org>, Joel Fernandes <joelaf@...gle.com>,
Qais Yousef <qyousef@...gle.com>, Ingo Molnar <mingo@...hat.com>,
Juri Lelli <juri.lelli@...hat.com>, Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>, Valentin Schneider <vschneid@...hat.com>,
Steven Rostedt <rostedt@...dmis.org>, Ben Segall <bsegall@...gle.com>,
Zimuzo Ezeozue <zezeozue@...gle.com>, Youssef Esmat <youssefesmat@...gle.com>,
Mel Gorman <mgorman@...e.de>, Daniel Bristot de Oliveira <bristot@...hat.com>,
Will Deacon <will@...nel.org>, Waiman Long <longman@...hat.com>,
Boqun Feng <boqun.feng@...il.com>, "Paul E. McKenney" <paulmck@...nel.org>,
Metin Kaya <Metin.Kaya@....com>, Xuewen Yan <xuewen.yan94@...il.com>,
K Prateek Nayak <kprateek.nayak@....com>, Thomas Gleixner <tglx@...utronix.de>, kernel-team@...roid.com
Subject: Re: [PATCH v9 1/7] locking/mutex: Remove wakeups from under
mutex::wait_lock
On Thu, 14 Mar 2024, John Stultz wrote:
>From: Peter Zijlstra <peterz@...radead.org>
>
>In preparation to nest mutex::wait_lock under rq::lock we need to remove
>wakeups from under it.
Acked-by: Davidlohr Bueso <dave@...olabs.net>
>
>Cc: Joel Fernandes <joelaf@...gle.com>
>Cc: Qais Yousef <qyousef@...gle.com>
>Cc: Ingo Molnar <mingo@...hat.com>
>Cc: Peter Zijlstra <peterz@...radead.org>
>Cc: Juri Lelli <juri.lelli@...hat.com>
>Cc: Vincent Guittot <vincent.guittot@...aro.org>
>Cc: Dietmar Eggemann <dietmar.eggemann@....com>
>Cc: Valentin Schneider <vschneid@...hat.com>
>Cc: Steven Rostedt <rostedt@...dmis.org>
>Cc: Ben Segall <bsegall@...gle.com>
>Cc: Zimuzo Ezeozue <zezeozue@...gle.com>
>Cc: Youssef Esmat <youssefesmat@...gle.com>
>Cc: Mel Gorman <mgorman@...e.de>
>Cc: Daniel Bristot de Oliveira <bristot@...hat.com>
>Cc: Will Deacon <will@...nel.org>
>Cc: Waiman Long <longman@...hat.com>
>Cc: Boqun Feng <boqun.feng@...il.com>
>Cc: "Paul E. McKenney" <paulmck@...nel.org>
>Cc: Metin Kaya <Metin.Kaya@....com>
>Cc: Xuewen Yan <xuewen.yan94@...il.com>
>Cc: K Prateek Nayak <kprateek.nayak@....com>
>Cc: Thomas Gleixner <tglx@...utronix.de>
>Cc: kernel-team@...roid.com
>Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
>[Heavily changed after 55f036ca7e74 ("locking: WW mutex cleanup") and
>08295b3b5bee ("locking: Implement an algorithm choice for Wound-Wait
>mutexes")]
>Signed-off-by: Juri Lelli <juri.lelli@...hat.com>
>[jstultz: rebased to mainline, added extra wake_up_q & init
> to avoid hangs, similar to Connor's rework of this patch]
>Signed-off-by: John Stultz <jstultz@...gle.com>
>---
>v5:
>* Reverted back to an earlier version of this patch to undo
> the change that kept the wake_q in the ctx structure, as
> that broke the rule that the wake_q must always be on the
> stack, as its not safe for concurrency.
>v6:
>* Made tweaks suggested by Waiman Long
>v7:
>* Fixups to pass wake_qs down for PREEMPT_RT logic
>---
> kernel/locking/mutex.c | 17 +++++++++++++----
> kernel/locking/rtmutex.c | 26 +++++++++++++++++---------
> kernel/locking/rwbase_rt.c | 4 +++-
> kernel/locking/rwsem.c | 4 ++--
> kernel/locking/spinlock_rt.c | 3 ++-
> kernel/locking/ww_mutex.h | 29 ++++++++++++++++++-----------
> 6 files changed, 55 insertions(+), 28 deletions(-)
>
>diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
>index cbae8c0b89ab..980ce630232c 100644
>--- a/kernel/locking/mutex.c
>+++ b/kernel/locking/mutex.c
>@@ -575,6 +575,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> struct lockdep_map *nest_lock, unsigned long ip,
> struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
> {
>+ DEFINE_WAKE_Q(wake_q);
> struct mutex_waiter waiter;
> struct ww_mutex *ww;
> int ret;
>@@ -625,7 +626,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> */
> if (__mutex_trylock(lock)) {
> if (ww_ctx)
>- __ww_mutex_check_waiters(lock, ww_ctx);
>+ __ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
>
> goto skip_wait;
> }
>@@ -645,7 +646,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> * Add in stamp order, waking up waiters that must kill
> * themselves.
> */
>- ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
>+ ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx, &wake_q);
> if (ret)
> goto err_early_kill;
> }
>@@ -681,6 +682,11 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> }
>
> raw_spin_unlock(&lock->wait_lock);
>+ /* Make sure we do wakeups before calling schedule */
>+ if (!wake_q_empty(&wake_q)) {
>+ wake_up_q(&wake_q);
>+ wake_q_init(&wake_q);
>+ }
> schedule_preempt_disabled();
>
> first = __mutex_waiter_is_first(lock, &waiter);
>@@ -714,7 +720,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> */
> if (!ww_ctx->is_wait_die &&
> !__mutex_waiter_is_first(lock, &waiter))
>- __ww_mutex_check_waiters(lock, ww_ctx);
>+ __ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
> }
>
> __mutex_remove_waiter(lock, &waiter);
>@@ -730,6 +736,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> ww_mutex_lock_acquired(ww, ww_ctx);
>
> raw_spin_unlock(&lock->wait_lock);
>+ wake_up_q(&wake_q);
> preempt_enable();
> return 0;
>
>@@ -741,6 +748,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> raw_spin_unlock(&lock->wait_lock);
> debug_mutex_free_waiter(&waiter);
> mutex_release(&lock->dep_map, ip);
>+ wake_up_q(&wake_q);
> preempt_enable();
> return ret;
> }
>@@ -934,6 +942,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
> }
> }
>
>+ preempt_disable();
> raw_spin_lock(&lock->wait_lock);
> debug_mutex_unlock(lock);
> if (!list_empty(&lock->wait_list)) {
>@@ -952,8 +961,8 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
> __mutex_handoff(lock, next);
>
> raw_spin_unlock(&lock->wait_lock);
>-
> wake_up_q(&wake_q);
>+ preempt_enable();
> }
>
> #ifndef CONFIG_DEBUG_LOCK_ALLOC
>diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
>index 4a10e8c16fd2..eaac8b196a69 100644
>--- a/kernel/locking/rtmutex.c
>+++ b/kernel/locking/rtmutex.c
>@@ -34,13 +34,15 @@
>
> static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
> struct rt_mutex *lock,
>- struct ww_acquire_ctx *ww_ctx)
>+ struct ww_acquire_ctx *ww_ctx,
>+ struct wake_q_head *wake_q)
> {
> return 0;
> }
>
> static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
>- struct ww_acquire_ctx *ww_ctx)
>+ struct ww_acquire_ctx *ww_ctx,
>+ struct wake_q_head *wake_q)
> {
> }
>
>@@ -1206,6 +1208,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
> struct rt_mutex_waiter *top_waiter = waiter;
> struct rt_mutex_base *next_lock;
> int chain_walk = 0, res;
>+ DEFINE_WAKE_Q(wake_q);
>
> lockdep_assert_held(&lock->wait_lock);
>
>@@ -1244,7 +1247,8 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
>
> /* Check whether the waiter should back out immediately */
> rtm = container_of(lock, struct rt_mutex, rtmutex);
>- res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
>+ res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, &wake_q);
>+ wake_up_q(&wake_q);
> if (res) {
> raw_spin_lock(&task->pi_lock);
> rt_mutex_dequeue(lock, waiter);
>@@ -1677,7 +1681,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
> struct ww_acquire_ctx *ww_ctx,
> unsigned int state,
> enum rtmutex_chainwalk chwalk,
>- struct rt_mutex_waiter *waiter)
>+ struct rt_mutex_waiter *waiter,
>+ struct wake_q_head *wake_q)
> {
> struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
> struct ww_mutex *ww = ww_container_of(rtm);
>@@ -1688,7 +1693,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
> /* Try to acquire the lock again: */
> if (try_to_take_rt_mutex(lock, current, NULL)) {
> if (build_ww_mutex() && ww_ctx) {
>- __ww_mutex_check_waiters(rtm, ww_ctx);
>+ __ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
> ww_mutex_lock_acquired(ww, ww_ctx);
> }
> return 0;
>@@ -1706,7 +1711,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
> /* acquired the lock */
> if (build_ww_mutex() && ww_ctx) {
> if (!ww_ctx->is_wait_die)
>- __ww_mutex_check_waiters(rtm, ww_ctx);
>+ __ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
> ww_mutex_lock_acquired(ww, ww_ctx);
> }
> } else {
>@@ -1728,7 +1733,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
>
> static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
> struct ww_acquire_ctx *ww_ctx,
>- unsigned int state)
>+ unsigned int state,
>+ struct wake_q_head *wake_q)
> {
> struct rt_mutex_waiter waiter;
> int ret;
>@@ -1737,7 +1743,7 @@ static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
> waiter.ww_ctx = ww_ctx;
>
> ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
>- &waiter);
>+ &waiter, wake_q);
>
> debug_rt_mutex_free_waiter(&waiter);
> return ret;
>@@ -1753,6 +1759,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
> struct ww_acquire_ctx *ww_ctx,
> unsigned int state)
> {
>+ DEFINE_WAKE_Q(wake_q);
> unsigned long flags;
> int ret;
>
>@@ -1774,8 +1781,9 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
> * irqsave/restore variants.
> */
> raw_spin_lock_irqsave(&lock->wait_lock, flags);
>- ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
>+ ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state, &wake_q);
> raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
>+ wake_up_q(&wake_q);
> rt_mutex_post_schedule();
>
> return ret;
>diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
>index 34a59569db6b..e9d2f38b70f3 100644
>--- a/kernel/locking/rwbase_rt.c
>+++ b/kernel/locking/rwbase_rt.c
>@@ -69,6 +69,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
> unsigned int state)
> {
> struct rt_mutex_base *rtm = &rwb->rtmutex;
>+ DEFINE_WAKE_Q(wake_q);
> int ret;
>
> rwbase_pre_schedule();
>@@ -110,7 +111,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
> * For rwlocks this returns 0 unconditionally, so the below
> * !ret conditionals are optimized out.
> */
>- ret = rwbase_rtmutex_slowlock_locked(rtm, state);
>+ ret = rwbase_rtmutex_slowlock_locked(rtm, state, &wake_q);
>
> /*
> * On success the rtmutex is held, so there can't be a writer
>@@ -122,6 +123,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
> if (!ret)
> atomic_inc(&rwb->readers);
> raw_spin_unlock_irq(&rtm->wait_lock);
>+ wake_up_q(&wake_q);
> if (!ret)
> rwbase_rtmutex_unlock(rtm);
>
>diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
>index 2340b6d90ec6..74ebb2915d63 100644
>--- a/kernel/locking/rwsem.c
>+++ b/kernel/locking/rwsem.c
>@@ -1415,8 +1415,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
> #define rwbase_rtmutex_lock_state(rtm, state) \
> __rt_mutex_lock(rtm, state)
>
>-#define rwbase_rtmutex_slowlock_locked(rtm, state) \
>- __rt_mutex_slowlock_locked(rtm, NULL, state)
>+#define rwbase_rtmutex_slowlock_locked(rtm, state, wq) \
>+ __rt_mutex_slowlock_locked(rtm, NULL, state, wq)
>
> #define rwbase_rtmutex_unlock(rtm) \
> __rt_mutex_unlock(rtm)
>diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
>index 38e292454fcc..fb1810a14c9d 100644
>--- a/kernel/locking/spinlock_rt.c
>+++ b/kernel/locking/spinlock_rt.c
>@@ -162,7 +162,8 @@ rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state)
> }
>
> static __always_inline int
>-rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state)
>+rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state,
>+ struct wake_q_head *wake_q)
> {
> rtlock_slowlock_locked(rtm);
> return 0;
>diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
>index 3ad2cc4823e5..7189c6631d90 100644
>--- a/kernel/locking/ww_mutex.h
>+++ b/kernel/locking/ww_mutex.h
>@@ -275,7 +275,7 @@ __ww_ctx_less(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
> */
> static bool
> __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
>- struct ww_acquire_ctx *ww_ctx)
>+ struct ww_acquire_ctx *ww_ctx, struct wake_q_head *wake_q)
> {
> if (!ww_ctx->is_wait_die)
> return false;
>@@ -284,7 +284,7 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
> #ifndef WW_RT
> debug_mutex_wake_waiter(lock, waiter);
> #endif
>- wake_up_process(waiter->task);
>+ wake_q_add(wake_q, waiter->task);
> }
>
> return true;
>@@ -299,7 +299,8 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
> */
> static bool __ww_mutex_wound(struct MUTEX *lock,
> struct ww_acquire_ctx *ww_ctx,
>- struct ww_acquire_ctx *hold_ctx)
>+ struct ww_acquire_ctx *hold_ctx,
>+ struct wake_q_head *wake_q)
> {
> struct task_struct *owner = __ww_mutex_owner(lock);
>
>@@ -331,7 +332,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
> * wakeup pending to re-read the wounded state.
> */
> if (owner != current)
>- wake_up_process(owner);
>+ wake_q_add(wake_q, owner);
>
> return true;
> }
>@@ -352,7 +353,8 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
> * The current task must not be on the wait list.
> */
> static void
>-__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx)
>+__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx,
>+ struct wake_q_head *wake_q)
> {
> struct MUTEX_WAITER *cur;
>
>@@ -364,8 +366,8 @@ __ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx)
> if (!cur->ww_ctx)
> continue;
>
>- if (__ww_mutex_die(lock, cur, ww_ctx) ||
>- __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
>+ if (__ww_mutex_die(lock, cur, ww_ctx, wake_q) ||
>+ __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx, wake_q))
> break;
> }
> }
>@@ -377,6 +379,8 @@ __ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx)
> static __always_inline void
> ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
> {
>+ DEFINE_WAKE_Q(wake_q);
>+
> ww_mutex_lock_acquired(lock, ctx);
>
> /*
>@@ -405,8 +409,10 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
> * die or wound us.
> */
> lock_wait_lock(&lock->base);
>- __ww_mutex_check_waiters(&lock->base, ctx);
>+ __ww_mutex_check_waiters(&lock->base, ctx, &wake_q);
> unlock_wait_lock(&lock->base);
>+
>+ wake_up_q(&wake_q);
> }
>
> static __always_inline int
>@@ -488,7 +494,8 @@ __ww_mutex_check_kill(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
> static inline int
> __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter,
> struct MUTEX *lock,
>- struct ww_acquire_ctx *ww_ctx)
>+ struct ww_acquire_ctx *ww_ctx,
>+ struct wake_q_head *wake_q)
> {
> struct MUTEX_WAITER *cur, *pos = NULL;
> bool is_wait_die;
>@@ -532,7 +539,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter,
> pos = cur;
>
> /* Wait-Die: ensure younger waiters die. */
>- __ww_mutex_die(lock, cur, ww_ctx);
>+ __ww_mutex_die(lock, cur, ww_ctx, wake_q);
> }
>
> __ww_waiter_add(lock, waiter, pos);
>@@ -550,7 +557,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter,
> * such that either we or the fastpath will wound @ww->ctx.
> */
> smp_mb();
>- __ww_mutex_wound(lock, ww_ctx, ww->ctx);
>+ __ww_mutex_wound(lock, ww_ctx, ww->ctx, wake_q);
> }
>
> return 0;
>--
>2.44.0.291.gc1ea87d7ee-goog
>
Powered by blists - more mailing lists