lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Mon, 25 Mar 2024 11:56:51 -0700
From: Davidlohr Bueso <dave@...olabs.net>
To: John Stultz <jstultz@...gle.com>
Cc: LKML <linux-kernel@...r.kernel.org>, 
	Peter Zijlstra <peterz@...radead.org>, Joel Fernandes <joelaf@...gle.com>, 
	Qais Yousef <qyousef@...gle.com>, Ingo Molnar <mingo@...hat.com>, 
	Juri Lelli <juri.lelli@...hat.com>, Vincent Guittot <vincent.guittot@...aro.org>, 
	Dietmar Eggemann <dietmar.eggemann@....com>, Valentin Schneider <vschneid@...hat.com>, 
	Steven Rostedt <rostedt@...dmis.org>, Ben Segall <bsegall@...gle.com>, 
	Zimuzo Ezeozue <zezeozue@...gle.com>, Youssef Esmat <youssefesmat@...gle.com>, 
	Mel Gorman <mgorman@...e.de>, Daniel Bristot de Oliveira <bristot@...hat.com>, 
	Will Deacon <will@...nel.org>, Waiman Long <longman@...hat.com>, 
	Boqun Feng <boqun.feng@...il.com>, "Paul E. McKenney" <paulmck@...nel.org>, 
	Metin Kaya <Metin.Kaya@....com>, Xuewen Yan <xuewen.yan94@...il.com>, 
	K Prateek Nayak <kprateek.nayak@....com>, Thomas Gleixner <tglx@...utronix.de>, kernel-team@...roid.com
Subject: Re: [PATCH v9 1/7] locking/mutex: Remove wakeups from under
 mutex::wait_lock

On Thu, 14 Mar 2024, John Stultz wrote:

>From: Peter Zijlstra <peterz@...radead.org>
>
>In preparation to nest mutex::wait_lock under rq::lock we need to remove
>wakeups from under it.

Acked-by: Davidlohr Bueso <dave@...olabs.net>

>
>Cc: Joel Fernandes <joelaf@...gle.com>
>Cc: Qais Yousef <qyousef@...gle.com>
>Cc: Ingo Molnar <mingo@...hat.com>
>Cc: Peter Zijlstra <peterz@...radead.org>
>Cc: Juri Lelli <juri.lelli@...hat.com>
>Cc: Vincent Guittot <vincent.guittot@...aro.org>
>Cc: Dietmar Eggemann <dietmar.eggemann@....com>
>Cc: Valentin Schneider <vschneid@...hat.com>
>Cc: Steven Rostedt <rostedt@...dmis.org>
>Cc: Ben Segall <bsegall@...gle.com>
>Cc: Zimuzo Ezeozue <zezeozue@...gle.com>
>Cc: Youssef Esmat <youssefesmat@...gle.com>
>Cc: Mel Gorman <mgorman@...e.de>
>Cc: Daniel Bristot de Oliveira <bristot@...hat.com>
>Cc: Will Deacon <will@...nel.org>
>Cc: Waiman Long <longman@...hat.com>
>Cc: Boqun Feng <boqun.feng@...il.com>
>Cc: "Paul E. McKenney" <paulmck@...nel.org>
>Cc: Metin Kaya <Metin.Kaya@....com>
>Cc: Xuewen Yan <xuewen.yan94@...il.com>
>Cc: K Prateek Nayak <kprateek.nayak@....com>
>Cc: Thomas Gleixner <tglx@...utronix.de>
>Cc: kernel-team@...roid.com
>Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
>[Heavily changed after 55f036ca7e74 ("locking: WW mutex cleanup") and
>08295b3b5bee ("locking: Implement an algorithm choice for Wound-Wait
>mutexes")]
>Signed-off-by: Juri Lelli <juri.lelli@...hat.com>
>[jstultz: rebased to mainline, added extra wake_up_q & init
> to avoid hangs, similar to Connor's rework of this patch]
>Signed-off-by: John Stultz <jstultz@...gle.com>
>---
>v5:
>* Reverted back to an earlier version of this patch to undo
>  the change that kept the wake_q in the ctx structure, as
>  that broke the rule that the wake_q must always be on the
>  stack, as its not safe for concurrency.
>v6:
>* Made tweaks suggested by Waiman Long
>v7:
>* Fixups to pass wake_qs down for PREEMPT_RT logic
>---
> kernel/locking/mutex.c       | 17 +++++++++++++----
> kernel/locking/rtmutex.c     | 26 +++++++++++++++++---------
> kernel/locking/rwbase_rt.c   |  4 +++-
> kernel/locking/rwsem.c       |  4 ++--
> kernel/locking/spinlock_rt.c |  3 ++-
> kernel/locking/ww_mutex.h    | 29 ++++++++++++++++++-----------
> 6 files changed, 55 insertions(+), 28 deletions(-)
>
>diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
>index cbae8c0b89ab..980ce630232c 100644
>--- a/kernel/locking/mutex.c
>+++ b/kernel/locking/mutex.c
>@@ -575,6 +575,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> 		    struct lockdep_map *nest_lock, unsigned long ip,
> 		    struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
> {
>+	DEFINE_WAKE_Q(wake_q);
> 	struct mutex_waiter waiter;
> 	struct ww_mutex *ww;
> 	int ret;
>@@ -625,7 +626,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> 	 */
> 	if (__mutex_trylock(lock)) {
> 		if (ww_ctx)
>-			__ww_mutex_check_waiters(lock, ww_ctx);
>+			__ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
>
> 		goto skip_wait;
> 	}
>@@ -645,7 +646,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> 		 * Add in stamp order, waking up waiters that must kill
> 		 * themselves.
> 		 */
>-		ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
>+		ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx, &wake_q);
> 		if (ret)
> 			goto err_early_kill;
> 	}
>@@ -681,6 +682,11 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> 		}
>
> 		raw_spin_unlock(&lock->wait_lock);
>+		/* Make sure we do wakeups before calling schedule */
>+		if (!wake_q_empty(&wake_q)) {
>+			wake_up_q(&wake_q);
>+			wake_q_init(&wake_q);
>+		}
> 		schedule_preempt_disabled();
>
> 		first = __mutex_waiter_is_first(lock, &waiter);
>@@ -714,7 +720,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> 		 */
> 		if (!ww_ctx->is_wait_die &&
> 		    !__mutex_waiter_is_first(lock, &waiter))
>-			__ww_mutex_check_waiters(lock, ww_ctx);
>+			__ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
> 	}
>
> 	__mutex_remove_waiter(lock, &waiter);
>@@ -730,6 +736,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> 		ww_mutex_lock_acquired(ww, ww_ctx);
>
> 	raw_spin_unlock(&lock->wait_lock);
>+	wake_up_q(&wake_q);
> 	preempt_enable();
> 	return 0;
>
>@@ -741,6 +748,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> 	raw_spin_unlock(&lock->wait_lock);
> 	debug_mutex_free_waiter(&waiter);
> 	mutex_release(&lock->dep_map, ip);
>+	wake_up_q(&wake_q);
> 	preempt_enable();
> 	return ret;
> }
>@@ -934,6 +942,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
> 		}
> 	}
>
>+	preempt_disable();
> 	raw_spin_lock(&lock->wait_lock);
> 	debug_mutex_unlock(lock);
> 	if (!list_empty(&lock->wait_list)) {
>@@ -952,8 +961,8 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
> 		__mutex_handoff(lock, next);
>
> 	raw_spin_unlock(&lock->wait_lock);
>-
> 	wake_up_q(&wake_q);
>+	preempt_enable();
> }
>
> #ifndef CONFIG_DEBUG_LOCK_ALLOC
>diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
>index 4a10e8c16fd2..eaac8b196a69 100644
>--- a/kernel/locking/rtmutex.c
>+++ b/kernel/locking/rtmutex.c
>@@ -34,13 +34,15 @@
>
> static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
> 					struct rt_mutex *lock,
>-					struct ww_acquire_ctx *ww_ctx)
>+					struct ww_acquire_ctx *ww_ctx,
>+					struct wake_q_head *wake_q)
> {
> 	return 0;
> }
>
> static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
>-					    struct ww_acquire_ctx *ww_ctx)
>+					    struct ww_acquire_ctx *ww_ctx,
>+					    struct wake_q_head *wake_q)
> {
> }
>
>@@ -1206,6 +1208,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
> 	struct rt_mutex_waiter *top_waiter = waiter;
> 	struct rt_mutex_base *next_lock;
> 	int chain_walk = 0, res;
>+	DEFINE_WAKE_Q(wake_q);
>
> 	lockdep_assert_held(&lock->wait_lock);
>
>@@ -1244,7 +1247,8 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
>
> 		/* Check whether the waiter should back out immediately */
> 		rtm = container_of(lock, struct rt_mutex, rtmutex);
>-		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
>+		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, &wake_q);
>+		wake_up_q(&wake_q);
> 		if (res) {
> 			raw_spin_lock(&task->pi_lock);
> 			rt_mutex_dequeue(lock, waiter);
>@@ -1677,7 +1681,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
> 				       struct ww_acquire_ctx *ww_ctx,
> 				       unsigned int state,
> 				       enum rtmutex_chainwalk chwalk,
>-				       struct rt_mutex_waiter *waiter)
>+				       struct rt_mutex_waiter *waiter,
>+				       struct wake_q_head *wake_q)
> {
> 	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
> 	struct ww_mutex *ww = ww_container_of(rtm);
>@@ -1688,7 +1693,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
> 	/* Try to acquire the lock again: */
> 	if (try_to_take_rt_mutex(lock, current, NULL)) {
> 		if (build_ww_mutex() && ww_ctx) {
>-			__ww_mutex_check_waiters(rtm, ww_ctx);
>+			__ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
> 			ww_mutex_lock_acquired(ww, ww_ctx);
> 		}
> 		return 0;
>@@ -1706,7 +1711,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
> 		/* acquired the lock */
> 		if (build_ww_mutex() && ww_ctx) {
> 			if (!ww_ctx->is_wait_die)
>-				__ww_mutex_check_waiters(rtm, ww_ctx);
>+				__ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
> 			ww_mutex_lock_acquired(ww, ww_ctx);
> 		}
> 	} else {
>@@ -1728,7 +1733,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
>
> static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
> 					     struct ww_acquire_ctx *ww_ctx,
>-					     unsigned int state)
>+					     unsigned int state,
>+					     struct wake_q_head *wake_q)
> {
> 	struct rt_mutex_waiter waiter;
> 	int ret;
>@@ -1737,7 +1743,7 @@ static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
> 	waiter.ww_ctx = ww_ctx;
>
> 	ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
>-				  &waiter);
>+				  &waiter, wake_q);
>
> 	debug_rt_mutex_free_waiter(&waiter);
> 	return ret;
>@@ -1753,6 +1759,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
> 				     struct ww_acquire_ctx *ww_ctx,
> 				     unsigned int state)
> {
>+	DEFINE_WAKE_Q(wake_q);
> 	unsigned long flags;
> 	int ret;
>
>@@ -1774,8 +1781,9 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
> 	 * irqsave/restore variants.
> 	 */
> 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
>-	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
>+	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state, &wake_q);
> 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
>+	wake_up_q(&wake_q);
> 	rt_mutex_post_schedule();
>
> 	return ret;
>diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
>index 34a59569db6b..e9d2f38b70f3 100644
>--- a/kernel/locking/rwbase_rt.c
>+++ b/kernel/locking/rwbase_rt.c
>@@ -69,6 +69,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
> 				      unsigned int state)
> {
> 	struct rt_mutex_base *rtm = &rwb->rtmutex;
>+	DEFINE_WAKE_Q(wake_q);
> 	int ret;
>
> 	rwbase_pre_schedule();
>@@ -110,7 +111,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
> 	 * For rwlocks this returns 0 unconditionally, so the below
> 	 * !ret conditionals are optimized out.
> 	 */
>-	ret = rwbase_rtmutex_slowlock_locked(rtm, state);
>+	ret = rwbase_rtmutex_slowlock_locked(rtm, state, &wake_q);
>
> 	/*
> 	 * On success the rtmutex is held, so there can't be a writer
>@@ -122,6 +123,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
> 	if (!ret)
> 		atomic_inc(&rwb->readers);
> 	raw_spin_unlock_irq(&rtm->wait_lock);
>+	wake_up_q(&wake_q);
> 	if (!ret)
> 		rwbase_rtmutex_unlock(rtm);
>
>diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
>index 2340b6d90ec6..74ebb2915d63 100644
>--- a/kernel/locking/rwsem.c
>+++ b/kernel/locking/rwsem.c
>@@ -1415,8 +1415,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
> #define rwbase_rtmutex_lock_state(rtm, state)		\
> 	__rt_mutex_lock(rtm, state)
>
>-#define rwbase_rtmutex_slowlock_locked(rtm, state)	\
>-	__rt_mutex_slowlock_locked(rtm, NULL, state)
>+#define rwbase_rtmutex_slowlock_locked(rtm, state, wq)	\
>+	__rt_mutex_slowlock_locked(rtm, NULL, state, wq)
>
> #define rwbase_rtmutex_unlock(rtm)			\
> 	__rt_mutex_unlock(rtm)
>diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
>index 38e292454fcc..fb1810a14c9d 100644
>--- a/kernel/locking/spinlock_rt.c
>+++ b/kernel/locking/spinlock_rt.c
>@@ -162,7 +162,8 @@ rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state)
> }
>
> static __always_inline int
>-rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state)
>+rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state,
>+			       struct wake_q_head *wake_q)
> {
> 	rtlock_slowlock_locked(rtm);
> 	return 0;
>diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
>index 3ad2cc4823e5..7189c6631d90 100644
>--- a/kernel/locking/ww_mutex.h
>+++ b/kernel/locking/ww_mutex.h
>@@ -275,7 +275,7 @@ __ww_ctx_less(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
>  */
> static bool
> __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
>-	       struct ww_acquire_ctx *ww_ctx)
>+	       struct ww_acquire_ctx *ww_ctx, struct wake_q_head *wake_q)
> {
> 	if (!ww_ctx->is_wait_die)
> 		return false;
>@@ -284,7 +284,7 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
> #ifndef WW_RT
> 		debug_mutex_wake_waiter(lock, waiter);
> #endif
>-		wake_up_process(waiter->task);
>+		wake_q_add(wake_q, waiter->task);
> 	}
>
> 	return true;
>@@ -299,7 +299,8 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
>  */
> static bool __ww_mutex_wound(struct MUTEX *lock,
> 			     struct ww_acquire_ctx *ww_ctx,
>-			     struct ww_acquire_ctx *hold_ctx)
>+			     struct ww_acquire_ctx *hold_ctx,
>+			     struct wake_q_head *wake_q)
> {
> 	struct task_struct *owner = __ww_mutex_owner(lock);
>
>@@ -331,7 +332,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
> 		 * wakeup pending to re-read the wounded state.
> 		 */
> 		if (owner != current)
>-			wake_up_process(owner);
>+			wake_q_add(wake_q, owner);
>
> 		return true;
> 	}
>@@ -352,7 +353,8 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
>  * The current task must not be on the wait list.
>  */
> static void
>-__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx)
>+__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx,
>+			 struct wake_q_head *wake_q)
> {
> 	struct MUTEX_WAITER *cur;
>
>@@ -364,8 +366,8 @@ __ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx)
> 		if (!cur->ww_ctx)
> 			continue;
>
>-		if (__ww_mutex_die(lock, cur, ww_ctx) ||
>-		    __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
>+		if (__ww_mutex_die(lock, cur, ww_ctx, wake_q) ||
>+		    __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx, wake_q))
> 			break;
> 	}
> }
>@@ -377,6 +379,8 @@ __ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx)
> static __always_inline void
> ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
> {
>+	DEFINE_WAKE_Q(wake_q);
>+
> 	ww_mutex_lock_acquired(lock, ctx);
>
> 	/*
>@@ -405,8 +409,10 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
> 	 * die or wound us.
> 	 */
> 	lock_wait_lock(&lock->base);
>-	__ww_mutex_check_waiters(&lock->base, ctx);
>+	__ww_mutex_check_waiters(&lock->base, ctx, &wake_q);
> 	unlock_wait_lock(&lock->base);
>+
>+	wake_up_q(&wake_q);
> }
>
> static __always_inline int
>@@ -488,7 +494,8 @@ __ww_mutex_check_kill(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
> static inline int
> __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter,
> 		      struct MUTEX *lock,
>-		      struct ww_acquire_ctx *ww_ctx)
>+		      struct ww_acquire_ctx *ww_ctx,
>+		      struct wake_q_head *wake_q)
> {
> 	struct MUTEX_WAITER *cur, *pos = NULL;
> 	bool is_wait_die;
>@@ -532,7 +539,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter,
> 		pos = cur;
>
> 		/* Wait-Die: ensure younger waiters die. */
>-		__ww_mutex_die(lock, cur, ww_ctx);
>+		__ww_mutex_die(lock, cur, ww_ctx, wake_q);
> 	}
>
> 	__ww_waiter_add(lock, waiter, pos);
>@@ -550,7 +557,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter,
> 		 * such that either we or the fastpath will wound @ww->ctx.
> 		 */
> 		smp_mb();
>-		__ww_mutex_wound(lock, ww_ctx, ww->ctx);
>+		__ww_mutex_wound(lock, ww_ctx, ww->ctx, wake_q);
> 	}
>
> 	return 0;
>-- 
>2.44.0.291.gc1ea87d7ee-goog
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ