Message-Id: <20230427111937.2745231-3-bigeasy@linutronix.de>
Date: Thu, 27 Apr 2023 13:19:35 +0200
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-kernel@...r.kernel.org
Cc: Ben Segall <bsegall@...gle.com>, Boqun Feng <boqun.feng@...il.com>,
Crystal Wood <swood@...hat.com>,
Daniel Bristot de Oliveira <bristot@...hat.com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Ingo Molnar <mingo@...hat.com>,
John Stultz <jstultz@...gle.com>,
Juri Lelli <juri.lelli@...hat.com>,
Mel Gorman <mgorman@...e.de>,
Peter Zijlstra <peterz@...radead.org>,
Steven Rostedt <rostedt@...dmis.org>,
Thomas Gleixner <tglx@...utronix.de>,
Valentin Schneider <vschneid@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Waiman Long <longman@...hat.com>,
Will Deacon <will@...nel.org>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Subject: [PATCH v2 2/4] locking/rtmutex: Submit/resume work explicitly before/after blocking
schedule() invokes sched_submit_work() before scheduling and
sched_resume_work() afterwards to ensure that queued block requests are
flushed and the (IO) worker machinery can instantiate new workers if
required. This avoids deadlocks and starvation.
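
For illustration, a rough sketch of that ordering (not the actual
scheduler code; helper signatures simplified):

	/*
	 * Illustration only: schedule() brackets the actual context
	 * switch with the work hooks described above.
	 */
	void schedule(void)
	{
		sched_submit_work();	/* flush plugged block I/O, let io/workqueue workers take over */
		__schedule(SM_NONE);	/* the actual context switch (simplified) */
		sched_resume_work();	/* re-arm the worker machinery after waking up */
	}
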
With rt_mutexes this can lead to a subtle problem:
When a task blocks on an rtmutex, current::pi_blocked_on points to the
rtmutex it blocks on. When one of the functions invoked from
sched_submit/resume_work() contends on an rtmutex based lock, that would
corrupt current::pi_blocked_on.
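
An illustrative call chain of the corruption, with lock_A and lock_B
standing in for two unrelated rtmutex based locks and a hypothetical
work function in between:

  rt_mutex_slowlock(lock_A)
    current->pi_blocked_on = &waiter_A
    schedule()
      sched_submit_work()
        some_work_flush_function()		/* hypothetical, e.g. flushing queued block requests */
          rt_mutex_slowlock(lock_B)
            current->pi_blocked_on = &waiter_B	/* lock_A's waiter tracking is lost */
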
Let rtmutex and the RT lock variants based on it invoke
sched_submit/resume_work() explicitly before and after the slowpath so
it is guaranteed that current::pi_blocked_on cannot be corrupted by
blocking on two locks.
This does not apply to the PREEMPT_RT variants of spinlock_t and rwlock_t
as their scheduling slowpath is separate and cannot invoke the work related
functions due to potential deadlocks anyway.
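
The rtmutex slowpaths below use schedule_rtmutex(), which is added
earlier in this series. A minimal sketch of its assumed shape, i.e.
schedule() without the submit/resume work hooks because the caller now
invokes them explicitly:

	/* Sketch under the above assumption; not the actual patch 1/4 code. */
	void schedule_rtmutex(void)
	{
		/* schedule() minus sched_submit_work()/sched_resume_work() */
		do {
			preempt_disable();
			__schedule(SM_NONE);
			sched_preempt_enable_no_resched();
		} while (need_resched());
	}
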
[ tglx: Make it explicit and symmetric. Massage changelog ]
Fixes: e17ba59b7e8e1 ("locking/rtmutex: Guard regular sleeping locks specific functions")
Reported-by: Crystal Wood <swood@...hat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
Link: https://lore.kernel.org/4b4ab374d3e24e6ea8df5cadc4297619a6d945af.camel@redhat.com
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
kernel/locking/rtmutex.c | 11 +++++++++--
kernel/locking/rwbase_rt.c | 18 ++++++++++++++++--
kernel/locking/rwsem.c | 6 ++++++
kernel/locking/spinlock_rt.c | 3 +++
4 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 728f434de2bbf..aa66a3c5950a7 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1555,7 +1555,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
raw_spin_unlock_irq(&lock->wait_lock);
if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
- schedule();
+ schedule_rtmutex();
raw_spin_lock_irq(&lock->wait_lock);
set_current_state(state);
@@ -1584,7 +1584,7 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
WARN(1, "rtmutex deadlock detected\n");
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
- schedule();
+ schedule_rtmutex();
}
}
@@ -1679,6 +1679,12 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
unsigned long flags;
int ret;
+ /*
+ * The task is about to sleep. Invoke sched_submit_work() before
+ * blocking as that might take locks and corrupt tsk::pi_blocked_on.
+ */
+ sched_submit_work();
+
/*
* Technically we could use raw_spin_[un]lock_irq() here, but this can
* be called in early boot if the cmpxchg() fast path is disabled
@@ -1691,6 +1697,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ sched_resume_work();
return ret;
}
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 25ec0239477c2..945d474f5d27f 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -131,10 +131,21 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb,
unsigned int state)
{
+ int ret;
+
if (rwbase_read_trylock(rwb))
return 0;
- return __rwbase_read_lock(rwb, state);
+ /*
+ * The task is about to sleep. For rwsems this submits work as that
+ * might take locks and corrupt tsk::pi_blocked_on. Must be
+ * explicit here because __rwbase_read_lock() cannot invoke
+ * rt_mutex_slowlock(). NOP for rwlocks.
+ */
+ rwbase_sched_submit_work();
+ ret = __rwbase_read_lock(rwb, state);
+ rwbase_sched_resume_work();
+ return ret;
}
static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
@@ -230,7 +241,10 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
struct rt_mutex_base *rtm = &rwb->rtmutex;
unsigned long flags;
- /* Take the rtmutex as a first step */
+ /*
+ * Take the rtmutex as a first step. For rwsem this will also
+ * invoke sched_submit_work() to flush IO and workers.
+ */
if (rwbase_rtmutex_lock_state(rtm, state))
return -EINTR;
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index acb5a50309a18..aca266006ad47 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1415,6 +1415,12 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
#define rwbase_rtmutex_lock_state(rtm, state) \
__rt_mutex_lock(rtm, state)
+#define rwbase_sched_submit_work() \
+ sched_submit_work()
+
+#define rwbase_sched_resume_work() \
+ sched_resume_work()
+
#define rwbase_rtmutex_slowlock_locked(rtm, state) \
__rt_mutex_slowlock_locked(rtm, NULL, state)
diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
index 48a19ed8486d8..62c4a6866087a 100644
--- a/kernel/locking/spinlock_rt.c
+++ b/kernel/locking/spinlock_rt.c
@@ -159,6 +159,9 @@ rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state)
return 0;
}
+static __always_inline void rwbase_sched_submit_work(void) { }
+static __always_inline void rwbase_sched_resume_work(void) { }
+
static __always_inline int
rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state)
{
--
2.40.1