Message-Id: <20220311022226.595905-1-qiang1.zhang@intel.com>
Date: Fri, 11 Mar 2022 10:22:26 +0800
From: Zqiang <qiang1.zhang@...el.com>
To: paulmck@...nel.org, frederic@...nel.org, urezki@...il.com,
quic_neeraju@...cinc.com, josh@...htriplett.org,
bigeasy@...utronix.de
Cc: juri.lelli@...hat.com, rcu@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH v3] rcu: Only boost rcu reader tasks with lower priority than boost kthreads
When RCU_BOOST is enabled, the boost kthreads boost readers that are
blocking a given grace period. If a blocked reader task already has a
higher priority than the boost kthread (the boost kthread's priority is
not always 1; it depends on the kthread_prio parameter), boosting it is
useless. Skip such a task and select the next one to boost, which
reduces the time taken by the grace period.
Suggested-by: Uladzislau Rezki (Sony) <urezki@...il.com>
Signed-off-by: Zqiang <qiang1.zhang@...el.com>
---
v1->v2:
Rename label 'end' to 'skip_boost'.
Add a 'boost_exp_tasks' pointer that points to 'rnp->exp_tasks' and
is handled the same way as for a normal grace period.
v2->v3:
Remove a redundant dl_task() check.
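
Note for reviewers: below is a user-space sketch (illustration only, not
kernel code) of the skip logic this patch adds to rcu_boost(). Readers
whose ->prio is already at or above the boost kthread's priority (lower
numeric value means higher priority) gain nothing from boosting, so they
are passed over. The 'struct reader' type, the pick_reader_to_boost()
helper and the priority values are invented for this example, and the
sketch compresses into a loop what the real code does one entry at a
time, advancing rnp->boost_exp_tasks or rnp->boost_tasks once per
rcu_boost() call.

#include <stdio.h>

struct reader {
        const char *name;
        int prio;               /* lower number == higher priority */
        struct reader *next;    /* stand-in for the rcu_node_entry list */
};

/* Return the first reader that would actually benefit from boosting. */
static struct reader *pick_reader_to_boost(struct reader *head,
                                           int booster_prio)
{
        struct reader *r;

        for (r = head; r; r = r->next) {
                if (r->prio <= booster_prio) {
                        /* Already as high as or higher than the booster. */
                        printf("skip  %s (prio %d)\n", r->name, r->prio);
                        continue;
                }
                return r;       /* Boosting raises this reader's priority. */
        }
        return NULL;            /* Nothing left worth boosting. */
}

int main(void)
{
        struct reader c = { "reader-c", 90, NULL };
        struct reader a = { "reader-a", 120, &c };
        struct reader b = { "reader-b", 10, &a };
        int booster_prio = 50;  /* hypothetical boost kthread priority */
        struct reader *victim = pick_reader_to_boost(&b, booster_prio);

        if (victim)
                printf("boost %s (prio %d -> %d)\n",
                       victim->name, victim->prio, booster_prio);
        return 0;
}

Running it prints "skip reader-b (prio 10)" followed by
"boost reader-a (prio 120 -> 50)".
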
kernel/rcu/tree.h | 2 ++
kernel/rcu/tree_plugin.h | 30 ++++++++++++++++++++++--------
2 files changed, 24 insertions(+), 8 deletions(-)
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b8d07bf92d29..862ca09b56c7 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -103,6 +103,8 @@ struct rcu_node {
/* queued on this rcu_node structure that */
/* are blocking the current grace period, */
/* there can be no such task. */
+ struct list_head *boost_exp_tasks;
+
struct rt_mutex boost_mtx;
/* Used only for the priority-boosting */
/* side effect, not as a lock. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c3d212bc5338..fd37042ecdb2 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -535,6 +535,8 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
if (&t->rcu_node_entry == rnp->boost_tasks)
WRITE_ONCE(rnp->boost_tasks, np);
+ if (&t->rcu_node_entry == rnp->boost_exp_tasks)
+ WRITE_ONCE(rnp->boost_exp_tasks, np);
}
/*
@@ -1022,7 +1024,7 @@ static int rcu_boost(struct rcu_node *rnp)
struct task_struct *t;
struct list_head *tb;
- if (READ_ONCE(rnp->exp_tasks) == NULL &&
+ if (READ_ONCE(rnp->boost_exp_tasks) == NULL &&
READ_ONCE(rnp->boost_tasks) == NULL)
return 0; /* Nothing left to boost. */
@@ -1032,7 +1034,7 @@ static int rcu_boost(struct rcu_node *rnp)
* Recheck under the lock: all tasks in need of boosting
* might exit their RCU read-side critical sections on their own.
*/
- if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
+ if (rnp->boost_exp_tasks == NULL && rnp->boost_tasks == NULL) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return 0;
}
@@ -1043,8 +1045,8 @@ static int rcu_boost(struct rcu_node *rnp)
* expedited grace period must boost all blocked tasks, including
* those blocking the pre-existing normal grace period.
*/
- if (rnp->exp_tasks != NULL)
- tb = rnp->exp_tasks;
+ if (rnp->boost_exp_tasks != NULL)
+ tb = rnp->boost_exp_tasks;
else
tb = rnp->boost_tasks;
@@ -1065,14 +1067,24 @@ static int rcu_boost(struct rcu_node *rnp)
* section.
*/
t = container_of(tb, struct task_struct, rcu_node_entry);
+ if (t->prio <= current->prio) {
+ tb = rcu_next_node_entry(t, rnp);
+ if (rnp->boost_exp_tasks)
+ WRITE_ONCE(rnp->boost_exp_tasks, tb);
+ else
+ WRITE_ONCE(rnp->boost_tasks, tb);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ goto skip_boost;
+ }
+
rt_mutex_init_proxy_locked(&rnp->boost_mtx.rtmutex, t);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/* Lock only for side effect: boosts task t's priority. */
rt_mutex_lock(&rnp->boost_mtx);
rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
rnp->n_boosts++;
-
- return READ_ONCE(rnp->exp_tasks) != NULL ||
+skip_boost:
+ return READ_ONCE(rnp->boost_exp_tasks) != NULL ||
READ_ONCE(rnp->boost_tasks) != NULL;
}
@@ -1090,7 +1102,7 @@ static int rcu_boost_kthread(void *arg)
WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
trace_rcu_utilization(TPS("End boost kthread@..._wait"));
rcu_wait(READ_ONCE(rnp->boost_tasks) ||
- READ_ONCE(rnp->exp_tasks));
+ READ_ONCE(rnp->boost_exp_tasks));
trace_rcu_utilization(TPS("Start boost kthread@..._wait"));
WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
more2boost = rcu_boost(rnp);
@@ -1129,13 +1141,15 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
- if (rnp->exp_tasks != NULL ||
+ if ((rnp->exp_tasks != NULL && rnp->boost_exp_tasks == NULL) ||
(rnp->gp_tasks != NULL &&
rnp->boost_tasks == NULL &&
rnp->qsmask == 0 &&
(!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
if (rnp->exp_tasks == NULL)
WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
+ else
+ WRITE_ONCE(rnp->boost_exp_tasks, rnp->exp_tasks);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
rcu_wake_cond(rnp->boost_kthread_task,
READ_ONCE(rnp->boost_kthread_status));
--
2.25.1