lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 11 Mar 2022 10:22:26 +0800
From:   Zqiang <qiang1.zhang@...el.com>
To:     paulmck@...nel.org, frederic@...nel.org, urezki@...il.com,
        quic_neeraju@...cinc.com, josh@...htriplett.org,
        bigeasy@...utronix.de
Cc:     juri.lelli@...hat.com, rcu@...r.kernel.org,
        linux-kernel@...r.kernel.org
Subject: [PATCH v3] rcu: Only boost rcu reader tasks with lower priority than boost kthreads

When RCU_BOOST is enabled, the boost kthreads boost readers that are
blocking a given grace period. If the current reader task has a higher
priority than the boost kthread (the boost kthread's priority is not
always 1 if kthread_prio is set), boosting that task is useless. In
that case, skip the current task and select the next task to boost,
which reduces the time needed for a given grace period.

Suggested-by: Uladzislau Rezki (Sony) <urezki@...il.com>
Signed-off-by: Zqiang <qiang1.zhang@...el.com>
---
 v1->v2:
 Rename label 'end' to 'skip_boost'.
 Add a 'boost_exp_tasks' pointer that points to 'rnp->exp_tasks', to
 do the same thing as is done for the normal grace period.
 v2->v3:
 Remove the redundant dl_task() checks.

 kernel/rcu/tree.h        |  2 ++
 kernel/rcu/tree_plugin.h | 30 ++++++++++++++++++++++--------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b8d07bf92d29..862ca09b56c7 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -103,6 +103,8 @@ struct rcu_node {
 				/*  queued on this rcu_node structure that */
 				/*  are blocking the current grace period, */
 				/*  there can be no such task. */
+	struct list_head *boost_exp_tasks;
+
 	struct rt_mutex boost_mtx;
 				/* Used only for the priority-boosting */
 				/*  side effect, not as a lock. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c3d212bc5338..fd37042ecdb2 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -535,6 +535,8 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 			drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
 			if (&t->rcu_node_entry == rnp->boost_tasks)
 				WRITE_ONCE(rnp->boost_tasks, np);
+			if (&t->rcu_node_entry == rnp->boost_exp_tasks)
+				WRITE_ONCE(rnp->boost_exp_tasks, np);
 		}
 
 		/*
@@ -1022,7 +1024,7 @@ static int rcu_boost(struct rcu_node *rnp)
 	struct task_struct *t;
 	struct list_head *tb;
 
-	if (READ_ONCE(rnp->exp_tasks) == NULL &&
+	if (READ_ONCE(rnp->boost_exp_tasks) == NULL &&
 	    READ_ONCE(rnp->boost_tasks) == NULL)
 		return 0;  /* Nothing left to boost. */
 
@@ -1032,7 +1034,7 @@ static int rcu_boost(struct rcu_node *rnp)
 	 * Recheck under the lock: all tasks in need of boosting
 	 * might exit their RCU read-side critical sections on their own.
 	 */
-	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
+	if (rnp->boost_exp_tasks == NULL && rnp->boost_tasks == NULL) {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return 0;
 	}
@@ -1043,8 +1045,8 @@ static int rcu_boost(struct rcu_node *rnp)
 	 * expedited grace period must boost all blocked tasks, including
 	 * those blocking the pre-existing normal grace period.
 	 */
-	if (rnp->exp_tasks != NULL)
-		tb = rnp->exp_tasks;
+	if (rnp->boost_exp_tasks != NULL)
+		tb = rnp->boost_exp_tasks;
 	else
 		tb = rnp->boost_tasks;
 
@@ -1065,14 +1067,24 @@ static int rcu_boost(struct rcu_node *rnp)
 	 * section.
 	 */
 	t = container_of(tb, struct task_struct, rcu_node_entry);
+	if (t->prio <= current->prio) {
+		tb = rcu_next_node_entry(t, rnp);
+		if (rnp->boost_exp_tasks)
+			WRITE_ONCE(rnp->boost_exp_tasks, tb);
+		else
+			WRITE_ONCE(rnp->boost_tasks, tb);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		goto skip_boost;
+	}
+
 	rt_mutex_init_proxy_locked(&rnp->boost_mtx.rtmutex, t);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	/* Lock only for side effect: boosts task t's priority. */
 	rt_mutex_lock(&rnp->boost_mtx);
 	rt_mutex_unlock(&rnp->boost_mtx);  /* Then keep lockdep happy. */
 	rnp->n_boosts++;
-
-	return READ_ONCE(rnp->exp_tasks) != NULL ||
+skip_boost:
+	return READ_ONCE(rnp->boost_exp_tasks) != NULL ||
 	       READ_ONCE(rnp->boost_tasks) != NULL;
 }
 
@@ -1090,7 +1102,7 @@ static int rcu_boost_kthread(void *arg)
 		WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
 		trace_rcu_utilization(TPS("End boost kthread@..._wait"));
 		rcu_wait(READ_ONCE(rnp->boost_tasks) ||
-			 READ_ONCE(rnp->exp_tasks));
+			 READ_ONCE(rnp->boost_exp_tasks));
 		trace_rcu_utilization(TPS("Start boost kthread@..._wait"));
 		WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
 		more2boost = rcu_boost(rnp);
@@ -1129,13 +1141,15 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
-	if (rnp->exp_tasks != NULL ||
+	if ((rnp->exp_tasks != NULL && rnp->boost_exp_tasks == NULL) ||
 	    (rnp->gp_tasks != NULL &&
 	     rnp->boost_tasks == NULL &&
 	     rnp->qsmask == 0 &&
 	     (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
 		if (rnp->exp_tasks == NULL)
 			WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
+		else
+			WRITE_ONCE(rnp->boost_exp_tasks, rnp->exp_tasks);
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		rcu_wake_cond(rnp->boost_kthread_task,
 			      READ_ONCE(rnp->boost_kthread_status));
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ