Message-Id: <20260103002343.6599-3-joelagnelf@nvidia.com>
Date: Fri,  2 Jan 2026 19:23:31 -0500
From: Joel Fernandes <joelagnelf@...dia.com>
To: linux-kernel@...r.kernel.org
Cc: "Paul E . McKenney" <paulmck@...nel.org>,
	Frederic Weisbecker <frederic@...nel.org>,
	Neeraj Upadhyay <neeraj.upadhyay@...nel.org>,
	Joel Fernandes <joelagnelf@...dia.com>,
	Josh Triplett <josh@...htriplett.org>,
	Boqun Feng <boqun.feng@...il.com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
	Lai Jiangshan <jiangshanlai@...il.com>,
	Zqiang <qiang.zhang@...ux.dev>,
	Uladzislau Rezki <urezki@...il.com>,
	joel@...lfernandes.org,
	rcu@...r.kernel.org
Subject: [PATCH RFC 02/14] rcu: Add per-CPU blocked task lists for PREEMPT_RCU

Add per-CPU tracking of tasks blocked in RCU read-side critical
sections. Each rcu_data gets a blkd_list protected by blkd_lock,
mirroring the rcu_node blkd_tasks list at per-CPU granularity.

Tasks are added to the per-CPU list when they are preempted within an
RCU read-side critical section and removed again on the
rcu_read_unlock() path. A WARN_ON_ONCE() in rcu_gp_init() verifies that
each leaf rcu_node's ->blkd_tasks list holds at least as many tasks as
the per-CPU lists of the CPUs it covers, since the unlock path removes
a task from the per-CPU list before the rcu_node list.

Signed-off-by: Joel Fernandes <joelagnelf@...dia.com>
---
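Notes (not intended for the changelog): the snippet below is a small,
stand-alone user-space sketch of the scheme above, showing how a
per-CPU list shadows the node-wide list and why the grace-period-time
check expects the node-wide count to be >= the sum of the per-CPU
counts (a task is removed from the per-CPU list first, so it may
transiently sit only on the node-wide list). All identifiers in the
sketch (block_task(), unblock_task(), cpu_blkd, ...) are made up for
illustration and are not kernel symbols; ordinary pthread mutexes stand
in for the raw spinlocks.

/*
 * Simplified, stand-alone user-space model of the per-CPU blocked-task
 * lists introduced by this patch.  All names here (cpu_blkd, node_head,
 * block_task, ...) are illustrative only and are not kernel symbols;
 * locking is likewise collapsed to ordinary mutexes for brevity.
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

#define NR_CPUS 4

struct task {
	struct task *node_next;	/* link on the node-wide list */
	struct task *cpu_next;	/* link on the per-CPU list */
	int blocked_cpu;	/* CPU whose list holds this task, or -1 */
	int id;
};

/* Per-CPU analogue of rcu_data's ->blkd_list/->blkd_lock. */
static struct cpu_blkd {
	pthread_mutex_t lock;
	struct task *head;
} cpu_blkd[NR_CPUS];

/* Analogue of the rcu_node ->blkd_tasks list (one node, for simplicity). */
static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;
static struct task *node_head;

/* Task @t is preempted on @cpu while inside a read-side critical section. */
static void block_task(struct task *t, int cpu)
{
	pthread_mutex_lock(&node_lock);
	t->node_next = node_head;
	node_head = t;
	pthread_mutex_lock(&cpu_blkd[cpu].lock);
	t->cpu_next = cpu_blkd[cpu].head;
	cpu_blkd[cpu].head = t;
	t->blocked_cpu = cpu;
	pthread_mutex_unlock(&cpu_blkd[cpu].lock);
	pthread_mutex_unlock(&node_lock);
}

/* @t exits its critical section: leave the per-CPU list, then the node list. */
static void unblock_task(struct task *t)
{
	struct cpu_blkd *cb = &cpu_blkd[t->blocked_cpu];
	struct task **pp;

	pthread_mutex_lock(&cb->lock);
	for (pp = &cb->head; *pp != t; pp = &(*pp)->cpu_next)
		;
	*pp = t->cpu_next;
	t->blocked_cpu = -1;
	pthread_mutex_unlock(&cb->lock);
	/* Window: @t is still on the node list but on no per-CPU list. */
	pthread_mutex_lock(&node_lock);
	for (pp = &node_head; *pp != t; pp = &(*pp)->node_next)
		;
	*pp = t->node_next;
	pthread_mutex_unlock(&node_lock);
}

/* Mirrors the rcu_gp_init() check: node count >= sum of per-CPU counts. */
static void verify_lists(void)
{
	int node_count = 0, cpu_total = 0;
	struct task *t;

	pthread_mutex_lock(&node_lock);
	for (t = node_head; t; t = t->node_next)
		node_count++;
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		pthread_mutex_lock(&cpu_blkd[cpu].lock);
		for (t = cpu_blkd[cpu].head; t; t = t->cpu_next)
			cpu_total++;
		pthread_mutex_unlock(&cpu_blkd[cpu].lock);
	}
	pthread_mutex_unlock(&node_lock);
	assert(node_count >= cpu_total);
	printf("node list: %d tasks, per-CPU lists: %d tasks\n",
	       node_count, cpu_total);
}

int main(void)
{
	struct task a = { .id = 1, .blocked_cpu = -1 };
	struct task b = { .id = 2, .blocked_cpu = -1 };

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		pthread_mutex_init(&cpu_blkd[cpu].lock, NULL);
	block_task(&a, 0);
	block_task(&b, 1);
	verify_lists();		/* node 2 / per-CPU 2 */
	unblock_task(&a);
	verify_lists();		/* node 1 / per-CPU 1 */
	unblock_task(&b);
	verify_lists();		/* node 0 / per-CPU 0 */
	return 0;
}

Build with e.g. gcc -pthread blkd_sketch.c; each verify_lists() call
should report a node-wide count that is >= the combined per-CPU count.
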
 include/linux/sched.h    |  4 ++++
 kernel/fork.c            |  4 ++++
 kernel/rcu/Kconfig       | 12 ++++++++++++
 kernel/rcu/tree.c        | 32 ++++++++++++++++++++++++++++++++
 kernel/rcu/tree.h        |  6 ++++++
 kernel/rcu/tree_plugin.h | 21 +++++++++++++++++++++
 6 files changed, 79 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d395f2810fac..90ce501a568e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -931,6 +931,10 @@ struct task_struct {
 	union rcu_special		rcu_read_unlock_special;
 	struct list_head		rcu_node_entry;
 	struct rcu_node			*rcu_blocked_node;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+	struct list_head		rcu_rdp_entry;
+	int				rcu_blocked_cpu;
+#endif
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 
 #ifdef CONFIG_TASKS_RCU
diff --git a/kernel/fork.c b/kernel/fork.c
index b1f3915d5f8e..7a5ba2d2c1b5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1819,6 +1819,10 @@ static inline void rcu_copy_process(struct task_struct *p)
 	p->rcu_read_unlock_special.s = 0;
 	p->rcu_blocked_node = NULL;
 	INIT_LIST_HEAD(&p->rcu_node_entry);
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+	INIT_LIST_HEAD(&p->rcu_rdp_entry);
+	p->rcu_blocked_cpu = -1;
+#endif
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 #ifdef CONFIG_TASKS_RCU
 	p->rcu_tasks_holdout = false;
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 4d9b21f69eaa..4bb12f1fed09 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -248,6 +248,18 @@ config RCU_EXP_KTHREAD
 
 	  Accept the default if unsure.
 
+config RCU_PER_CPU_BLOCKED_LISTS
+	bool "Use per-CPU blocked task lists in PREEMPT_RCU"
+	depends on PREEMPT_RCU
+	default n
+	help
+	  Enable per-CPU tracking of tasks blocked in RCU read-side
+	  critical sections. This option makes it easy to toggle the
+	  feature on and off. It will eventually be removed in favor of
+	  always enabling the optimization.
+
+	  Accept the default if unsure.
+
 config RCU_NOCB_CPU
 	bool "Offload RCU callback processing from boot-selected CPUs"
 	depends on TREE_RCU
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 293bbd9ac3f4..e2b6a4579086 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1809,6 +1809,14 @@ static noinline_for_stack bool rcu_gp_init(void)
 	struct rcu_node *rnp = rcu_get_root();
 	bool start_new_poll;
 	unsigned long old_gp_seq;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+	struct task_struct *t_verify;
+	int cpu_verify;
+	int rnp_count;
+	int rdp_total;
+	struct rcu_data *rdp_cpu;
+	struct task_struct *t_rdp;
+#endif
 
 	WRITE_ONCE(rcu_state.gp_activity, jiffies);
 	raw_spin_lock_irq_rcu_node(rnp);
@@ -1891,6 +1899,26 @@ static noinline_for_stack bool rcu_gp_init(void)
 		 */
 		arch_spin_lock(&rcu_state.ofl_lock);
 		raw_spin_lock_rcu_node(rnp);
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+		/*
+		 * Verify rdp lists consistent with rnp list. Since the unlock
+		 * path removes from rdp before rnp, we can have tasks that are
+		 * on rnp but not on rdp (in the middle of being removed).
+		 * Therefore rnp_count >= rdp_total is the expected invariant.
+		 */
+		rnp_count = 0;
+		rdp_total = 0;
+		list_for_each_entry(t_verify, &rnp->blkd_tasks, rcu_node_entry)
+			rnp_count++;
+		for (cpu_verify = rnp->grplo; cpu_verify <= rnp->grphi; cpu_verify++) {
+			rdp_cpu = per_cpu_ptr(&rcu_data, cpu_verify);
+			raw_spin_lock(&rdp_cpu->blkd_lock);
+			list_for_each_entry(t_rdp, &rdp_cpu->blkd_list, rcu_rdp_entry)
+				rdp_total++;
+			raw_spin_unlock(&rdp_cpu->blkd_lock);
+		}
+		WARN_ON_ONCE(rnp_count < rdp_total);
+#endif
 		if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
 		    !rnp->wait_blkd_tasks) {
 			/* Nothing to do on this leaf rcu_node structure. */
@@ -4143,6 +4171,10 @@ rcu_boot_init_percpu_data(int cpu)
 	rdp->rcu_onl_gp_state = RCU_GP_CLEANED;
 	rdp->last_sched_clock = jiffies;
 	rdp->cpu = cpu;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+	raw_spin_lock_init(&rdp->blkd_lock);
+	INIT_LIST_HEAD(&rdp->blkd_list);
+#endif
 	rcu_boot_init_nocb_percpu_data(rdp);
 }
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b8bbe7960cda..13d5649a80fb 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -294,6 +294,12 @@ struct rcu_data {
 
 	long lazy_len;			/* Length of buffered lazy callbacks. */
 	int cpu;
+
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+	/* 8) Per-CPU blocked task tracking. */
+	raw_spinlock_t blkd_lock;	/* Protects blkd_list. */
+	struct list_head blkd_list;	/* Tasks blocked on this CPU. */
+#endif
 };
 
 /* Values for nocb_defer_wakeup field in struct rcu_data. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 73ba5f4a968d..5d2bde19131a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -338,6 +338,12 @@ void rcu_note_context_switch(bool preempt)
 		raw_spin_lock_rcu_node(rnp);
 		t->rcu_read_unlock_special.b.blocked = true;
 		t->rcu_blocked_node = rnp;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+		t->rcu_blocked_cpu = rdp->cpu;
+		raw_spin_lock(&rdp->blkd_lock);
+		list_add(&t->rcu_rdp_entry, &rdp->blkd_list);
+		raw_spin_unlock(&rdp->blkd_lock);
+#endif
 
 		/*
 		 * Verify the CPU's sanity, trace the preemption, and
@@ -485,6 +491,10 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 	union rcu_special special;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+	int blocked_cpu;
+	struct rcu_data *blocked_rdp;
+#endif
 
 	rdp = this_cpu_ptr(&rcu_data);
 	if (rdp->defer_qs_iw_pending == DEFER_QS_PENDING)
@@ -530,6 +540,17 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 		 * to loop.  Retain a WARN_ON_ONCE() out of sheer paranoia.
 		 */
 		rnp = t->rcu_blocked_node;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+		/* Remove from per-CPU list if task was added to it. */
+		blocked_cpu = t->rcu_blocked_cpu;
+		if (blocked_cpu != -1) {
+			blocked_rdp = per_cpu_ptr(&rcu_data, blocked_cpu);
+			raw_spin_lock(&blocked_rdp->blkd_lock);
+			list_del_init(&t->rcu_rdp_entry);
+			t->rcu_blocked_cpu = -1;
+			raw_spin_unlock(&blocked_rdp->blkd_lock);
+		}
+#endif
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
 		WARN_ON_ONCE(rnp != t->rcu_blocked_node);
 		WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
-- 
2.34.1

