Date:   Tue, 20 Dec 2022 19:25:20 +0800
From:   Zqiang <qiang1.zhang@...el.com>
To:     paulmck@...nel.org, frederic@...nel.org, quic_neeraju@...cinc.com,
        joel@...lfernandes.org
Cc:     rcu@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: [PATCH] rcu: Fix race in set and clear TICK_DEP_BIT_RCU_EXP bitmask

For kernels built with CONFIG_NO_HZ_FULL enabled, where the CPUs below are
nohz_full CPUs, the following race can occur:

	CPU1                                                 CPU2
rcu_report_exp_cpu_mult                          synchronize_rcu_expedited_wait
   acquires rnp->lock                               mask = rnp->expmask;
                                                    for_each_leaf_node_cpu_mask(rnp, cpu, mask)
   rnp->expmask = rnp->expmask & ~mask;                rdp = per_cpu_ptr(&rcu_data, cpu1);
   for_each_leaf_node_cpu_mask(rnp, cpu, mask)
      rdp = per_cpu_ptr(&rcu_data, cpu1);
      if (!rdp->rcu_forced_tick_exp)
             continue;                                 rdp->rcu_forced_tick_exp = true;
                                                       tick_dep_set_cpu(cpu1, TICK_DEP_BIT_RCU_EXP);

In the above scenario, after CPU1 has reported the quiescent state, CPU1
misses the opportunity to clear the TICK_DEP_BIT_RCU_EXP bit, which will
not be cleared until the next expedited grace period starts and CPU1's
quiescent state is reported again. During this window the tick cannot be
stopped on CPU1, so if CPU1 has only one runnable task and that task has
aggressive real-time response constraints, the task may see one of its
worst response times.
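
For reference, the clearing side in rcu_report_exp_cpu_mult() already runs
with rnp->lock held. A simplified sketch of that path (paraphrased from the
scenario above; the exact body in kernel/rcu/tree_exp.h differs in details
such as sanity checks and the CONFIG_NO_HZ_FULL guard):

	static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
					    unsigned long mask, bool wake)
	{
		unsigned long flags;
		struct rcu_data *rdp;
		int cpu;

		raw_spin_lock_irqsave_rcu_node(rnp, flags);
		/* ... checks elided ... */
		rnp->expmask = rnp->expmask & ~mask;
		for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
			rdp = per_cpu_ptr(&rcu_data, cpu);
			if (!rdp->rcu_forced_tick_exp)
				continue;  /* nothing to clear for this CPU */
			rdp->rcu_forced_tick_exp = false;
			tick_dep_clear_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
		}
		/* ... report quiescent states up the tree; releases rnp->lock ... */
	}

Because this clearing pass runs entirely under rnp->lock, holding the same
lock while sampling rnp->expmask and setting the tick dependency serializes
the two sides.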

Therefore, this commit holds rnp->lock when setting the TICK_DEP_BIT_RCU_EXP
bit to fix this race.

Signed-off-by: Zqiang <qiang1.zhang@...el.com>
---
 kernel/rcu/tree_exp.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 927abaf6c822..e5fe0099488b 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -593,6 +593,7 @@ static void synchronize_rcu_expedited_wait(void)
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 	struct rcu_node *rnp_root = rcu_get_root();
+	unsigned long flags;
 
 	trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
 	jiffies_stall = rcu_exp_jiffies_till_stall_check();
@@ -601,17 +602,17 @@ static void synchronize_rcu_expedited_wait(void)
 		if (synchronize_rcu_expedited_wait_once(1))
 			return;
 		rcu_for_each_leaf_node(rnp) {
+			raw_spin_lock_irqsave_rcu_node(rnp, flags);
 			mask = READ_ONCE(rnp->expmask);
 			for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
 				rdp = per_cpu_ptr(&rcu_data, cpu);
 				if (rdp->rcu_forced_tick_exp)
 					continue;
 				rdp->rcu_forced_tick_exp = true;
-				preempt_disable();
 				if (cpu_online(cpu))
 					tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
-				preempt_enable();
 			}
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		}
 		j = READ_ONCE(jiffies_till_first_fqs);
 		if (synchronize_rcu_expedited_wait_once(j + HZ))
-- 
2.25.1
