linux-kernel - [PATCH tip/core/rcu 1/3] rcu: fixes for accelerated grace periods for last non-dynticked CPU

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1267231138-27856-1-git-send-email-paulmck@linux.vnet.ibm.com>
Date:	Fri, 26 Feb 2010 16:38:56 -0800
From:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To:	linux-kernel@...r.kernel.org
Cc:	mingo@...e.hu, laijs@...fujitsu.com, dipankar@...ibm.com,
	akpm@...ux-foundation.org, mathieu.desnoyers@...ymtl.ca,
	josh@...htriplett.org, dvhltc@...ibm.com, niv@...ibm.com,
	tglx@...utronix.de, peterz@...radead.org, rostedt@...dmis.org,
	Valdis.Kletnieks@...edu, dhowells@...hat.com,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Subject: [PATCH tip/core/rcu 1/3] rcu: fixes for accelerated grace periods for last non-dynticked CPU

It is illegal to invoke __rcu_process_callbacks() with irqs disabled,
so do it indirectly via raise_softirq().  This requires a state-machine
implementation to cycle through the grace-period machinery the required
number of times.

Located-by: Ingo Molnar <mingo@...e.hu>
Signed-off-by: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
---
 kernel/rcutree.c        |    3 ++
 kernel/rcutree.h        |    1 +
 kernel/rcutree_plugin.h |   73 ++++++++++++++++++++++++++++++++++-------------
 3 files changed, 57 insertions(+), 20 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 335bfe4..3ec8160 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1341,6 +1341,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 	 * grace-period manipulations above.
 	 */
 	smp_mb(); /* See above block comment. */
+
+	/* If we are the last CPU on the way to dyntick-idle mode, accelerate it. */
+	rcu_needs_cpu_flush();
 }
 
 static void
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 2ceb083..1439eb5 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -373,5 +373,6 @@ static int rcu_preempt_needs_cpu(int cpu);
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
 static void rcu_preempt_send_cbs_to_orphanage(void);
 static void __init __rcu_init_preempt(void);
+static void rcu_needs_cpu_flush(void);
 
 #endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3516de7..ed241fc 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -973,9 +973,19 @@ int rcu_needs_cpu(int cpu)
 	return rcu_needs_cpu_quick_check(cpu);
 }
 
+/*
+ * Check to see if we need to continue a callback-flush operation to
+ * allow the last CPU to enter dyntick-idle mode.  But fast dyntick-idle
+ * entry is not configured, so we never need to.
+ */
+static void rcu_needs_cpu_flush(void)
+{
+}
+
 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 
 #define RCU_NEEDS_CPU_FLUSHES 5
+static DEFINE_PER_CPU(int, rcu_dyntick_drain);
 
 /*
  * Check to see if any future RCU-related work will need to be done
@@ -988,39 +998,62 @@ int rcu_needs_cpu(int cpu)
  * only if all other CPUs are already in dynticks-idle mode.  This will
  * allow the CPU cores to be powered down immediately, as opposed to after
  * waiting many milliseconds for grace periods to elapse.
+ *
+ * Because it is not legal to invoke rcu_process_callbacks() with irqs
+ * disabled, we do one pass of force_quiescent_state(), then do a
+ * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
+ * The per-cpu rcu_dyntick_drain variable controls the sequencing.
  */
 int rcu_needs_cpu(int cpu)
 {
-	int c = 1;
-	int i;
+	int c = 0;
 	int thatcpu;
 
 	/* Don't bother unless we are the last non-dyntick-idle CPU. */
 	for_each_cpu_not(thatcpu, nohz_cpu_mask)
-		if (thatcpu != cpu)
+		if (thatcpu != cpu) {
+			per_cpu(rcu_dyntick_drain, cpu) = 0;
 			return rcu_needs_cpu_quick_check(cpu);
-
-	/* Try to push remaining RCU-sched and RCU-bh callbacks through. */
-	for (i = 0; i < RCU_NEEDS_CPU_FLUSHES && c; i++) {
-		c = 0;
-		if (per_cpu(rcu_sched_data, cpu).nxtlist) {
-			rcu_sched_qs(cpu);
-			force_quiescent_state(&rcu_sched_state, 0);
-			__rcu_process_callbacks(&rcu_sched_state,
-						&per_cpu(rcu_sched_data, cpu));
-			c = !!per_cpu(rcu_sched_data, cpu).nxtlist;
-		}
-		if (per_cpu(rcu_bh_data, cpu).nxtlist) {
-			rcu_bh_qs(cpu);
-			force_quiescent_state(&rcu_bh_state, 0);
-			__rcu_process_callbacks(&rcu_bh_state,
-						&per_cpu(rcu_bh_data, cpu));
-			c = !!per_cpu(rcu_bh_data, cpu).nxtlist;
 		}
+
+	/* Check and update the rcu_dyntick_drain sequencing. */
+	if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+		/* First time through, initialize the counter. */
+		per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
+	} else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+		/* We have hit the limit, so time to give up. */
+		return rcu_needs_cpu_quick_check(cpu);
+	}
+
+	/* Do one step pushing remaining RCU callbacks through. */
+	if (per_cpu(rcu_sched_data, cpu).nxtlist) {
+		rcu_sched_qs(cpu);
+		force_quiescent_state(&rcu_sched_state, 0);
+		c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
+	}
+	if (per_cpu(rcu_bh_data, cpu).nxtlist) {
+		rcu_bh_qs(cpu);
+		force_quiescent_state(&rcu_bh_state, 0);
+		c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
 	}
 
 	/* If RCU callbacks are still pending, RCU still needs this CPU. */
+	if (c)
+		raise_softirq(RCU_SOFTIRQ);
 	return c;
 }
 
+/*
+ * Check to see if we need to continue a callback-flush operation to
+ * allow the last CPU to enter dyntick-idle mode.
+ */
+static void rcu_needs_cpu_flush(void)
+{
+	int cpu = smp_processor_id();
+
+	if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
+		return;
+	(void)rcu_needs_cpu(cpu);
+}
+
 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
-- 
1.6.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/