Date:	Tue,  6 Oct 2015 09:29:22 -0700
From:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To:	linux-kernel@...r.kernel.org
Cc:	mingo@...nel.org, jiangshanlai@...il.com, dipankar@...ibm.com,
	akpm@...ux-foundation.org, mathieu.desnoyers@...icios.com,
	josh@...htriplett.org, tglx@...utronix.de, peterz@...radead.org,
	rostedt@...dmis.org, dhowells@...hat.com, edumazet@...gle.com,
	dvhart@...ux.intel.com, fweisbec@...il.com, oleg@...hat.com,
	bobby.prani@...il.com,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Subject: [PATCH tip/core/rcu 03/18] rcu: Consolidate tree setup for synchronize_rcu_expedited()

This commit replaces sync_rcu_preempt_exp_init1() and
sync_rcu_preempt_exp_init2() with three new functions:
sync_exp_reset_tree_hotplug() and sync_exp_reset_tree(), which will
also be used by synchronize_sched_expedited(), and
sync_rcu_exp_select_nodes(), which contains the code specific to
synchronize_rcu_expedited().

Signed-off-by: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
---
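Reviewer note (not for the commit log): the core of
sync_exp_reset_tree_hotplug() is the propagation of each leaf's newly
onlined CPUs toward the root.  The standalone sketch below models just
that propagation; the toy_node and toy_reset_tree_hotplug() names are
invented for the example, and the rcu_node locking, memory barriers,
and ->ncpus snapshot are all omitted.  Illustrative only, not kernel
code.

#include <stdio.h>

struct toy_node {
	struct toy_node *parent;
	unsigned long expmaskinit;	/* CPUs/groups propagated so far. */
	unsigned long expmaskinitnext;	/* Includes newly onlined CPUs. */
	unsigned long grpmask;		/* This node's bit in its parent. */
};

/* Fold ->expmaskinitnext into ->expmaskinit and push new bits upward. */
static void toy_reset_tree_hotplug(struct toy_node *leaves[], int nleaves)
{
	int i;

	for (i = 0; i < nleaves; i++) {
		struct toy_node *rnp = leaves[i];
		struct toy_node *rnp_up;
		unsigned long oldmask;
		unsigned long mask;
		int done = 0;

		if (rnp->expmaskinit == rnp->expmaskinitnext)
			continue;	/* No newly onlined CPUs here. */
		oldmask = rnp->expmaskinit;
		rnp->expmaskinit = rnp->expmaskinitnext;

		if (oldmask)
			continue;	/* Ancestors were set long ago. */

		/* First CPU under this leaf: propagate its bit upward. */
		mask = rnp->grpmask;
		for (rnp_up = rnp->parent; rnp_up; rnp_up = rnp_up->parent) {
			if (rnp_up->expmaskinit)
				done = 1;	/* Its ancestors are set too. */
			rnp_up->expmaskinit |= mask;
			if (done)
				break;
			mask = rnp_up->grpmask;
		}
	}
}

int main(void)
{
	struct toy_node root = { 0 };
	struct toy_node leaf0 = { .parent = &root, .grpmask = 0x1 };
	struct toy_node leaf1 = { .parent = &root, .grpmask = 0x2 };
	struct toy_node *leaves[] = { &leaf0, &leaf1 };

	leaf0.expmaskinitnext = 0x3;	/* CPUs 0 and 1 onlined under leaf0. */
	toy_reset_tree_hotplug(leaves, 2);
	printf("root %#lx, leaf0 %#lx, leaf1 %#lx\n",
	       root.expmaskinit, leaf0.expmaskinit, leaf1.expmaskinit);
	/* Prints "root 0x1, leaf0 0x3, leaf1 0". */
	return 0;
}

The early exit once an ancestor already has a nonzero ->expmaskinit is
safe because the first bit ever propagated through that ancestor also
set all of that ancestor's own ancestors.
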
 kernel/rcu/tree.c        |  86 ++++++++++++++++++++++++++++++++++++++-
 kernel/rcu/tree.h        |  17 +++++---
 kernel/rcu/tree_plugin.h | 102 +++++++++--------------------------------------
 3 files changed, 115 insertions(+), 90 deletions(-)
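
A second reviewer note: my reading of the lifecycle of the three masks
this patch gives each rcu_node, as commented pseudocode (locking
elided; the real code is in the hunks below):

/*
 * ->expmask* lifecycle as introduced by this patch:
 *
 * CPU-hotplug time, in rcu_init_percpu_data():
 *	rnp->expmaskinitnext |= mask;	// record the onlined CPU
 *	rsp->ncpus++;			// first online only
 *
 * Start of an expedited grace period:
 *	sync_exp_reset_tree_hotplug()	// only if rsp->ncpus changed:
 *		rnp->expmaskinit = rnp->expmaskinitnext, with new
 *		bits propagated toward the root
 *	sync_exp_reset_tree()		// breadth-first over the tree:
 *		rnp->expmask = rnp->expmaskinit;
 *
 * During the expedited grace period:
 *	->expmask bits are cleared as nodes check in, for example via
 *	rcu_report_exp_rnp() once a leaf's blocked tasks drain.  The
 *	per-CPU component is not wired up yet; see the FIXME in
 *	sync_rcu_exp_select_nodes().
 */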

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 59af27d8bc6a..8526896afea7 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3380,6 +3380,87 @@ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
 }
 
 /*
+ * Reset the ->expmaskinit values in the rcu_node tree to reflect any
+ * recent CPU-online activity.  Note that these masks are not cleared
+ * when CPUs go offline, so they reflect the union of all CPUs that have
+ * ever been online.  This means that this function normally takes its
+ * no-work-to-do fastpath.
+ */
+static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
+{
+	bool done;
+	unsigned long flags;
+	unsigned long mask;
+	unsigned long oldmask;
+	int ncpus = READ_ONCE(rsp->ncpus);
+	struct rcu_node *rnp;
+	struct rcu_node *rnp_up;
+
+	/* If no new CPUs onlined since last time, nothing to do. */
+	if (likely(ncpus == rsp->ncpus_snap))
+		return;
+	rsp->ncpus_snap = ncpus;
+
+	/*
+	 * Each pass through the following loop propagates newly onlined
+	 * CPUs for the current rcu_node structure up the rcu_node tree.
+	 */
+	rcu_for_each_leaf_node(rsp, rnp) {
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		smp_mb__after_unlock_lock();
+		if (rnp->expmaskinit == rnp->expmaskinitnext) {
+			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			continue;  /* No new CPUs, nothing to do. */
+		}
+
+		/* Update this node's mask, track old value for propagation. */
+		oldmask = rnp->expmaskinit;
+		rnp->expmaskinit = rnp->expmaskinitnext;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+
+		/* If was already nonzero, nothing to propagate. */
+		if (oldmask)
+			continue;
+
+		/* Propagate the new CPU up the tree. */
+		mask = rnp->grpmask;
+		rnp_up = rnp->parent;
+		done = false;
+		while (rnp_up) {
+			raw_spin_lock_irqsave(&rnp_up->lock, flags);
+			smp_mb__after_unlock_lock();
+			if (rnp_up->expmaskinit)
+				done = true;
+			rnp_up->expmaskinit |= mask;
+			raw_spin_unlock_irqrestore(&rnp_up->lock, flags);
+			if (done)
+				break;
+			mask = rnp_up->grpmask;
+			rnp_up = rnp_up->parent;
+		}
+	}
+}
+
+/*
+ * Reset the ->expmask values in the rcu_node tree in preparation for
+ * a new expedited grace period.
+ */
+static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
+{
+	unsigned long flags;
+	struct rcu_node *rnp;
+
+	sync_exp_reset_tree_hotplug(rsp);
+	rcu_for_each_node_breadth_first(rsp, rnp) {
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		smp_mb__after_unlock_lock();
+		WARN_ON_ONCE(rnp->expmask);
+		rnp->expmask = rnp->expmaskinit;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	}
+}
+
+/*
  * Return non-zero if there are any tasks in RCU read-side critical
  * sections blocking the current preemptible-RCU expedited grace period.
  * If there is no preemptible-RCU expedited grace period currently in
@@ -3971,7 +4052,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 
 	/* Set up local state, ensuring consistent view of global state. */
 	raw_spin_lock_irqsave(&rnp->lock, flags);
-	rdp->beenonline = 1;	 /* We have now been online. */
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
@@ -3993,6 +4073,10 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	raw_spin_lock(&rnp->lock);		/* irqs already disabled. */
 	smp_mb__after_unlock_lock();
 	rnp->qsmaskinitnext |= mask;
+	rnp->expmaskinitnext |= mask;
+	if (!rdp->beenonline)
+		WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
+	rdp->beenonline = true;	 /* We have now been online. */
 	rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
 	rdp->completed = rnp->completed;
 	rdp->passed_quiesce = false;
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 2e991f8361e4..a57f25ecca58 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -171,16 +171,21 @@ struct rcu_node {
 				/*  an rcu_data structure, otherwise, each */
 				/*  bit corresponds to a child rcu_node */
 				/*  structure. */
-	unsigned long expmask;	/* Groups that have ->blkd_tasks */
-				/*  elements that need to drain to allow the */
-				/*  current expedited grace period to */
-				/*  complete (only for PREEMPT_RCU). */
 	unsigned long qsmaskinit;
-				/* Per-GP initial value for qsmask & expmask. */
+				/* Per-GP initial value for qsmask. */
 				/*  Initialized from ->qsmaskinitnext at the */
 				/*  beginning of each grace period. */
 	unsigned long qsmaskinitnext;
 				/* Online CPUs for next grace period. */
+	unsigned long expmask;	/* CPUs or groups that need to check in */
+				/*  to allow the current expedited GP */
+				/*  to complete. */
+	unsigned long expmaskinit;
+				/* Per-GP initial values for expmask. */
+				/*  Initialized from ->expmaskinitnext at the */
+				/*  beginning of each expedited GP. */
+	unsigned long expmaskinitnext;
+				/* Online CPUs for next expedited GP. */
 	unsigned long grpmask;	/* Mask to apply to parent qsmask. */
 				/*  Only one bit will be set in this mask. */
 	int	grplo;		/* lowest-numbered CPU or group here. */
@@ -466,6 +471,7 @@ struct rcu_state {
 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
 	void (*call)(struct rcu_head *head,	/* call_rcu() flavor. */
 		     void (*func)(struct rcu_head *head));
+	int ncpus;				/* # CPUs seen so far. */
 
 	/* The following fields are guarded by the root rcu_node's lock. */
 
@@ -508,6 +514,7 @@ struct rcu_state {
 	atomic_long_t expedited_normal;		/* # fallbacks to normal. */
 	atomic_t expedited_need_qs;		/* # CPUs left to check in. */
 	wait_queue_head_t expedited_wq;		/* Wait for check-ins. */
+	int ncpus_snap;				/* # CPUs seen last time. */
 
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
 						/*  force_quiescent_state(). */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index e73be8539978..62d05413b7ba 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -536,86 +536,28 @@ void synchronize_rcu(void)
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
 /*
- * Snapshot the tasks blocking the newly started preemptible-RCU expedited
- * grace period for the specified rcu_node structure, phase 1.  If there
- * are such tasks, set the ->expmask bits up the rcu_node tree and also
- * set the ->expmask bits on the leaf rcu_node structures to tell phase 2
- * that work is needed here.
- *
- * Caller must hold the root rcu_node's exp_funnel_mutex.
+ * Select the nodes that the upcoming expedited grace period needs
+ * to wait for.
  */
-static void
-sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp)
+static void sync_rcu_exp_select_nodes(struct rcu_state *rsp)
 {
 	unsigned long flags;
-	unsigned long mask;
-	struct rcu_node *rnp_up;
+	struct rcu_node *rnp;
 
-	raw_spin_lock_irqsave(&rnp->lock, flags);
-	smp_mb__after_unlock_lock();
-	WARN_ON_ONCE(rnp->expmask);
-	WARN_ON_ONCE(rnp->exp_tasks);
-	if (!rcu_preempt_has_tasks(rnp)) {
-		/* No blocked tasks, nothing to do. */
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		return;
-	}
-	/* Call for Phase 2 and propagate ->expmask bits up the tree. */
-	rnp->expmask = 1;
-	rnp_up = rnp;
-	while (rnp_up->parent) {
-		mask = rnp_up->grpmask;
-		rnp_up = rnp_up->parent;
-		if (rnp_up->expmask & mask)
-			break;
-		raw_spin_lock(&rnp_up->lock); /* irqs already off */
+	sync_exp_reset_tree(rsp);
+	rcu_for_each_leaf_node(rsp, rnp) {
+		raw_spin_lock_irqsave(&rnp->lock, flags);
 		smp_mb__after_unlock_lock();
-		rnp_up->expmask |= mask;
-		raw_spin_unlock(&rnp_up->lock); /* irqs still off */
-	}
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
-/*
- * Snapshot the tasks blocking the newly started preemptible-RCU expedited
- * grace period for the specified rcu_node structure, phase 2.  If the
- * leaf rcu_node structure has its ->expmask field set, check for tasks.
- * If there are some, clear ->expmask and set ->exp_tasks accordingly,
- * then initiate RCU priority boosting.  Otherwise, clear ->expmask and
- * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits,
- * enabling rcu_read_unlock_special() to do the bit-clearing.
- *
- * Caller must hold the root rcu_node's exp_funnel_mutex.
- */
-static void
-sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&rnp->lock, flags);
-	smp_mb__after_unlock_lock();
-	if (!rnp->expmask) {
-		/* Phase 1 didn't do anything, so Phase 2 doesn't either. */
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		return;
-	}
-
-	/* Phase 1 is over. */
-	rnp->expmask = 0;
-
-	/*
-	 * If there are still blocked tasks, set up ->exp_tasks so that
-	 * rcu_read_unlock_special() will wake us and then boost them.
-	 */
-	if (rcu_preempt_has_tasks(rnp)) {
-		rnp->exp_tasks = rnp->blkd_tasks.next;
-		rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */
-		return;
+		rnp->expmask = 0; /* No per-CPU component yet. */
+		if (!rcu_preempt_has_tasks(rnp)) {
+			/* FIXME: Want __rcu_report_exp_rnp() here. */
+			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		} else {
+			rnp->exp_tasks = rnp->blkd_tasks.next;
+			rcu_initiate_boost(rnp, flags);
+		}
+		rcu_report_exp_rnp(rsp, rnp, false);
 	}
-
-	/* No longer any blocked tasks, so undo bit setting. */
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	rcu_report_exp_rnp(rsp, rnp, false);
 }
 
 /**
@@ -648,16 +590,8 @@ void synchronize_rcu_expedited(void)
 	/* force all RCU readers onto ->blkd_tasks lists. */
 	synchronize_sched_expedited();
 
-	/*
-	 * Snapshot current state of ->blkd_tasks lists into ->expmask.
-	 * Phase 1 sets bits and phase 2 permits rcu_read_unlock_special()
-	 * to start clearing them.  Doing this in one phase leads to
-	 * strange races between setting and clearing bits, so just say "no"!
-	 */
-	rcu_for_each_leaf_node(rsp, rnp)
-		sync_rcu_preempt_exp_init1(rsp, rnp);
-	rcu_for_each_leaf_node(rsp, rnp)
-		sync_rcu_preempt_exp_init2(rsp, rnp);
+	/* Initialize the rcu_node tree in preparation for the wait. */
+	sync_rcu_exp_select_nodes(rsp);
 
 	/* Wait for snapshotted ->blkd_tasks lists to drain. */
 	rnp = rcu_get_root(rsp);
-- 
2.5.2
