lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 25 Jun 2018 16:04:26 -0700
From:   "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To:     linux-kernel@...r.kernel.org
Cc:     mingo@...nel.org, jiangshanlai@...il.com, dipankar@...ibm.com,
        akpm@...ux-foundation.org, mathieu.desnoyers@...icios.com,
        josh@...htriplett.org, tglx@...utronix.de, peterz@...radead.org,
        rostedt@...dmis.org, dhowells@...hat.com, edumazet@...gle.com,
        fweisbec@...il.com, oleg@...hat.com, joel@...lfernandes.org,
        "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Subject: [PATCH tip/core/rcu 4/5] rcu: Diagnostics for grace-period hangs

This commit causes a splat if RCU is idle and a request for a new grace
period is ignored for more than one second.  This splat normally indicates
that some code path asked for a new grace period, but failed to wake up
the RCU grace-period kthread.

Signed-off-by: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
[ paulmck: Fix bug located by Dan Carpenter and his static checker. ]
---
 kernel/rcu/tree.c | 63 +++++++++++++++++++++++++++++++++++++++++++++--
 kernel/rcu/tree.h |  2 ++
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b1fffa21b9e4..fae4df831e52 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1681,6 +1681,7 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 	}
 	trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot"));
 	WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);
+	rsp->gp_req_activity = jiffies;
 	if (!rsp->gp_kthread) {
 		trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread"));
 		goto unlock_out;
@@ -2113,6 +2114,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	/* Advance CBs to reduce false positives below. */
 	if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {
 		WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
+		rsp->gp_req_activity = jiffies;
 		trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
 				       TPS("newreq"));
 	}
@@ -2744,6 +2746,62 @@ static void force_quiescent_state(struct rcu_state *rsp)
 	rcu_gp_kthread_wake(rsp);
 }
 
+/*
+ * This function checks for grace-period requests that fail to motivate
+ * RCU to come out of its idle mode.
+ */
+static void
+rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
+			 struct rcu_data *rdp)
+{
+	unsigned long flags;
+	unsigned long j;
+	struct rcu_node *rnp_root = rcu_get_root(rsp);
+	static atomic_t warned = ATOMIC_INIT(0);
+
+	if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)))
+		return;
+	j = jiffies; /* Expensive access, and in common case don't get here. */
+	if (time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
+	    time_before(j, READ_ONCE(rsp->gp_activity) + HZ) ||
+	    atomic_read(&warned))
+		return;
+
+	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	j = jiffies;
+	if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)) ||
+	    time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
+	    time_before(j, READ_ONCE(rsp->gp_activity) + HZ) ||
+	    atomic_read(&warned)) {
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		return;
+	}
+	/* Hold onto the leaf lock to make others see warned==1. */
+
+	raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
+	j = jiffies;
+	if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)) ||
+	    time_before(j, rsp->gp_req_activity + HZ) ||
+	    time_before(j, rsp->gp_activity + HZ) ||
+	    atomic_xchg(&warned, 1)) {
+		raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		return;
+	}
+	pr_alert("%s: g%lu %d%d%d%d gar:%lu ga:%lu f%#x %s->state:%#lx\n",
+		 __func__, READ_ONCE(rsp->gpnum),
+		 need_future_gp_element(rcu_get_root(rsp), 0),
+		 need_future_gp_element(rcu_get_root(rsp), 1),
+		 need_future_gp_element(rcu_get_root(rsp), 2),
+		 need_future_gp_element(rcu_get_root(rsp), 3),
+		 j - rsp->gp_req_activity, j - rsp->gp_activity,
+		 rsp->gp_flags, rsp->name,
+		 rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL);
+	WARN_ON(1);
+	raw_spin_unlock_rcu_node(rnp_root);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+}
+
 /*
  * This does the RCU core processing work for the specified rcu_state
  * and rcu_data structures.  This may be called only from the CPU to
@@ -2755,7 +2813,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	unsigned long flags;
 	bool needwake;
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
-	struct rcu_node *rnp;
+	struct rcu_node *rnp = rdp->mynode;
 
 	WARN_ON_ONCE(!rdp->beenonline);
 
@@ -2769,7 +2827,6 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 		if (rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) {
 			local_irq_restore(flags);
 		} else {
-			rnp = rdp->mynode;
 			raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
 			needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
 			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2778,6 +2835,8 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 		}
 	}
 
+	rcu_check_gp_start_stall(rsp, rnp, rdp);
+
 	/* If there are callbacks ready, invoke them. */
 	if (rcu_segcblist_ready_cbs(&rdp->cblist))
 		invoke_rcu_callbacks(rsp, rdp);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 7365ac53fdd9..3c1942174c56 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -374,6 +374,8 @@ struct rcu_state {
 						/*  but in jiffies. */
 	unsigned long gp_activity;		/* Time of last GP kthread */
 						/*  activity in jiffies. */
+	unsigned long gp_req_activity;		/* Time of last GP request */
+						/*  in jiffies. */
 	unsigned long jiffies_stall;		/* Time at which to check */
 						/*  for CPU stalls. */
 	unsigned long jiffies_resched;		/* Time at which to resched */
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ