[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <1339409176.7350.26.camel@marge.simpson.net>
Date: Mon, 11 Jun 2012 12:06:16 +0200
From: Mike Galbraith <mgalbraith@...ell.com>
To: LKML <linux-kernel@...r.kernel.org>
Cc: "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Subject: rcu: endless stalls
Greetings,
I received a report of a 48-core UV box hitting an RCU CPU stall warning
(a "gripe") that took longer than the stall timeout to print, so the box
kept griping endlessly, forcing a reboot.
The patch below might prevent that... and bust other stuff for free :)
rcu: one gripe at a time please
Not-compiled-by:
Not-signed-off-by:
Not-etc-by:
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0da7b88..6462056d6 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -818,10 +818,25 @@ static void print_cpu_stall(struct rcu_state *rsp)
set_need_resched(); /* kick ourselves to get things going. */
}
+/**
+ * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
+ *
+ * Set the stall-warning timeout way off into the future, thus preventing
+ * any RCU CPU stall-warning messages from appearing in the current set of
+ * RCU grace periods.
+ *
+ * The caller must disable hard irqs.
+ */
+void rcu_cpu_stall_reset(void)
+{
+ rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+ rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+ rcu_preempt_stall_reset();
+}
+
static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
{
- unsigned long j;
- unsigned long js;
+ unsigned long j, js, flags;
struct rcu_node *rnp;
if (rcu_cpu_stall_suppress)
@@ -832,13 +847,23 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
/* We haven't checked in, so go dump stack. */
+ rcu_cpu_stall_suppress = 1;
print_cpu_stall(rsp);
+ local_irq_save(flags);
+ rcu_cpu_stall_reset();
+ local_irq_restore(flags);
+ rcu_cpu_stall_suppress = 0;
} else if (rcu_gp_in_progress(rsp) &&
ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
/* They had a few time units to dump stack, so complain. */
+ rcu_cpu_stall_suppress = 1;
print_other_cpu_stall(rsp);
+ local_irq_save(flags);
+ rcu_cpu_stall_reset();
+ local_irq_restore(flags);
+ rcu_cpu_stall_suppress = 0;
}
}
@@ -848,22 +873,6 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
return NOTIFY_DONE;
}
-/**
- * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
- *
- * Set the stall-warning timeout way off into the future, thus preventing
- * any RCU CPU stall-warning messages from appearing in the current set of
- * RCU grace periods.
- *
- * The caller must disable hard irqs.
- */
-void rcu_cpu_stall_reset(void)
-{
- rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
- rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
- rcu_preempt_stall_reset();
-}
-
static struct notifier_block rcu_panic_block = {
.notifier_call = rcu_panic,
};
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists