Message-ID: <20110324174803.GA18929@tsunami.ccur.com>
Date: Thu, 24 Mar 2011 13:48:03 -0400
From: Joe Korty <joe.korty@...r.com>
To: paulmck@...ux.vnet.ibm.com
Cc: fweisbec@...il.com, peterz@...radead.org, laijs@...fujitsu.com,
mathieu.desnoyers@...icios.com, dhowells@...hat.com,
loic.minier@...aro.org, dhaval.giani@...il.com, tglx@...utronix.de,
josh@...htriplett.org, houston.jim@...cast.net,
andi@...stfloor.org, linux-kernel@...r.kernel.org
Subject: [PATCH 18/24] jrcu: refactor watchdog code

jrcu: refactor watchdog code.

The watchdog logic was much too complicated; simplify it.

Also, stop using sched_clock(); we don't need that kind of
precision.  Instead, on every jrcu wakeup, add one RCU_HZ
period's worth of usecs to a watchdog counter and check it
against the limit.  Another nice property: if we spend a long
time in NMI (think 'kernel debugger'), this new watchdog
counter does not advance, which is what we want, whereas the
old sched_clock() value keeps advancing.  Thus the
sched_clock() version is technically broken unless
compensating code is added.
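
A minimal userspace sketch of the idea, not the kernel code itself:
the names (RCU_HZ_PERIOD_US, jrcu_wakeup, USEC_PER_SEC as a plain
define, the printf standing in for forcing a resched on laggard CPUs)
are illustrative stand-ins.  The point it demonstrates is that the
counter advances only when the wakeup actually runs, so time spent
stopped in an NMI does not count toward the limit, unlike a
sched_clock() timestamp.

	/* Sketch only: counter-based watchdog driven by periodic wakeups. */
	#include <stdio.h>

	#define RCU_HZ_PERIOD_US	(1000000 / 50)	/* one wakeup every 20 ms */
	#define USEC_PER_SEC		1000000

	static int wdog_ctr;				/* usecs since last end-of-batch */
	static int wdog_lim = 10 * USEC_PER_SEC;	/* force a pass after 10 secs */

	/* Called once per jrcu wakeup, RCU_HZ times a second. */
	static void jrcu_wakeup(int batch_ended)
	{
		if (!batch_ended) {
			if (wdog_ctr >= wdog_lim) {
				wdog_ctr = 0;
				/* stand-in for forcing a resched on waiting CPUs */
				printf("watchdog expired: tickle laggard CPUs\n");
			}
			wdog_ctr += RCU_HZ_PERIOD_US;	/* advances only when we run */
			return;
		}
		wdog_ctr = 0;				/* batch ended: restart watchdog */
	}

	int main(void)
	{
		int i;

		/* Simulate ~11 seconds of wakeups with no batch ever ending. */
		for (i = 0; i < 11 * USEC_PER_SEC / RCU_HZ_PERIOD_US; i++)
			jrcu_wakeup(0);
		return 0;
	}

Because the counter is bumped by exactly one RCU_HZ period per pass,
its resolution is one jrcu period, which is plenty for a watchdog
measured in seconds.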
Signed-off-by: Joe Korty <joe.korty@...r.com>
Index: b/kernel/jrcu.c
===================================================================
--- a/kernel/jrcu.c
+++ b/kernel/jrcu.c
@@ -139,9 +139,9 @@ int rcu_hz_delta_us = RCU_HZ_DELTA_US;
int rcu_scheduler_active __read_mostly;
int rcu_nmi_seen __read_mostly;
-static u64 rcu_timestamp;
-int rcu_wdog = 30; /* rcu watchdog interval, in seconds */
+static int rcu_wdog_ctr; /* time since last end-of-batch, in usecs */
+static int rcu_wdog_lim = 10 * USEC_PER_SEC; /* rcu watchdog interval */
/*
* Return our CPU id or zero if we are too early in the boot process to
@@ -299,7 +299,6 @@ static void __rcu_delimit_batches(struct
struct rcu_data *rd;
struct rcu_list *plist;
int cpu, eob, prev;
- u64 rcu_now;
/* If an NMI occured then the previous batch may not yet be
* quiescent. Let's wait till it is.
@@ -325,34 +324,24 @@ static void __rcu_delimit_batches(struct
}
}
- /*
- * Force end-of-batch if too much time (n seconds) has
- * gone by.
- */
- rcu_now = sched_clock();
rcu_stats.nlast++;
- if (!eob && !rcu_timestamp
- && ((rcu_now - rcu_timestamp) > (s64)rcu_wdog * NSEC_PER_SEC)) {
- rcu_stats.nforced++;
- for_each_online_cpu(cpu) {
- if (rcu_data[cpu].wait)
- force_cpu_resched(cpu);
- }
- rcu_timestamp = rcu_now;
- }
- /*
- * Just return if the current batch has not yet
- * ended.
- */
-
- if (!eob)
- return;
-
/*
- * Batch has ended. First, restart watchdog.
+ * Exit if batch has not ended. But first, tickle all non-cooperating
+ * CPUs if enough time has passed.
*/
- rcu_timestamp = rcu_now;
+ if (eob == 0) {
+ if (rcu_wdog_ctr >= rcu_wdog_lim) {
+ rcu_wdog_ctr = 0;
+ rcu_stats.nforced++;
+ for_each_online_cpu(cpu) {
+ if (rcu_data[cpu].wait)
+ force_cpu_resched(cpu);
+ }
+ }
+ rcu_wdog_ctr += rcu_hz_period_us;
+ return eob;
+ }
/*
* End the current RCU batch and start a new one.
@@ -391,8 +380,10 @@ static void __rcu_delimit_batches(struct
* counter until the results of that xchg are visible on other cpus.
*/
xchg(&rcu_which, prev); /* only place where rcu_which is written to */
+
rcu_stats.nbatches++;
rcu_stats.nlast = 0;
+ rcu_wdog_ctr = 0;
}
static void rcu_delimit_batches(void)
@@ -580,14 +571,14 @@ late_initcall(jrcud_start);
static int rcu_debugfs_show(struct seq_file *m, void *unused)
{
- int cpu, q, msecs;
-
- raw_local_irq_disable();
- msecs = div_s64(sched_clock() - rcu_timestamp, NSEC_PER_MSEC);
- raw_local_irq_enable();
+ int cpu, q;
seq_printf(m, "%14u: hz\n", rcu_hz);
- seq_printf(m, "%14u: watchdog (secs)\n", rcu_wdog);
+
+ seq_printf(m, "%14u: watchdog (secs)\n", rcu_wdog_lim / (int)USEC_PER_SEC);
+ seq_printf(m, "%14d: #secs left on watchdog\n",
+ (rcu_wdog_lim - rcu_wdog_ctr) / (int)USEC_PER_SEC);
+
#ifdef CONFIG_JRCU_DAEMON
if (rcu_daemon)
seq_printf(m, "%14u: daemon priority\n", rcu_priority);
@@ -604,8 +595,6 @@ static int rcu_debugfs_show(struct seq_f
rcu_stats.npasses - rcu_stats.nbatches);
seq_printf(m, "%14u: #passes since last end-of-batch\n",
rcu_stats.nlast);
- seq_printf(m, "%14u: #msecs since last end-of-batch\n",
- msecs);
seq_printf(m, "%14u: #passes forced (0 is best)\n",
rcu_stats.nforced);
@@ -698,7 +687,7 @@ static ssize_t rcu_debugfs_write(struct
sscanf(&token[5], "%d", &wdog);
if (wdog < 3 || wdog > 1000)
return -EINVAL;
- rcu_wdog = wdog;
+ rcu_wdog_lim = wdog * USEC_PER_SEC;
} else
return -EINVAL;
--