lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130628025723.GA12719@amt.cnet>
Date:	Thu, 27 Jun 2013 23:57:23 -0300
From:	Marcelo Tosatti <mtosatti@...hat.com>
To:	kvm-devel <kvm@...r.kernel.org>, linux-kernel@...r.kernel.org
Cc:	Karen Noel <knoel@...hat.com>, Rik van Riel <riel@...hat.com>,
	Don Zickus <dzickus@...hat.com>,
	Prarit Bhargava <prarit@...hat.com>,
	Thomas Gleixner <tglx@...utronix.de>
Subject: watchdog: print stolen time increment at softlockup detection


One possibility for a softlockup report in a Linux VM, is that the host
system is overcommitted to the point where the watchdog task is unable
to make progress (unable to touch the watchdog).

Maintain the increment in stolen time for the period of 
softlockup threshold detection (20 seconds by the default), 
and report this increment in the softlockup message.

Overcommitment is then indicated by a large stolen time increment,
accounting for more than, or for a significant percentage of the
softlockup threshold.

Signed-off-by: Marcelo Tosatti <mtosatti@...hat.com>

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 05039e3..ed09d58 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -34,6 +34,8 @@ int __read_mostly watchdog_thresh = 10;
 static int __read_mostly watchdog_disabled;
 static u64 __read_mostly sample_period;
 
+#define SOFT_INTRS_PER_PERIOD 5
+
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
@@ -127,9 +129,51 @@ static void set_sample_period(void)
 	 * and hard thresholds) to increment before the
 	 * hardlockup detector generates a warning
 	 */
-	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
+	sample_period = get_softlockup_thresh() *
+			((u64)NSEC_PER_SEC / SOFT_INTRS_PER_PERIOD);
 }
 
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+struct steal_clock_record {
+	u64 prev_stolen_time;
+	u64 stolen_time[SOFT_INTRS_PER_PERIOD];
+	int idx;
+};
+
+static DEFINE_PER_CPU(struct steal_clock_record, steal_record);
+static void record_steal_time(void)
+{
+	struct steal_clock_record *r;
+	int cpu = smp_processor_id();
+	u64 steal_time;
+	r = &per_cpu(steal_record, cpu);
+
+	steal_time = paravirt_steal_clock(cpu);
+	r->stolen_time[r->idx] = steal_time - r->prev_stolen_time;
+	r->idx++;
+	if (r->idx == SOFT_INTRS_PER_PERIOD)
+		r->idx = 0;
+	r->prev_stolen_time = steal_time;
+}
+
+static unsigned int get_accumulated_steal(int cpu)
+{
+	int idx;
+	u64 t = 0;
+	struct steal_clock_record *r = &per_cpu(steal_record, cpu);
+
+	for (idx = 0; idx < SOFT_INTRS_PER_PERIOD; idx++)
+		t += r->stolen_time[idx];
+
+	do_div(t, 1000000);
+
+	return t;
+}
+
+#else
+static void record_steal_time(void) { return; }
+#endif
+
 /* Commands for resetting the watchdog */
 static void __touch_watchdog(void)
 {
@@ -271,6 +315,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	/* kick the hardlockup detector */
 	watchdog_interrupt_count();
 
+	/* record steal time */
+	record_steal_time();
+
 	/* kick the softlockup detector */
 	wake_up_process(__this_cpu_read(softlockup_watchdog));
 
@@ -316,6 +363,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
 			smp_processor_id(), duration,
 			current->comm, task_pid_nr(current));
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+		printk(KERN_EMERG "soft lockup stolen time = %ums\n",
+			get_accumulated_steal(smp_processor_id()));
+#endif
 		print_modules();
 		print_irqtrace_events(current);
 		if (regs)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ