lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <1461345557-2763-5-git-send-email-james.hogan@imgtec.com>
Date:	Fri, 22 Apr 2016 18:19:17 +0100
From:	James Hogan <james.hogan@...tec.com>
To:	Ralf Baechle <ralf@...ux-mips.org>
CC:	James Hogan <james.hogan@...tec.com>,
	Daniel Lezcano <daniel.lezcano@...aro.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	<linux-mips@...ux-mips.org>, <linux-kernel@...r.kernel.org>
Subject: [PATCH 4/4] MIPS: cevt-r4k: Dynamically calculate min_delta_ns

Calculate the MIPS clockevent device's min_delta_ns dynamically based on
the time it takes to perform the mips_next_event() sequence.

Virtualisation in particular makes the current fixed min_delta of 0x300
inappropriate under some circumstances, as the CP0_Count and CP0_Compare
registers may be being emulated by the hypervisor, and the frequency may
not correspond directly to the CPU frequency.

We actually use twice the median of multiple 75th percentiles of
multiple measurements of how long the mips_next_event() sequence takes,
in order to fairly efficiently eliminate outliers due to unexpected
hypervisor latency (which would need handling with retries when it
occurs during normal operation anyway).

Signed-off-by: James Hogan <james.hogan@...tec.com>
Cc: Ralf Baechle <ralf@...ux-mips.org>
Cc: Daniel Lezcano <daniel.lezcano@...aro.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: linux-mips@...ux-mips.org
Cc: linux-kernel@...r.kernel.org
---
 arch/mips/kernel/cevt-r4k.c | 82 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 80 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 8dfe6a6e1480..e4c21bbf9422 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -28,6 +28,83 @@ static int mips_next_event(unsigned long delta,
 	return res;
 }
 
+/**
+ * calculate_min_delta() - Calculate a good minimum delta for mips_next_event().
+ *
+ * Running under virtualisation can introduce overhead into mips_next_event() in
+ * the form of hypervisor emulation of CP0_Count/CP0_Compare registers,
+ * potentially with an unnatural frequency, which makes a fixed min_delta_ns
+ * value inappropriate as it may be too small.
+ *
+ * It can also introduce occasional latency from the guest being descheduled.
+ *
+ * This function calculates a good minimum delta based roughly on the 75th
+ * percentile of the time taken to do the mips_next_event() sequence, in order
+ * to handle potentially higher overhead while also eliminating outliers due to
+ * unpredictable hypervisor latency (which can be handled by retries).
+ *
+ * Return:	An appropriate minimum delta for the clock event device.
+ */
+static unsigned int calculate_min_delta(void)
+{
+	unsigned int cnt, i, j, k, l;
+	unsigned int buf1[4], buf2[3];
+	unsigned int min_delta;
+
+	/*
+	 * Calculate the median of 5 75th percentiles of 5 samples of how long
+	 * it takes to set CP0_Compare = CP0_Count + delta.
+	 */
+	for (i = 0; i < 5; ++i) {
+		for (j = 0; j < 5; ++j) {
+			/*
+			 * This is like the code in mips_next_event(), and
+			 * directly measures the borderline "safe" delta.
+			 */
+			cnt = read_c0_count();
+			write_c0_compare(cnt);
+			cnt = read_c0_count() - cnt;
+
+			/* Sorted insert into buf1 */
+			for (k = 0; k < j; ++k) {
+				if (cnt < buf1[k]) {
+					l = min_t(unsigned int,
+						  j, ARRAY_SIZE(buf1) - 1);
+					for (; l > k; --l)
+						buf1[l] = buf1[l - 1];
+					break;
+				}
+			}
+			if (k < ARRAY_SIZE(buf1))
+				buf1[k] = cnt;
+		}
+
+		/* Sorted insert of 75th percentile into buf2 */
+		for (k = 0; k < i; ++k) {
+			if (buf1[ARRAY_SIZE(buf1) - 1] < buf2[k]) {
+				l = min_t(unsigned int,
+					  i, ARRAY_SIZE(buf2) - 1);
+				for (; l > k; --l)
+					buf2[l] = buf2[l - 1];
+				break;
+			}
+		}
+		if (k < ARRAY_SIZE(buf2))
+			buf2[k] = buf1[ARRAY_SIZE(buf1) - 1];
+	}
+
+	/* Use 2 * median of 75th percentiles */
+	min_delta = buf2[ARRAY_SIZE(buf2) - 1] * 2;
+
+	/* Don't go too low */
+	if (min_delta < 0x300)
+		min_delta = 0x300;
+
+	pr_debug("%s: median 75th percentile=%#x, min_delta=%#x\n",
+		 __func__, buf2[ARRAY_SIZE(buf2) - 1], min_delta);
+	return min_delta;
+}
+
 DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
 int cp0_timer_irq_installed;
 
@@ -177,7 +254,7 @@ int r4k_clockevent_init(void)
 {
 	unsigned int cpu = smp_processor_id();
 	struct clock_event_device *cd;
-	unsigned int irq;
+	unsigned int irq, min_delta;
 
 	if (!cpu_has_counter || !mips_hpt_frequency)
 		return -ENXIO;
@@ -203,7 +280,8 @@ int r4k_clockevent_init(void)
 
 	/* Calculate the min / max delta */
 	cd->max_delta_ns	= clockevent_delta2ns(0x7fffffff, cd);
-	cd->min_delta_ns	= clockevent_delta2ns(0x300, cd);
+	min_delta		= calculate_min_delta();
+	cd->min_delta_ns	= clockevent_delta2ns(min_delta, cd);
 
 	cd->rating		= 300;
 	cd->irq			= irq;
-- 
2.4.10

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ