linux-kernel - [patch V6 01/37] tracing/hwlat: Use ktime_get_mono_fast_ns()

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200515235124.466962628@linutronix.de>
Date:   Sat, 16 May 2020 01:45:48 +0200
From:   Thomas Gleixner <tglx@...utronix.de>
To:     LKML <linux-kernel@...r.kernel.org>
Cc:     x86@...nel.org, "Paul E. McKenney" <paulmck@...nel.org>,
        Andy Lutomirski <luto@...nel.org>,
        Alexandre Chartre <alexandre.chartre@...cle.com>,
        Frederic Weisbecker <frederic@...nel.org>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Sean Christopherson <sean.j.christopherson@...el.com>,
        Masami Hiramatsu <mhiramat@...nel.org>,
        Petr Mladek <pmladek@...e.com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Joel Fernandes <joel@...lfernandes.org>,
        Boris Ostrovsky <boris.ostrovsky@...cle.com>,
        Juergen Gross <jgross@...e.com>,
        Brian Gerst <brgerst@...il.com>,
        Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
        Josh Poimboeuf <jpoimboe@...hat.com>,
        Will Deacon <will@...nel.org>,
        Tom Lendacky <thomas.lendacky@....com>,
        Wei Liu <wei.liu@...nel.org>,
        Michael Kelley <mikelley@...rosoft.com>,
        Jason Chen CJ <jason.cj.chen@...el.com>,
        Zhao Yakui <yakui.zhao@...el.com>,
        "Peter Zijlstra (Intel)" <peterz@...radead.org>
Subject: [patch V6 01/37] tracing/hwlat: Use ktime_get_mono_fast_ns()

Timestamping in the hardware latency detector uses sched_clock() underneath
and depends on CONFIG_GENERIC_SCHED_CLOCK=n because sched clocks from that
subsystem are not NMI safe.

ktime_get_mono_fast_ns() is NMI safe and available on all architectures.

Replace the time getter, get rid of the CONFIG_GENERIC_SCHED_CLOCK=n
dependency and clean up the horrible macro maze which wraps plain u64 math
in macros.

Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
---
 kernel/trace/trace_hwlat.c |   59 +++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 34 deletions(-)

--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -131,29 +131,19 @@ static void trace_hwlat_sample(struct hw
 		trace_buffer_unlock_commit_nostack(buffer, event);
 }
 
-/* Macros to encapsulate the time capturing infrastructure */
-#define time_type	u64
-#define time_get()	trace_clock_local()
-#define time_to_us(x)	div_u64(x, 1000)
-#define time_sub(a, b)	((a) - (b))
-#define init_time(a, b)	(a = b)
-#define time_u64(a)	a
-
+/*
+ * Timestamping uses ktime_get_mono_fast_ns(), the NMI safe access to
+ * CLOCK_MONOTONIC.
+ */
 void trace_hwlat_callback(bool enter)
 {
 	if (smp_processor_id() != nmi_cpu)
 		return;
 
-	/*
-	 * Currently trace_clock_local() calls sched_clock() and the
-	 * generic version is not NMI safe.
-	 */
-	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
-		if (enter)
-			nmi_ts_start = time_get();
-		else
-			nmi_total_ts += time_get() - nmi_ts_start;
-	}
+	if (enter)
+		nmi_ts_start = ktime_get_mono_fast_ns();
+	else
+		nmi_total_ts += ktime_get_mono_fast_ns() - nmi_ts_start;
 
 	if (enter)
 		nmi_count++;
@@ -165,20 +155,22 @@ void trace_hwlat_callback(bool enter)
  * Used to repeatedly capture the CPU TSC (or similar), looking for potential
  * hardware-induced latency. Called with interrupts disabled and with
  * hwlat_data.lock held.
+ *
+ * Use ktime_get_mono_fast_ns() here as well because it does not wait on the
+ * timekeeping seqcount like ktime_get().
  */
 static int get_sample(void)
 {
 	struct trace_array *tr = hwlat_trace;
 	struct hwlat_sample s;
-	time_type start, t1, t2, last_t2;
+	u64 start, t1, t2, last_t2, thresh;
 	s64 diff, outer_diff, total, last_total = 0;
 	u64 sample = 0;
-	u64 thresh = tracing_thresh;
 	u64 outer_sample = 0;
 	int ret = -1;
 	unsigned int count = 0;
 
-	do_div(thresh, NSEC_PER_USEC); /* modifies interval value */
+	thresh = div_u64(tracing_thresh, NSEC_PER_USEC);
 
 	nmi_cpu = smp_processor_id();
 	nmi_total_ts = 0;
@@ -188,18 +180,20 @@ static int get_sample(void)
 
 	trace_hwlat_callback_enabled = true;
 
-	init_time(last_t2, 0);
-	start = time_get(); /* start timestamp */
+	/* start timestamp */
+	start = ktime_get_mono_fast_ns();
 	outer_diff = 0;
+	last_t2 = 0;
 
 	do {
 
-		t1 = time_get();	/* we'll look for a discontinuity */
-		t2 = time_get();
+		/* we'll look for a discontinuity */
+		t1 = ktime_get_mono_fast_ns();
+		t2 = ktime_get_mono_fast_ns();
 
-		if (time_u64(last_t2)) {
+		if (last_t2) {
 			/* Check the delta from outer loop (t2 to next t1) */
-			outer_diff = time_to_us(time_sub(t1, last_t2));
+			outer_diff = div_u64(t1 - last_t2, NSEC_PER_USEC);
 			/* This shouldn't happen */
 			if (outer_diff < 0) {
 				pr_err(BANNER "time running backwards\n");
@@ -210,7 +204,8 @@ static int get_sample(void)
 		}
 		last_t2 = t2;
 
-		total = time_to_us(time_sub(t2, start)); /* sample width */
+		/* sample width */
+		total = div_u64(t2 - start, NSEC_PER_USEC);
 
 		/* Check for possible overflows */
 		if (total < last_total) {
@@ -220,7 +215,7 @@ static int get_sample(void)
 		last_total = total;
 
 		/* This checks the inner loop (t1 to t2) */
-		diff = time_to_us(time_sub(t2, t1));     /* current diff */
+		diff = div_u64(t2 - t1, NSEC_PER_USEC);
 
 		if (diff > thresh || outer_diff > thresh) {
 			if (!count)
@@ -251,15 +246,11 @@ static int get_sample(void)
 
 		ret = 1;
 
-		/* We read in microseconds */
-		if (nmi_total_ts)
-			do_div(nmi_total_ts, NSEC_PER_USEC);
-
 		hwlat_data.count++;
 		s.seqnum = hwlat_data.count;
 		s.duration = sample;
 		s.outer_duration = outer_sample;
-		s.nmi_total_ts = nmi_total_ts;
+		s.nmi_total_ts = div_u64(nmi_total_ts, NSEC_PER_USEC);
 		s.nmi_count = nmi_count;
 		s.count = count;
 		trace_hwlat_sample(&s);