lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 20 Jun 2018 17:27:00 -0400
From:   Pavel Tatashin <pasha.tatashin@...cle.com>
To:     steven.sistare@...cle.com, daniel.m.jordan@...cle.com,
        linux@...linux.org.uk, schwidefsky@...ibm.com,
        heiko.carstens@...ibm.com, john.stultz@...aro.org,
        sboyd@...eaurora.org, x86@...nel.org, linux-kernel@...r.kernel.org,
        mingo@...hat.com, tglx@...utronix.de, hpa@...or.com,
        douly.fnst@...fujitsu.com, peterz@...radead.org, prarit@...hat.com,
        feng.tang@...el.com, pmladek@...e.com, gnomes@...rguk.ukuu.org.uk
Subject: [PATCH v11 6/6] x86/tsc: use tsc early

We want to get timestamps and a high-resolution clock available to us as
early as possible in boot. But, native_sched_clock() outputs time based
either on tsc after tsc_init() is called later in boot, or using jiffies
when clock interrupts are enabled, which also happens later in boot.

On the other hand, we know tsc frequency from as early as when
tsc_early_delay_calibrate() is called. So, we use the early tsc calibration
to output timestamps early. Later in boot when tsc_init() is called we
calibrate tsc again using more precise methods, and start using that.

Since sched_clock() is in a hot path, we want to make sure that no
regressions are introduced to this function after the machine is booted.
This is why we are using a static branch that is enabled by default, but is
disabled once we have initialized a permanent clock source.

Signed-off-by: Pavel Tatashin <pasha.tatashin@...cle.com>
---
 arch/x86/kernel/tsc.c | 64 ++++++++++++++++++++++++++++++-------------
 1 file changed, 45 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 654a01cc0358..1dd69612c69c 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -39,6 +39,9 @@ EXPORT_SYMBOL(tsc_khz);
 static int __read_mostly tsc_unstable;
 
 static DEFINE_STATIC_KEY_FALSE(__use_tsc);
+static DEFINE_STATIC_KEY_TRUE(tsc_early_enabled);
+
+static bool tsc_early_sched_clock;
 
 int tsc_clocksource_reliable;
 
@@ -133,22 +136,13 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 	return ns;
 }
 
-static void set_cyc2ns_scale(unsigned long khz, int cpu,
-			     unsigned long long tsc_now,
-			     unsigned long long sched_now)
+static void __set_cyc2ns_scale(unsigned long khz, int cpu,
+			       unsigned long long tsc_now,
+			       unsigned long long sched_now)
 {
-	unsigned long long ns_now;
+	unsigned long long ns_now = cycles_2_ns(tsc_now) + sched_now;
 	struct cyc2ns_data data;
 	struct cyc2ns *c2n;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	sched_clock_idle_sleep_event();
-
-	if (!khz)
-		goto done;
-
-	ns_now = cycles_2_ns(tsc_now) + sched_now;
 
 	/*
 	 * Compute a new multiplier as per the above comment and ensure our
@@ -178,22 +172,47 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu,
 	c2n->data[0] = data;
 	raw_write_seqcount_latch(&c2n->seq);
 	c2n->data[1] = data;
+}
+
+static void set_cyc2ns_scale(unsigned long khz, int cpu,
+			     unsigned long long tsc_now,
+			     unsigned long long sched_now)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	sched_clock_idle_sleep_event();
+
+	if (khz)
+		__set_cyc2ns_scale(khz, cpu, tsc_now, sched_now);
 
-done:
 	sched_clock_idle_wakeup_event();
 	local_irq_restore(flags);
 }
 
+static void __init sched_clock_early_init(unsigned int khz)
+{
+	cyc2ns_init(smp_processor_id());
+	__set_cyc2ns_scale(khz, smp_processor_id(), rdtsc(), 0);
+	tsc_early_sched_clock = true;
+}
+
+static void __init sched_clock_early_exit(void)
+{
+	static_branch_disable(&tsc_early_enabled);
+}
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
 u64 native_sched_clock(void)
 {
-	if (static_branch_likely(&__use_tsc)) {
-		u64 tsc_now = rdtsc();
+	if (static_branch_likely(&__use_tsc))
+		return cycles_2_ns(rdtsc());
 
-		/* return the value in ns */
-		return cycles_2_ns(tsc_now);
+	if (static_branch_unlikely(&tsc_early_enabled)) {
+		if (tsc_early_sched_clock)
+			return cycles_2_ns(rdtsc());
 	}
 
 	/*
@@ -1354,9 +1373,10 @@ void __init tsc_early_delay_calibrate(void)
 	lpj = tsc_khz * 1000;
 	do_div(lpj, HZ);
 	loops_per_jiffy = lpj;
+	sched_clock_early_init(tsc_khz);
 }
 
-void __init tsc_init(void)
+static void __init __tsc_init(void)
 {
 	u64 lpj, cyc, sch;
 	int cpu;
@@ -1433,6 +1453,12 @@ void __init tsc_init(void)
 	detect_art();
 }
 
+void __init tsc_init(void)
+{
+	__tsc_init();
+	sched_clock_early_exit();
+}
+
 #ifdef CONFIG_SMP
 /*
  * If we have a constant TSC and are using the TSC for the delay loop,
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ