Message-Id: <20220311011715.2440601-1-eric.dumazet@gmail.com>
Date:   Thu, 10 Mar 2022 17:17:15 -0800
From:   Eric Dumazet <eric.dumazet@...il.com>
To:     "Paul E . McKenney" <paulmck@...nel.org>
Cc:     linux-kernel <linux-kernel@...r.kernel.org>,
        Eric Dumazet <edumazet@...gle.com>,
        Eric Dumazet <eric.dumazet@...il.com>,
        "Rafael J . Wysocki" <rafael.j.wysocki@...el.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        "H. Peter Anvin" <hpa@...or.com>, x86@...nel.org
Subject: [PATCH] x86/cpu: Use smp_call_function_many() in arch_freq_prepare_all()

From: Eric Dumazet <edumazet@...gle.com>

Opening /proc/cpuinfo can incur significant latency on hosts with many
cpus, mostly because it essentially does:

   for_each_online_cpu(cpu)
    smp_call_function_single(cpu, aperfmperf_snapshot_khz, ...)

smp_call_function_single() reuses a common csd, meaning that each
invocation must wait for the prior one to complete.
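
Roughly, each async (wait=0) call does the following, a condensed
sketch of the kernel/smp.c path (names are taken from that file):

   csd = this_cpu_ptr(&csd_data);   /* one shared csd per calling cpu */
   csd_lock(csd);                   /* spins until the previous target
                                       cpu has released the csd */
   generic_exec_single(cpu, csd);   /* queues the csd, sends the IPI */

so the loop above degenerates into one sequential IPI round trip per cpu.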

Paul's recent patches have lowered the number of cpus receiving the IPI,
but there are still cases where the latency of the above loop can reach
10 ms; an extra msleep(10) is then performed, for a total of 20 ms.

Using smp_call_function_many() allows full parallelism, bringing the
latency down to ~80 usec on a host with 256 cpus.
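
For reference, the batched pattern this patch switches to, in condensed
form (smp_call_function_many() requires preemption to be disabled by
the caller, and does not run the function on the calling cpu even if
that cpu is set in the mask):

   if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
      return;
   cpus_read_lock();            /* keep the set of online cpus stable */
   /* ... set a bit in mask for each cpu needing a fresh sample ... */
   preempt_disable();
   smp_call_function_many(mask, aperfmperf_snapshot_khz, NULL, wait);
   preempt_enable();
   cpus_read_unlock();
   free_cpumask_var(mask);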

Signed-off-by: Eric Dumazet <edumazet@...gle.com>
Cc: Paul E. McKenney <paulmck@...nel.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Borislav Petkov <bp@...en8.de>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: <x86@...nel.org>
---
 arch/x86/kernel/cpu/aperfmperf.c | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 22911deacb6e441ad60ddb57190ef3772afb3cf0..a305310ceb44784a0ad9be7c196061d98fa1adbc 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -67,7 +67,8 @@ static void aperfmperf_snapshot_khz(void *dummy)
 	atomic_set_release(&s->scfpending, 0);
 }
 
-static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
+static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait,
+				    struct cpumask *mask)
 {
 	s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu));
 	struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu);
@@ -76,9 +77,13 @@ static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
 	if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
 		return true;
 
-	if (!atomic_xchg(&s->scfpending, 1) || wait)
-		smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait);
-
+	if (!atomic_xchg(&s->scfpending, 1) || wait) {
+		if (mask)
+			__cpumask_set_cpu(cpu, mask);
+		else
+			smp_call_function_single(cpu, aperfmperf_snapshot_khz,
+						 NULL, wait);
+	}
 	/* Return false if the previous iteration was too long ago. */
 	return time_delta <= APERFMPERF_STALE_THRESHOLD_MS;
 }
@@ -97,13 +102,14 @@ unsigned int aperfmperf_get_khz(int cpu)
 	if (rcu_is_idle_cpu(cpu))
 		return 0; /* Idle CPUs are completely uninteresting. */
 
-	aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
+	aperfmperf_snapshot_cpu(cpu, ktime_get(), true, NULL);
 	return per_cpu(samples.khz, cpu);
 }
 
 void arch_freq_prepare_all(void)
 {
 	ktime_t now = ktime_get();
+	cpumask_var_t mask;
 	bool wait = false;
 	int cpu;
 
@@ -113,17 +119,25 @@ void arch_freq_prepare_all(void)
 	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
 		return;
 
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return;
+
+	cpus_read_lock();
 	for_each_online_cpu(cpu) {
 		if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
 			continue;
 		if (rcu_is_idle_cpu(cpu))
 			continue; /* Idle CPUs are completely uninteresting. */
-		if (!aperfmperf_snapshot_cpu(cpu, now, false))
+		if (!aperfmperf_snapshot_cpu(cpu, now, false, mask))
 			wait = true;
 	}
 
-	if (wait)
-		msleep(APERFMPERF_REFRESH_DELAY_MS);
+	preempt_disable();
+	smp_call_function_many(mask, aperfmperf_snapshot_khz, NULL, wait);
+	preempt_enable();
+	cpus_read_unlock();
+
+	free_cpumask_var(mask);
 }
 
 unsigned int arch_freq_get_on_cpu(int cpu)
@@ -139,7 +153,7 @@ unsigned int arch_freq_get_on_cpu(int cpu)
 	if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
 		return 0;
 
-	if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
+	if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true, NULL))
 		return per_cpu(samples.khz, cpu);
 
 	msleep(APERFMPERF_REFRESH_DELAY_MS);
-- 
2.35.1.723.g4982287a31-goog
