Message-Id: <20220311011715.2440601-1-eric.dumazet@gmail.com>
Date: Thu, 10 Mar 2022 17:17:15 -0800
From: Eric Dumazet <eric.dumazet@...il.com>
To: "Paul E . McKenney" <paulmck@...nel.org>
Cc: linux-kernel <linux-kernel@...r.kernel.org>,
Eric Dumazet <edumazet@...gle.com>,
Eric Dumazet <eric.dumazet@...il.com>,
"Rafael J . Wysocki" <rafael.j.wysocki@...el.com>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
"H. Peter Anvin" <hpa@...or.com>, x86@...nel.org
Subject: [PATCH] x86/cpu: use smp_call_function_many() in arch_freq_prepare_all()
From: Eric Dumazet <edumazet@...gle.com>

Opening /proc/cpuinfo can incur significant latency on hosts with many
CPUs, mostly because it essentially does:

	for_each_online_cpu(cpu)
		smp_call_function_single(cpu, aperfmperf_snapshot_khz, ...)

smp_call_function_single() reuses a common csd, meaning that each
invocation has to wait for completion of the prior one.
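
For context, the !wait path of smp_call_function_single() in
kernel/smp.c boils down to roughly the following (a simplified sketch,
not the actual code):

	/* Sketch only: every async caller on a given CPU shares one csd,
	 * so csd_lock() must wait for the previous IPI to finish before
	 * the next one can be queued.
	 */
	static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);

	int smp_call_function_single(int cpu, smp_call_func_t func,
				     void *info, int wait)
	{
		call_single_data_t *csd = this_cpu_ptr(&csd_data);

		csd_lock(csd);		/* wait for the prior call to complete */
		csd->func = func;
		csd->info = info;
		return generic_exec_single(cpu, csd);	/* send IPI, don't wait */
	}
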
Paul's recent patches have lowered the number of CPUs receiving the IPI,
but there are still cases where the latency of the above loop can reach
10 ms; an extra msleep(10) is then performed, for a total of 20 ms.

Using smp_call_function_many() allows full parallelism and brings the
latency down to ~80 usec on a host with 256 CPUs.
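
For reference, the batched scheme added below boils down to the
following (a minimal sketch of the idiom; cpu_needs_snapshot() is a
hypothetical stand-in for the housekeeping/idle/staleness checks done
in the real code):

	cpumask_var_t mask;
	bool wait = false;	/* real code sets this for stale samples */
	int cpu;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return;

	cpus_read_lock();
	for_each_online_cpu(cpu)
		if (cpu_needs_snapshot(cpu))	/* hypothetical filter */
			__cpumask_set_cpu(cpu, mask);

	preempt_disable();
	/* one pass: the IPIs go out to all CPUs in the mask in parallel */
	smp_call_function_many(mask, aperfmperf_snapshot_khz, NULL, wait);
	preempt_enable();
	cpus_read_unlock();

	free_cpumask_var(mask);

smp_call_function_many() must be called with preemption disabled, hence
the preempt_disable()/preempt_enable() pair, and cpus_read_lock() keeps
the set of online CPUs stable between building the cpumask and sending
the IPIs.
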
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
Cc: Paul E. McKenney <paulmck@...nel.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Borislav Petkov <bp@...en8.de>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: <x86@...nel.org>
---
 arch/x86/kernel/cpu/aperfmperf.c | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 22911deacb6e441ad60ddb57190ef3772afb3cf0..a305310ceb44784a0ad9be7c196061d98fa1adbc 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -67,7 +67,8 @@ static void aperfmperf_snapshot_khz(void *dummy)
 	atomic_set_release(&s->scfpending, 0);
 }
 
-static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
+static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait,
+				    struct cpumask *mask)
 {
 	s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu));
 	struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu);
@@ -76,9 +77,13 @@ static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
 	if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
 		return true;
 
-	if (!atomic_xchg(&s->scfpending, 1) || wait)
-		smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait);
-
+	if (!atomic_xchg(&s->scfpending, 1) || wait) {
+		if (mask)
+			__cpumask_set_cpu(cpu, mask);
+		else
+			smp_call_function_single(cpu, aperfmperf_snapshot_khz,
+						 NULL, wait);
+	}
 	/* Return false if the previous iteration was too long ago. */
 	return time_delta <= APERFMPERF_STALE_THRESHOLD_MS;
 }
@@ -97,13 +102,14 @@ unsigned int aperfmperf_get_khz(int cpu)
 	if (rcu_is_idle_cpu(cpu))
 		return 0; /* Idle CPUs are completely uninteresting. */
 
-	aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
+	aperfmperf_snapshot_cpu(cpu, ktime_get(), true, NULL);
 	return per_cpu(samples.khz, cpu);
 }
 
 void arch_freq_prepare_all(void)
 {
 	ktime_t now = ktime_get();
+	cpumask_var_t mask;
 	bool wait = false;
 	int cpu;
 
@@ -113,17 +119,25 @@ void arch_freq_prepare_all(void)
 	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
 		return;
 
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return;
+
+	cpus_read_lock();
 	for_each_online_cpu(cpu) {
 		if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
 			continue;
 		if (rcu_is_idle_cpu(cpu))
 			continue; /* Idle CPUs are completely uninteresting. */
-		if (!aperfmperf_snapshot_cpu(cpu, now, false))
+		if (!aperfmperf_snapshot_cpu(cpu, now, false, mask))
 			wait = true;
 	}
 
-	if (wait)
-		msleep(APERFMPERF_REFRESH_DELAY_MS);
+	preempt_disable();
+	smp_call_function_many(mask, aperfmperf_snapshot_khz, NULL, wait);
+	preempt_enable();
+	cpus_read_unlock();
+
+	free_cpumask_var(mask);
 }
 
 unsigned int arch_freq_get_on_cpu(int cpu)
@@ -139,7 +153,7 @@ unsigned int arch_freq_get_on_cpu(int cpu)
 	if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
 		return 0;
 
-	if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
+	if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true, NULL))
 		return per_cpu(samples.khz, cpu);
 
 	msleep(APERFMPERF_REFRESH_DELAY_MS);
--
2.35.1.723.g4982287a31-goog