Message-ID: <3300400.aeNJFYEL58@rjwysocki.net>
Date: Fri, 02 Aug 2024 20:18:57 +0200
From: "Rafael J. Wysocki" <rjw@...ysocki.net>
To: x86 Maintainers <x86@...nel.org>
Cc: LKML <linux-kernel@...r.kernel.org>, Linux PM <linux-pm@...r.kernel.org>,
Thomas Gleixner <tglx@...utronix.de>, Peter Zijlstra <peterz@...radead.org>,
Srinivas Pandruvada <srinivas.pandruvada@...ux.intel.com>,
"Rafael J. Wysocki" <rafael@...nel.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Ricardo Neri <ricardo.neri@...el.com>, Tim Chen <tim.c.chen@...el.com>
Subject: [PATCH v1 2/3] x86/sched: Add basic support for CPU capacity scaling
From: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
In order to be able to compute the sizes of tasks consistently across all
CPUs in a hybrid system, it is necessary to provide CPU capacity scaling
information to the scheduler via arch_scale_cpu_capacity(). Moreover,
the value returned by arch_scale_freq_capacity() for the given CPU must
correspond to the arch_scale_cpu_capacity() return value for it, or
utilization computations will be inaccurate.
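For illustration, this is roughly how the scheduler combines the two
scale factors when accumulating PELT utilization (a simplified sketch
along the lines of update_rq_clock_pelt(), where cap_scale(v, s) is
((v) * (s) >> SCHED_CAPACITY_SHIFT)):

	/* Scale a time delta by both invariance factors (sketch): */
	delta = cap_scale(delta, arch_scale_cpu_capacity(cpu));
	delta = cap_scale(delta, arch_scale_freq_capacity(cpu));

If the two return values are inconsistent with each other, the product
above over- or under-estimates the work done during the delta.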
Add support for it through per-CPU variables holding the capacity and
the maximum-to-base frequency ratio (times SCHED_CAPACITY_SCALE): the
former will be returned by arch_scale_cpu_capacity() and the latter will
be used by scale_freq_tick() to compute arch_freq_scale for the current
CPU.
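As a hypothetical worked example (all frequencies made up), consider a
CPU with a maximum frequency of 3800 MHz and a base frequency of
2000 MHz, currently running at 3000 MHz:

	freq_ratio = (3800 << SCHED_CAPACITY_SHIFT) / 2000 = 1945
	acnt / mcnt ~= 3000 / 2000	/* APERF/MPERF deltas */
	arch_freq_scale ~= (3000 * 1024 / 2000) * 1024 / 1945 = 808

which matches the expected 3000 / 3800 * SCHED_CAPACITY_SCALE, that is,
the current frequency relative to the CPU's own maximum frequency.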
In order to avoid adding measurable overhead for non-hybrid x86 systems,
which are the vast majority in the field, whether or not the new hybrid
CPU capacity scaling will be in effect is controlled by a static key.
This static key is set by calling arch_enable_hybrid_capacity_scale()
which also allocates memory for the per-CPU data and initializes it.
Next, arch_set_cpu_capacity() is used to set the per-CPU variables
mentioned above for each CPU, and arch_rebuild_sched_domains() needs to
be called to let the scheduler know that capacity-aware scheduling can
be used going forward, as sketched below.
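A minimal driver-side sketch of that sequence (the perf/freq lookup
helpers below are hypothetical stand-ins for data obtained from firmware
or hardware enumeration):

	static void example_enable_capacity_scaling(void)
	{
		int cpu;

		if (!arch_enable_hybrid_capacity_scale())
			return;

		for_each_online_cpu(cpu)
			/* perf_of() etc. are made-up helpers */
			arch_set_cpu_capacity(cpu, perf_of(cpu), max_perf,
					      max_freq_of(cpu),
					      base_freq_of(cpu));

		arch_rebuild_sched_domains();
	}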
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
---
arch/x86/include/asm/topology.h | 13 +++++
arch/x86/kernel/cpu/aperfmperf.c | 91 ++++++++++++++++++++++++++++++++++++++-
2 files changed, 103 insertions(+), 1 deletion(-)
Index: linux-pm/arch/x86/include/asm/topology.h
===================================================================
--- linux-pm.orig/arch/x86/include/asm/topology.h
+++ linux-pm/arch/x86/include/asm/topology.h
@@ -280,11 +280,24 @@ static inline long arch_scale_freq_capac
}
#define arch_scale_freq_capacity arch_scale_freq_capacity
+bool arch_enable_hybrid_capacity_scale(void);
+void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long base_cap,
+ unsigned long max_freq, unsigned long base_freq);
+
+unsigned long arch_scale_cpu_capacity(int cpu);
+#define arch_scale_cpu_capacity arch_scale_cpu_capacity
+
extern void arch_set_max_freq_ratio(bool turbo_disabled);
extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
void arch_rebuild_sched_domains(void);
#else
+static inline bool arch_enable_hybrid_capacity_scale(void) { return false; }
+static inline void arch_set_cpu_capacity(int cpu, unsigned long cap,
+ unsigned long base_cap,
+ unsigned long max_freq,
+ unsigned long base_freq) { }
+
static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
Index: linux-pm/arch/x86/kernel/cpu/aperfmperf.c
===================================================================
--- linux-pm.orig/arch/x86/kernel/cpu/aperfmperf.c
+++ linux-pm/arch/x86/kernel/cpu/aperfmperf.c
@@ -300,6 +300,8 @@ static void register_freq_invariance_sys
static inline void register_freq_invariance_syscore_ops(void) {}
#endif
+static DEFINE_STATIC_KEY_FALSE(arch_hybrid_cap_scale_key);
+
static void freq_invariance_enable(void)
{
if (static_branch_unlikely(&arch_scale_freq_key)) {
@@ -332,6 +334,7 @@ static void disable_freq_invariance_work
int cpu;
static_branch_disable(&arch_scale_freq_key);
+ static_branch_disable(&arch_hybrid_cap_scale_key);
/*
* Set arch_freq_scale to a default value on all cpus
@@ -347,9 +350,90 @@ static DECLARE_WORK(disable_freq_invaria
DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);
+struct arch_hybrid_cpu_scale {
+ unsigned long capacity;
+ unsigned long freq_ratio;
+};
+
+static struct arch_hybrid_cpu_scale __percpu *arch_cpu_scale;
+
+/**
+ * arch_enable_hybrid_capacity_scale - Enable hybrid CPU capacity scaling
+ *
+ * Allocate memory for per-CPU data used by hybrid CPU capacity scaling,
+ * initialize it and set the static key controlling its code paths.
+ *
+ * Must be called before arch_set_cpu_capacity().
+ */
+bool arch_enable_hybrid_capacity_scale(void)
+{
+ int cpu;
+
+ if (!arch_scale_freq_invariant())
+ return false;
+
+ if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) {
+ WARN_ON_ONCE(1);
+ return false;
+ }
+ arch_cpu_scale = alloc_percpu(struct arch_hybrid_cpu_scale);
+ if (!arch_cpu_scale)
+ return false;
+
+ for_each_possible_cpu(cpu) {
+ per_cpu_ptr(arch_cpu_scale, cpu)->capacity = SCHED_CAPACITY_SCALE;
+ per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio = arch_max_freq_ratio;
+ }
+
+ static_branch_enable(&arch_hybrid_cap_scale_key);
+
+ pr_info("Hybrid CPU capacity scaling enabled\n");
+
+ return true;
+}
+
+/**
+ * arch_set_cpu_capacity - Set scale-invariance parameters for a CPU
+ * @cpu: Target CPU.
+ * @cap: Capacity of @cpu, relative to @base_cap, at its maximum frequency.
+ * @base_cap: System-wide maximum CPU capacity.
+ * @max_freq: Frequency of @cpu corresponding to @cap.
+ * @base_freq: Frequency of @cpu at which MPERF counts.
+ *
+ * The units in which @cap and @base_cap are expressed do not matter, so long
+ * as they are consistent, because the former is effectively divided by the
+ * latter. Analogously for @max_freq and @base_freq.
+ *
+ * After calling this function for all CPUs, call arch_rebuild_sched_domains()
+ * to let the scheduler know that capacity-aware scheduling can be used going
+ * forward.
+ */
+void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long base_cap,
+ unsigned long max_freq, unsigned long base_freq)
+{
+ if (static_branch_likely(&arch_hybrid_cap_scale_key)) {
+ WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity,
+ div_u64(cap << SCHED_CAPACITY_SHIFT, base_cap));
+ WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio,
+ div_u64(max_freq << SCHED_CAPACITY_SHIFT, base_freq));
+ } else {
+ WARN_ON_ONCE(1);
+ }
+}
+
+unsigned long arch_scale_cpu_capacity(int cpu)
+{
+ if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
+ return READ_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity);
+
+ return SCHED_CAPACITY_SCALE;
+}
+EXPORT_SYMBOL_GPL(arch_scale_cpu_capacity);
+
static void scale_freq_tick(u64 acnt, u64 mcnt)
{
u64 freq_scale;
+ u64 freq_ratio;
if (!arch_scale_freq_invariant())
return;
@@ -357,7 +441,12 @@ static void scale_freq_tick(u64 acnt, u6
if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
goto error;
- if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
+ if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
+ freq_ratio = READ_ONCE(this_cpu_ptr(arch_cpu_scale)->freq_ratio);
+ else
+ freq_ratio = arch_max_freq_ratio;
+
+ if (check_mul_overflow(mcnt, freq_ratio, &mcnt) || !mcnt)
goto error;
freq_scale = div64_u64(acnt, mcnt);
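For reference, with the per-CPU ratio in place the tick-time computation
effectively becomes (overflow checks and the clamping in the existing
code elided):

	freq_scale = (acnt << 2 * SCHED_CAPACITY_SHIFT) / (mcnt * freq_ratio);

and since freq_ratio is (max_freq << SCHED_CAPACITY_SHIFT) / base_freq
while acnt / mcnt tracks cur_freq / base_freq, this reduces to roughly
cur_freq / max_freq * SCHED_CAPACITY_SCALE for the current CPU.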