Message-ID: <20240903135728.GY4723@noisy.programming.kicks-ass.net>
Date: Tue, 3 Sep 2024 15:57:28 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: "Rafael J. Wysocki" <rjw@...ysocki.net>
Cc: x86 Maintainers <x86@...nel.org>, LKML <linux-kernel@...r.kernel.org>,
	Linux PM <linux-pm@...r.kernel.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Srinivas Pandruvada <srinivas.pandruvada@...ux.intel.com>,
	"Rafael J. Wysocki" <rafael@...nel.org>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Ricardo Neri <ricardo.neri@...el.com>,
	Tim Chen <tim.c.chen@...el.com>
Subject: Re: [PATCH v3 1/2] x86/sched: Add basic support for CPU capacity scaling

On Wed, Aug 28, 2024 at 01:47:25PM +0200, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
> 
> In order to be able to compute the sizes of tasks consistently across all
> CPUs in a hybrid system, it is necessary to provide CPU capacity scaling
> information to the scheduler via arch_scale_cpu_capacity().  Moreover,
> the value returned by arch_scale_freq_capacity() for the given CPU must
> correspond to the arch_scale_cpu_capacity() return value for it, or
> utilization computations will be inaccurate.
> 
> Add support for it through per-CPU variables holding the capacity and
> maximum-to-base frequency ratio (times SCHED_CAPACITY_SCALE) that will
> be returned by arch_scale_cpu_capacity() and used by scale_freq_tick()
> to compute arch_freq_scale for the current CPU, respectively.
> 
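FWIW, spelling out the arithmetic the paragraph above describes (a sketch
only, with made-up example frequencies; cf. the scale_freq_tick() hunk
below):

	/*
	 * With SCHED_CAPACITY_SHIFT == 10, scale_freq_tick() ends up
	 * computing:
	 *
	 *   freq_scale = (acnt << 20) / (mcnt * freq_ratio)
	 *
	 * where freq_ratio == (cap_freq << 10) / base_freq per the new
	 * arch_set_cpu_capacity().  Since acnt/mcnt is the APERF/MPERF
	 * ratio, i.e. the average delivered frequency relative to
	 * base_freq, this reduces to:
	 *
	 *   freq_scale ~= (cur_freq / cap_freq) * SCHED_CAPACITY_SCALE
	 *
	 * Example: base_freq = 2.0 GHz, cap_freq = 4.0 GHz, CPU averaging
	 * 3.0 GHz: acnt/mcnt = 3/2 and freq_ratio = 2048, hence
	 * freq_scale = (3 * 2^20) / (2 * 2048) = 768 = 0.75 * 1024.
	 */
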
> In order to avoid adding measurable overhead on non-hybrid x86 systems,
> which make up the vast majority of systems in the field, the new hybrid
> CPU capacity scaling is gated by a static key.  This static key is set
> by calling arch_enable_hybrid_capacity_scale(), which also allocates
> and initializes the memory for the per-CPU data.  Next,
> arch_set_cpu_capacity() is used to set the per-CPU variables mentioned
> above for each CPU, and arch_rebuild_sched_domains() needs to be called
> for the scheduler to realize that capacity-aware scheduling can be used
> going forward.
> 
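IOW, the expected call sequence in a driver ends up something like the
below (a sketch; perf_of(), max_perf, cap_freq_of() and base_freq_of()
are made-up helpers standing in for whatever the driver reads from
firmware, not part of this patch):

	int cpu;

	if (!arch_enable_hybrid_capacity_scale())
		return;		/* allocation failed; stay non-hybrid */

	for_each_possible_cpu(cpu)
		arch_set_cpu_capacity(cpu, perf_of(cpu), max_perf,
				      cap_freq_of(cpu), base_freq_of(cpu));

	arch_rebuild_sched_domains();
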
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@...el.com>

Looks about right; would be good to hear from the AMD folks if they can
use it as is, but if not, it should be simple enough to fix up later.

Acked-by: Peter Zijlstra (Intel) <peterz@...radead.org>

> ---
>  arch/x86/include/asm/topology.h  |   13 +++++
>  arch/x86/kernel/cpu/aperfmperf.c |   89 ++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 100 insertions(+), 2 deletions(-)
> 
> Index: linux-pm/arch/x86/include/asm/topology.h
> ===================================================================
> --- linux-pm.orig/arch/x86/include/asm/topology.h
> +++ linux-pm/arch/x86/include/asm/topology.h
> @@ -282,9 +282,22 @@ static inline long arch_scale_freq_capac
>  }
>  #define arch_scale_freq_capacity arch_scale_freq_capacity
>  
> +bool arch_enable_hybrid_capacity_scale(void);
> +void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
> +			   unsigned long cap_freq, unsigned long base_freq);
> +
> +unsigned long arch_scale_cpu_capacity(int cpu);
> +#define arch_scale_cpu_capacity arch_scale_cpu_capacity
> +
>  extern void arch_set_max_freq_ratio(bool turbo_disabled);
>  extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
>  #else
> +static inline bool arch_enable_hybrid_capacity_scale(void) { return false; }
> +static inline void arch_set_cpu_capacity(int cpu, unsigned long cap,
> +					 unsigned long max_cap,
> +					 unsigned long cap_freq,
> +					 unsigned long base_freq) { }
> +
>  static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
>  static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
>  #endif
> Index: linux-pm/arch/x86/kernel/cpu/aperfmperf.c
> ===================================================================
> --- linux-pm.orig/arch/x86/kernel/cpu/aperfmperf.c
> +++ linux-pm/arch/x86/kernel/cpu/aperfmperf.c
> @@ -349,9 +349,89 @@ static DECLARE_WORK(disable_freq_invaria
>  DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
>  EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);
>  
> +static DEFINE_STATIC_KEY_FALSE(arch_hybrid_cap_scale_key);
> +
> +struct arch_hybrid_cpu_scale {
> +	unsigned long capacity;
> +	unsigned long freq_ratio;
> +};
> +
> +static struct arch_hybrid_cpu_scale __percpu *arch_cpu_scale;
> +
> +/**
> + * arch_enable_hybrid_capacity_scale - Enable hybrid CPU capacity scaling
> + *
> + * Allocate memory for per-CPU data used by hybrid CPU capacity scaling,
> + * initialize it and set the static key controlling its code paths.
> + *
> + * Must be called before arch_set_cpu_capacity().
> + */
> +bool arch_enable_hybrid_capacity_scale(void)
> +{
> +	int cpu;
> +
> +	if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) {
> +		WARN_ONCE(1, "Hybrid CPU capacity scaling already enabled");
> +		return true;
> +	}
> +
> +	arch_cpu_scale = alloc_percpu(struct arch_hybrid_cpu_scale);
> +	if (!arch_cpu_scale)
> +		return false;
> +
> +	for_each_possible_cpu(cpu) {
> +		per_cpu_ptr(arch_cpu_scale, cpu)->capacity = SCHED_CAPACITY_SCALE;
> +		per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio = arch_max_freq_ratio;
> +	}
> +
> +	static_branch_enable(&arch_hybrid_cap_scale_key);
> +
> +	pr_info("Hybrid CPU capacity scaling enabled\n");
> +
> +	return true;
> +}
> +
> +/**
> + * arch_set_cpu_capacity - Set scale-invariance parameters for a CPU
> + * @cpu: Target CPU.
> + * @cap: Capacity of @cpu at its maximum frequency, relative to @max_cap.
> + * @max_cap: System-wide maximum CPU capacity.
> + * @cap_freq: Frequency of @cpu corresponding to @cap.
> + * @base_freq: Frequency of @cpu at which MPERF counts.
> + *
> + * The units in which @cap and @max_cap are expressed do not matter, so long
> + * as they are consistent, because the former is effectively divided by the
> + * latter.  Analogously for @cap_freq and @base_freq.
> + *
> + * After calling this function for all CPUs, call arch_rebuild_sched_domains()
> + * to let the scheduler know that capacity-aware scheduling can be used going
> + * forward.
> + */
> +void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
> +			   unsigned long cap_freq, unsigned long base_freq)
> +{
> +	if (static_branch_likely(&arch_hybrid_cap_scale_key)) {
> +		WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity,
> +			   div_u64(cap << SCHED_CAPACITY_SHIFT, max_cap));
> +		WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio,
> +			   div_u64(cap_freq << SCHED_CAPACITY_SHIFT, base_freq));
> +	} else {
> +		WARN_ONCE(1, "Hybrid CPU capacity scaling not enabled");
> +	}
> +}
> +
> +unsigned long arch_scale_cpu_capacity(int cpu)
> +{
> +	if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
> +		return READ_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity);
> +
> +	return SCHED_CAPACITY_SCALE;
> +}
> +EXPORT_SYMBOL_GPL(arch_scale_cpu_capacity);
> +
>  static void scale_freq_tick(u64 acnt, u64 mcnt)
>  {
> -	u64 freq_scale;
> +	u64 freq_scale, freq_ratio;
>  
>  	if (!arch_scale_freq_invariant())
>  		return;
> @@ -359,7 +439,12 @@ static void scale_freq_tick(u64 acnt, u6
>  	if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
>  		goto error;
>  
> -	if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
> +	if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
> +		freq_ratio = READ_ONCE(this_cpu_ptr(arch_cpu_scale)->freq_ratio);
> +	else
> +		freq_ratio = arch_max_freq_ratio;
> +
> +	if (check_mul_overflow(mcnt, freq_ratio, &mcnt) || !mcnt)
>  		goto error;
>  
>  	freq_scale = div64_u64(acnt, mcnt);
> 
> 
> 
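And to make the units note in the arch_set_cpu_capacity() kerneldoc
concrete (made-up numbers; any consistent units work out the same):

	/*
	 * capacity   = (cap << SCHED_CAPACITY_SHIFT) / max_cap
	 * freq_ratio = (cap_freq << SCHED_CAPACITY_SHIFT) / base_freq
	 *
	 * E.g. an E-core with cap = 10 vs max_cap = 13 (arbitrary units):
	 *   capacity = 10 * 1024 / 13 = 787
	 * and cap_freq = 3800000 kHz vs base_freq = 2400000 kHz:
	 *   freq_ratio = 3800000 * 1024 / 2400000 = 1621
	 */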
