lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20231217180016.wkkatrjuanuk5x52@airbuntu>
Date: Sun, 17 Dec 2023 18:00:16 +0000
From: Qais Yousef <qyousef@...alina.io>
To: Lukasz Luba <lukasz.luba@....com>
Cc: linux-kernel@...r.kernel.org, linux-pm@...r.kernel.org,
	rafael@...nel.org, dietmar.eggemann@....com, rui.zhang@...el.com,
	amit.kucheria@...durent.com, amit.kachhap@...il.com,
	daniel.lezcano@...aro.org, viresh.kumar@...aro.org,
	len.brown@...el.com, pavel@....cz, mhiramat@...nel.org,
	wvw@...gle.com
Subject: Re: [PATCH v5 13/23] PM: EM: Add performance field to struct
 em_perf_state

On 11/29/23 11:08, Lukasz Luba wrote:
> The performance doesn't scale linearly with the frequency. Also, it may
> be different in different workloads. Some CPUs are designed to be
> particularly good at some applications e.g. images or video processing
> and other CPUs at different ones. When those different types of CPUs are
> combined in one SoC they should be properly modeled to get max of the HW
> in Energy Aware Scheduler (EAS). The Energy Model (EM) provides the
> power vs. performance curves to the EAS, but assumes the CPUs capacity
> is fixed and scales linearly with the frequency. This patch allows to
> adjust the curve on the 'performance' axis as well.
> 
> Signed-off-by: Lukasz Luba <lukasz.luba@....com>
> ---
>  include/linux/energy_model.h | 11 ++++++-----
>  kernel/power/energy_model.c  | 27 +++++++++++++++++++++++++++
>  2 files changed, 33 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
> index ae3ccc8b9f44..e30750500b10 100644
> --- a/include/linux/energy_model.h
> +++ b/include/linux/energy_model.h
> @@ -13,6 +13,7 @@
>  
>  /**
>   * struct em_perf_state - Performance state of a performance domain
> + * @performance:	Non-linear CPU performance at a given frequency
>   * @frequency:	The frequency in KHz, for consistency with CPUFreq
>   * @power:	The power consumed at this level (by 1 CPU or by a registered
>   *		device). It can be a total power: static and dynamic.
> @@ -21,6 +22,7 @@
>   * @flags:	see "em_perf_state flags" description below.
>   */
>  struct em_perf_state {
> +	unsigned long performance;
>  	unsigned long frequency;
>  	unsigned long power;
>  	unsigned long cost;
> @@ -207,14 +209,14 @@ void em_free_table(struct em_perf_table __rcu *table);
>   */
>  static inline int
>  em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
> -			  unsigned long freq, unsigned long pd_flags)
> +			  unsigned long max_util, unsigned long pd_flags)
>  {
>  	struct em_perf_state *ps;
>  	int i;
>  
>  	for (i = 0; i < nr_perf_states; i++) {
>  		ps = &table[i];
> -		if (ps->frequency >= freq) {
> +		if (ps->performance >= max_util) {
>  			if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
>  			    ps->flags & EM_PERF_STATE_INEFFICIENT)
>  				continue;
> @@ -246,8 +248,8 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
>  				unsigned long allowed_cpu_cap)
>  {
>  	struct em_perf_table *runtime_table;
> -	unsigned long freq, scale_cpu;
>  	struct em_perf_state *ps;
> +	unsigned long scale_cpu;
>  	int cpu, i;
>  
>  	if (!sum_util)
> @@ -274,14 +276,13 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
>  
>  	max_util = map_util_perf(max_util);
>  	max_util = min(max_util, allowed_cpu_cap);
> -	freq = map_util_freq(max_util, ps->frequency, scale_cpu);
>  
>  	/*
>  	 * Find the lowest performance state of the Energy Model above the
>  	 * requested frequency.
>  	 */
>  	i = em_pd_get_efficient_state(runtime_table->state, pd->nr_perf_states,
> -				      freq, pd->flags);
> +				      max_util, pd->flags);
>  	ps = &runtime_table->state[i];
>  
>  	/*
> diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
> index 614891fde8df..b5016afe6a19 100644
> --- a/kernel/power/energy_model.c
> +++ b/kernel/power/energy_model.c
> @@ -46,6 +46,7 @@ static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd)
>  	debugfs_create_ulong("frequency", 0444, d, &ps->frequency);
>  	debugfs_create_ulong("power", 0444, d, &ps->power);
>  	debugfs_create_ulong("cost", 0444, d, &ps->cost);
> +	debugfs_create_ulong("performance", 0444, d, &ps->performance);
>  	debugfs_create_ulong("inefficient", 0444, d, &ps->flags);
>  }
>  
> @@ -171,6 +172,30 @@ em_allocate_table(struct em_perf_domain *pd)
>  	return table;
>  }
>  
> +static void em_init_performance(struct device *dev, struct em_perf_domain *pd,
> +				struct em_perf_state *table, int nr_states)
> +{
> +	u64 fmax, max_cap;
> +	int i, cpu;
> +
> +	/* This is needed only for CPUs and EAS skip other devices */
> +	if (!_is_cpu_device(dev))
> +		return;
> +
> +	cpu = cpumask_first(em_span_cpus(pd));
> +
> +	/*
> +	 * Calculate the performance value for each frequency with
> +	 * linear relationship. The final CPU capacity might not be ready at
> +	 * boot time, but the EM will be updated a bit later with correct one.
> +	 */
> +	fmax = (u64) table[nr_states - 1].frequency;
> +	max_cap = (u64) arch_scale_cpu_capacity(cpu);
> +	for (i = 0; i < nr_states; i++)
> +		table[i].performance = div64_u64(max_cap * table[i].frequency,
> +						 fmax);

Should we sanity check that the computed performance value is correct, in case
we were passed a malformed table? Maybe the table is already sanity checked and
sorted before we get here; I didn't check, to be honest.

I think a warning if performance ever ends up > max_cap (i.e. enforcing that
performance is always <= max_cap) would be helpful in general as the code
evolves in the future.


Cheers

--
Qais Yousef

> +}
> +
>  static int em_compute_costs(struct device *dev, struct em_perf_state *table,
>  			    struct em_data_callback *cb, int nr_states,
>  			    unsigned long flags)
> @@ -331,6 +356,8 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
>  		table[i].frequency = prev_freq = freq;
>  	}
>  
> +	em_init_performance(dev, pd, table, nr_states);
> +
>  	ret = em_compute_costs(dev, table, cb, nr_states, flags);
>  	if (ret)
>  		return -EINVAL;
> -- 
> 2.25.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ