linux-kernel - Re: [PATCH v6 4/5] cpufreq: qcom: Update the bandwidth levels on frequency change

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200615172553.GU4525@google.com>
Date:   Mon, 15 Jun 2020 10:25:53 -0700
From:   Matthias Kaehlcke <mka@...omium.org>
To:     Sibi Sankar <sibis@...eaurora.org>
Cc:     viresh.kumar@...aro.org, sboyd@...nel.org,
        georgi.djakov@...aro.org, saravanak@...gle.com, nm@...com,
        bjorn.andersson@...aro.org, agross@...nel.org, rjw@...ysocki.net,
        linux-arm-msm@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-pm@...r.kernel.org, dianders@...omium.org,
        vincent.guittot@...aro.org, amit.kucheria@...aro.org,
        lukasz.luba@....com, sudeep.holla@....com, smasetty@...eaurora.org
Subject: Re: [PATCH v6 4/5] cpufreq: qcom: Update the bandwidth levels on
 frequency change

Hi Sibi,

On Sat, Jun 06, 2020 at 03:03:31AM +0530, Sibi Sankar wrote:
> Add support to parse optional OPP table attached to the cpu node when
> the OPP bandwidth values are populated. This allows for scaling of
> DDR/L3 bandwidth levels with frequency change.
> 
> Signed-off-by: Sibi Sankar <sibis@...eaurora.org>
> ---
> 
> v6:
>  * Add global flag to distinguish between voltage update and opp add.
>    Use the same flag before trying to scale ddr/l3 bw [Viresh]
>  * Use dev_pm_opp_find_freq_ceil to grab all opps [Viresh] 
>  * Move dev_pm_opp_of_find_icc_paths into probe [Viresh]
> 
> v5:
>  * Use dev_pm_opp_adjust_voltage instead [Viresh]
>  * Misc cleanup
> 
> v4:
>  * Split fast switch disable into another patch [Lukasz]
> 
>  drivers/cpufreq/qcom-cpufreq-hw.c | 82 ++++++++++++++++++++++++++++++-
>  1 file changed, 80 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
> index fc92a8842e252..8fa6ab6e0e4b6 100644
> --- a/drivers/cpufreq/qcom-cpufreq-hw.c
> +++ b/drivers/cpufreq/qcom-cpufreq-hw.c
> @@ -6,6 +6,7 @@
>  #include <linux/bitfield.h>
>  #include <linux/cpufreq.h>
>  #include <linux/init.h>
> +#include <linux/interconnect.h>
>  #include <linux/kernel.h>
>  #include <linux/module.h>
>  #include <linux/of_address.h>
> @@ -30,6 +31,48 @@
>  
>  static unsigned long cpu_hw_rate, xo_rate;
>  static struct platform_device *global_pdev;
> +static bool icc_scaling_enabled;

It seem you rely on 'icc_scaling_enabled' to be initialized to 'false'.
This works during the first initialization, but not if the 'device' is
unbound/rebound. In theory things shouldn't be different in a succesive
initialization, however for robustness the variable should be explicitly
set to 'false' somewhere in the code path (_probe(), _read_lut(), ...).

> +static int qcom_cpufreq_set_bw(struct cpufreq_policy *policy,
> +			       unsigned long freq_khz)
> +{
> +	unsigned long freq_hz = freq_khz * 1000;
> +	struct dev_pm_opp *opp;
> +	struct device *dev;
> +	int ret;
> +
> +	dev = get_cpu_device(policy->cpu);
> +	if (!dev)
> +		return -ENODEV;
> +
> +	opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true);
> +	if (IS_ERR(opp))
> +		return PTR_ERR(opp);
> +
> +	ret = dev_pm_opp_set_bw(dev, opp);
> +	dev_pm_opp_put(opp);
> +	return ret;
> +}
> +
> +static int qcom_cpufreq_update_opp(struct device *cpu_dev,
> +				   unsigned long freq_khz,
> +				   unsigned long volt)
> +{
> +	unsigned long freq_hz = freq_khz * 1000;
> +	int ret;
> +
> +	/* Skip voltage update if the opp table is not available */
> +	if (!icc_scaling_enabled)
> +		return dev_pm_opp_add(cpu_dev, freq_hz, volt);
> +
> +	ret = dev_pm_opp_adjust_voltage(cpu_dev, freq_hz, volt, volt, volt);
> +	if (ret) {
> +		dev_err(cpu_dev, "Voltage update failed freq=%ld\n", freq_khz);
> +		return ret;
> +	}
> +
> +	return dev_pm_opp_enable(cpu_dev, freq_hz);
> +}
>  
>  static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy,
>  					unsigned int index)
> @@ -39,6 +82,9 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy,
>  
>  	writel_relaxed(index, perf_state_reg);
>  
> +	if (icc_scaling_enabled)
> +		qcom_cpufreq_set_bw(policy, freq);
> +
>  	arch_set_freq_scale(policy->related_cpus, freq,
>  			    policy->cpuinfo.max_freq);
>  	return 0;
> @@ -89,11 +135,31 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
>  	u32 data, src, lval, i, core_count, prev_freq = 0, freq;
>  	u32 volt;
>  	struct cpufreq_frequency_table	*table;
> +	struct dev_pm_opp *opp;
> +	unsigned long rate;
> +	int ret;
>  
>  	table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL);
>  	if (!table)
>  		return -ENOMEM;
>  
> +	ret = dev_pm_opp_of_add_table(cpu_dev);
> +	if (!ret) {
> +		/* Disable all opps and cross-validate against LUT */

nit: IIUC the cross-validation doesn't happen in this branch, so the
comment is a bit misleading. Maybe change it to "Disable all opps to
cross-validate against the LUT {below,later}".

> +		icc_scaling_enabled = true;
> +		for (rate = 0; ; rate++) {
> +			opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate);
> +			if (IS_ERR(opp))
> +				break;
> +
> +			dev_pm_opp_put(opp);
> +			dev_pm_opp_disable(cpu_dev, rate);
> +		}
> +	} else if (ret != -ENODEV) {
> +		dev_err(cpu_dev, "Invalid opp table in device tree\n");
> +		return ret;
> +	}
> +
>  	for (i = 0; i < LUT_MAX_ENTRIES; i++) {
>  		data = readl_relaxed(base + REG_FREQ_LUT +
>  				      i * LUT_ROW_SIZE);
> @@ -112,7 +178,7 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
>  
>  		if (freq != prev_freq && core_count != LUT_TURBO_IND) {
>  			table[i].frequency = freq;
> -			dev_pm_opp_add(cpu_dev, freq * 1000, volt);
> +			qcom_cpufreq_update_opp(cpu_dev, freq, volt);

This is the cross-validation mentioned above, right? Shouldn't it include
a check of the return value?

>  			dev_dbg(cpu_dev, "index=%d freq=%d, core_count %d\n", i,
>  				freq, core_count);
>  		} else if (core_count == LUT_TURBO_IND) {
> @@ -133,7 +199,8 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
>  			if (prev->frequency == CPUFREQ_ENTRY_INVALID) {
>  				prev->frequency = prev_freq;
>  				prev->flags = CPUFREQ_BOOST_FREQ;
> -				dev_pm_opp_add(cpu_dev,	prev_freq * 1000, volt);
> +				qcom_cpufreq_update_opp(cpu_dev, prev_freq,
> +							volt);

ditto

nit: with the updated max line length it isn't necessary anymore to break
this into multiple lines (https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/checkpatch.pl?h=v5.8-rc1#n54),
though the coding style still has the old limit.