lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d6392b1af4ab459195a1954e4e5ad87e@viasat.com>
Date: Tue, 3 Sep 2024 17:51:34 +0000
From: "Jones, Morgan" <Morgan.Jones@...sat.com>
To: Mario Limonciello <mario.limonciello@....com>,
        Dhananjay Ugwekar
	<Dhananjay.Ugwekar@....com>,
        "rafael@...nel.org" <rafael@...nel.org>,
        "viresh.kumar@...aro.org" <viresh.kumar@...aro.org>,
        "gautham.shenoy@....com"
	<gautham.shenoy@....com>,
        "perry.yuan@....com" <perry.yuan@....com>,
        "skhan@...uxfoundation.org" <skhan@...uxfoundation.org>,
        "li.meng@....com"
	<li.meng@....com>,
        "ray.huang@....com" <ray.huang@....com>
CC: "linux-pm@...r.kernel.org" <linux-pm@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        David Arcari
	<darcari@...hat.com>
Subject: RE: [PATCH v2 2/2] cpufreq/amd-pstate: Fix the scaling_max_freq
 setting on shared memory CPPC systems

Hi there,

We are experiencing a ~35% performance regression on an AMD EPYC 7702 64 core machine after applying this patch. We observed Linux 6.6.44 starting to cause the issue, and performed a bisect involving rebooting the machine over 15 times. (Note that kexec didn't successfully identify the problem, since the PM memory mailbox is never reset).

It appears that we are getting a max of 2.18 GHz when this CPU can boost to 3.35 GHz, explaining the slowdown. Blacklisting amd-pstate solves the issue at the expense of the performance increase we used to get by using it.

Is it possible that the upper limits were implicitly at the max CPU power before, and setting the upper limit to something other than the boost frequency can reduce performance now?

# bad: [7213910600667c51c978e577bf5454d3f7b313b7] Linux 6.6.44
# good: [58b0425ff5df680d0b67f64ae1f3f1ebdf1c4de9] Linux 6.6.43
git bisect start '7213910600667c51c978e577bf5454d3f7b313b7' '58b0425ff5df680d0b67f64ae1f3f1ebdf1c4de9'
# good: [72ff9d26964a3a80f7650df719df139f5c1f965d] arm64: dts: qcom: sm6350: Add missing qcom,non-secure-domain property
git bisect good 72ff9d26964a3a80f7650df719df139f5c1f965d
# good: [0fffc2e1bf40a2220ef5a38f834ea063dba832d3] ARM: dts: sunxi: remove duplicated entries in makefile
git bisect good 0fffc2e1bf40a2220ef5a38f834ea063dba832d3
# bad: [8cdbe6ebfd1763a5c41a2a3058497c0a9163311c] pinctrl: renesas: r8a779g0: Fix CANFD5 suffix
git bisect bad 8cdbe6ebfd1763a5c41a2a3058497c0a9163311c
# bad: [5dbb98e7fa42bebc1325899193d8f13f0705a148] drm/mediatek: Turn off the layers with zero width or height
git bisect bad 5dbb98e7fa42bebc1325899193d8f13f0705a148
# bad: [691ec7043122c9c8c46d84f6e6cd85d13d50cd93] selftests/bpf: Null checks for links in bpf_tcp_ca
git bisect bad 691ec7043122c9c8c46d84f6e6cd85d13d50cd93
# bad: [a1359e085d75d7393a250054e66c0a7bc6c3dbfa] perf/x86: Serialize set_attr_rdpmc()
git bisect bad a1359e085d75d7393a250054e66c0a7bc6c3dbfa
# bad: [e99d9b16ff153de9540073239d24adc3b0a3a997] wifi: ath12k: change DMA direction while mapping reinjected packets
git bisect bad e99d9b16ff153de9540073239d24adc3b0a3a997
# bad: [d027ac4a08541beb2a89563d3e034da7085050ba] firmware: turris-mox-rwtm: Initialize completion before mailbox
git bisect bad d027ac4a08541beb2a89563d3e034da7085050ba
# bad: [e6c9eca327e6a41a81e7eba0d0ddc13da37f82a1] ARM: spitz: fix GPIO assignment for backlight
git bisect bad e6c9eca327e6a41a81e7eba0d0ddc13da37f82a1
# bad: [b8cdefdaa555bbfc269c2198803f8791a8923960] m68k: cmpxchg: Fix return value for default case in __arch_xchg()
git bisect bad b8cdefdaa555bbfc269c2198803f8791a8923960
# bad: [13a71384ae6a8779da809b00c6f378dcead10427] cpufreq/amd-pstate: Fix the scaling_max_freq setting on shared memory CPPC systems
git bisect bad 13a71384ae6a8779da809b00c6f378dcead10427
# first bad commit: [13a71384ae6a8779da809b00c6f378dcead10427] cpufreq/amd-pstate: Fix the scaling_max_freq setting on shared memory CPPC systems  

cpupower output:

analyzing CPU 47:
  driver: amd-pstate-epp
  CPUs which run at the same hardware frequency: 47
  CPUs which need to have their frequency coordinated by software: 47
  maximum transition latency:  Cannot determine or is not supported.
  hardware limits: 400 MHz - 2.18 GHz
  available cpufreq governors: performance powersave
  current policy: frequency should be within 400 MHz and 2.18 GHz.
                  The governor "performance" may decide which speed to use
                  within this range.
  current CPU frequency: Unable to call hardware
  current CPU frequency: 2.17 GHz (asserted by call to kernel)
  boost state support:
    Supported: yes
    Active: yes
    AMD PSTATE Highest Performance: 166. Maximum Frequency: 2.18 GHz.
    AMD PSTATE Nominal Performance: 152. Nominal Frequency: 2.00 GHz.
    AMD PSTATE Lowest Non-linear Performance: 115. Lowest Non-linear Frequency: 1.51 GHz.
    AMD PSTATE Lowest Performance: 31. Lowest Frequency: 400 MHz.

Thanks,
Morgan

-----Original Message-----
From: Mario Limonciello <mario.limonciello@....com> 
Sent: Tuesday, July 2, 2024 10:49 AM
To: Dhananjay Ugwekar <Dhananjay.Ugwekar@....com>; rafael@...nel.org; viresh.kumar@...aro.org; gautham.shenoy@....com; perry.yuan@....com; skhan@...uxfoundation.org; li.meng@....com; ray.huang@....com
Cc: linux-pm@...r.kernel.org; linux-kernel@...r.kernel.org; David Arcari <darcari@...hat.com>
Subject: Re: [PATCH v2 2/2] cpufreq/amd-pstate: Fix the scaling_max_freq setting on shared memory CPPC systems

On 7/2/2024 3:14, Dhananjay Ugwekar wrote:
> On shared memory CPPC systems, with amd_pstate=active mode, the change 
> in scaling_max_freq doesn't get written to the shared memory region. 
> Due to this, the writes to the scaling_max_freq sysfs file don't take 
> effect. Fix this by propagating the scaling_max_freq changes to the 
> shared memory region.
> 
> Fixes: ffa5096a7c33 ("cpufreq: amd-pstate: implement Pstate EPP 
> support for the AMD processors")
> Reported-by: David Arcari <darcari@...hat.com>
> Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@....com>
Reviewed-by: Mario Limonciello <mario.limonciello@....com>
> ---
>   drivers/cpufreq/amd-pstate.c | 43 +++++++++++++++++++-----------------
>   1 file changed, 23 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/cpufreq/amd-pstate.c 
> b/drivers/cpufreq/amd-pstate.c index 9ad62dbe8bfb..a092b13ffbc2 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -247,6 +247,26 @@ static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
>   	return index;
>   }
>   
> +static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
> +			       u32 des_perf, u32 max_perf, bool fast_switch) {
> +	if (fast_switch)
> +		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
> +	else
> +		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> +			      READ_ONCE(cpudata->cppc_req_cached));
> +}
> +
> +DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);
> +
> +static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
> +					  u32 min_perf, u32 des_perf,
> +					  u32 max_perf, bool fast_switch) {
> +	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
> +					    max_perf, fast_switch);
> +}
> +
>   static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
>   {
>   	int ret;
> @@ -263,6 +283,9 @@ static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
>   		if (!ret)
>   			cpudata->epp_cached = epp;
>   	} else {
> +		amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
> +					     cpudata->max_limit_perf, false);
> +
>   		perf_ctrls.energy_perf = epp;
>   		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
>   		if (ret) {
> @@ -452,16 +475,6 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
>   	return static_call(amd_pstate_init_perf)(cpudata);
>   }
>   
> -static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
> -			       u32 des_perf, u32 max_perf, bool fast_switch)
> -{
> -	if (fast_switch)
> -		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
> -	else
> -		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> -			      READ_ONCE(cpudata->cppc_req_cached));
> -}
> -
>   static void cppc_update_perf(struct amd_cpudata *cpudata,
>   			     u32 min_perf, u32 des_perf,
>   			     u32 max_perf, bool fast_switch) @@ -475,16 +488,6 @@ static 
> void cppc_update_perf(struct amd_cpudata *cpudata,
>   	cppc_set_perf(cpudata->cpu, &perf_ctrls);
>   }
>   
> -DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);
> -
> -static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
> -					  u32 min_perf, u32 des_perf,
> -					  u32 max_perf, bool fast_switch)
> -{
> -	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
> -					    max_perf, fast_switch);
> -}
> -
>   static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
>   {
>   	u64 aperf, mperf, tsc;


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ