lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Fri, 26 Jan 2024 07:32:55 +0100
From: Dragan Simic <dsimic@...jaro.org>
To: Daniel Lezcano <daniel.lezcano@...aro.org>
Cc: Alexey Charkov <alchark@...il.com>, Rob Herring <robh+dt@...nel.org>,
 Krzysztof Kozlowski <krzysztof.kozlowski+dt@...aro.org>, Conor Dooley
 <conor+dt@...nel.org>, Heiko Stuebner <heiko@...ech.de>,
 devicetree@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
 linux-rockchip@...ts.infradead.org, linux-kernel@...r.kernel.org, Viresh
 Kumar <viresh.kumar@...aro.org>
Subject: Re: [PATCH 4/4] arm64: dts: rockchip: Add OPP data for CPU cores on
 RK3588

Hello Daniel,

On 2024-01-25 10:30, Daniel Lezcano wrote:
> On 24/01/2024 21:30, Alexey Charkov wrote:
>> By default the CPUs on RK3588 start up in a conservative performance
>> mode. Add frequency and voltage mappings to the device tree to enable
>> dynamic scaling via cpufreq
>> 
>> Signed-off-by: Alexey Charkov <alchark@...il.com>
>> ---
>>   arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 209 ++++++++++++++++++++++++++++++
>>   1 file changed, 209 insertions(+)
>> 
>> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>> index 131b9eb21398..e605be531a0f 100644
>> --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>> +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>> @@ -97,6 +97,7 @@ cpu_l0: cpu@0 {
>>   			clocks = <&scmi_clk SCMI_CLK_CPUL>;
>>   			assigned-clocks = <&scmi_clk SCMI_CLK_CPUL>;
>>   			assigned-clock-rates = <816000000>;
>> +			operating-points-v2 = <&cluster0_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <32768>;
>>   			i-cache-line-size = <64>;
>> @@ -116,6 +117,7 @@ cpu_l1: cpu@100 {
>>   			enable-method = "psci";
>>   			capacity-dmips-mhz = <530>;
>>   			clocks = <&scmi_clk SCMI_CLK_CPUL>;
>> +			operating-points-v2 = <&cluster0_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <32768>;
>>   			i-cache-line-size = <64>;
>> @@ -135,6 +137,7 @@ cpu_l2: cpu@200 {
>>   			enable-method = "psci";
>>   			capacity-dmips-mhz = <530>;
>>   			clocks = <&scmi_clk SCMI_CLK_CPUL>;
>> +			operating-points-v2 = <&cluster0_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <32768>;
>>   			i-cache-line-size = <64>;
>> @@ -154,6 +157,7 @@ cpu_l3: cpu@300 {
>>   			enable-method = "psci";
>>   			capacity-dmips-mhz = <530>;
>>   			clocks = <&scmi_clk SCMI_CLK_CPUL>;
>> +			operating-points-v2 = <&cluster0_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <32768>;
>>   			i-cache-line-size = <64>;
>> @@ -175,6 +179,7 @@ cpu_b0: cpu@400 {
>>   			clocks = <&scmi_clk SCMI_CLK_CPUB01>;
>>   			assigned-clocks = <&scmi_clk SCMI_CLK_CPUB01>;
>>   			assigned-clock-rates = <816000000>;
>> +			operating-points-v2 = <&cluster1_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <65536>;
>>   			i-cache-line-size = <64>;
>> @@ -194,6 +199,7 @@ cpu_b1: cpu@500 {
>>   			enable-method = "psci";
>>   			capacity-dmips-mhz = <1024>;
>>   			clocks = <&scmi_clk SCMI_CLK_CPUB01>;
>> +			operating-points-v2 = <&cluster1_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <65536>;
>>   			i-cache-line-size = <64>;
>> @@ -215,6 +221,7 @@ cpu_b2: cpu@600 {
>>   			clocks = <&scmi_clk SCMI_CLK_CPUB23>;
>>   			assigned-clocks = <&scmi_clk SCMI_CLK_CPUB23>;
>>   			assigned-clock-rates = <816000000>;
>> +			operating-points-v2 = <&cluster2_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <65536>;
>>   			i-cache-line-size = <64>;
>> @@ -234,6 +241,7 @@ cpu_b3: cpu@700 {
>>   			enable-method = "psci";
>>   			capacity-dmips-mhz = <1024>;
>>   			clocks = <&scmi_clk SCMI_CLK_CPUB23>;
>> +			operating-points-v2 = <&cluster2_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <65536>;
>>   			i-cache-line-size = <64>;
>> @@ -348,6 +356,207 @@ l3_cache: l3-cache {
>>   		};
>>   	};
>>
>> +	cluster0_opp_table: opp-table-cluster0 {
>> +		compatible = "operating-points-v2";
>> +		opp-shared;
>> +
>> +		opp-408000000 {
>> +			opp-hz = /bits/ 64 <408000000>;
>> +			opp-microvolt = <675000 675000 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-600000000 {
>> +			opp-hz = /bits/ 64 <600000000>;
>> +			opp-microvolt = <675000 675000 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-816000000 {
>> +			opp-hz = /bits/ 64 <816000000>;
>> +			opp-microvolt = <675000 675000 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1008000000 {
>> +			opp-hz = /bits/ 64 <1008000000>;
>> +			opp-microvolt = <675000 675000 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
> 
> It is not useful to introduce OPP with the same voltage. There is no
> gain in terms of energy efficiency as the compute capacity is linearly
> tied with power consumption (P=CxFxV²) in this case.
> 
> For example, opp-408 consumes 2 bogoWatts and opp-816 consumes 4
> bogoWatts (because of the same voltage).
> 
> For a workload, opp-408 takes 10 sec and opp-816 takes 5 sec because
> it is twice faster.
> 
> The energy consumption is:
> 
> opp-408 = 10 x 2 = 20 BogoJoules
> opp-816 = 5 x 4 = 20 BogoJoules

I'd respectfully disagree that including multiple OPPs with the same
voltage but different frequencies isn't useful.  Please allow me to
explain.

See, the total amount of consumed energy is, in general, the same for
such OPPs and the same CPU task(s), if we ignore the static leakage
current and similar effects, which aren't important here.  However, the
emphasis here is on "total", i.e. without taking into account the actual
amount of time required for the example CPU task(s) to complete.  If the
total amount of time is quite short, we aren't going to heat up the
package and the board enough to hit CPU thermal throttling;  running at
the highest frequency in such cases is sometimes referred to as "race to
idle", which is actually quite effective for battery-powered mobile
devices that tend to load their CPU cores in bursts, while staying mostly
idle the rest of the time.

However, if the CPU task(s) last long enough to actually saturate the
thermal capacity of the package and the board or the device, we're
getting into CPU throttling territory, in which running the CPU cores
slower, but still as fast as possible, may actually be beneficial for the
overall CPU performance.  By running the CPU cores slower, we're lowering
the power and "spreading" the total energy consumption over time, i.e.
we're giving the generated heat more time to dissipate into the
surroundings.  As we know, having more energy consumed by the SoC means
more heat generated by the SoC, but the resulting temperature of the SoC
depends on how fast the energy is consumed, i.e. on the power, which at
the same voltage is proportional to how fast the CPUs run;  of course,
all that is valid under the reasonable assumption that the entire cooling
setup, including the board surroundings, remains unchanged all the time.

With all that in mind, having a few OPPs with the same voltage but
different frequencies can actually help us achieve better CPU
performance.  That way, throttling won't have to slow the CPUs down more
than is actually needed to reach and maintain the desired thermal trip
temperatures.

>> +		opp-1200000000 {
>> +			opp-hz = /bits/ 64 <1200000000>;
>> +			opp-microvolt = <712500 712500 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1416000000 {
>> +			opp-hz = /bits/ 64 <1416000000>;
>> +			opp-microvolt = <762500 762500 950000>;
>> +			clock-latency-ns = <40000>;
>> +			opp-suspend;
>> +		};
>> +		opp-1608000000 {
>> +			opp-hz = /bits/ 64 <1608000000>;
>> +			opp-microvolt = <850000 850000 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1800000000 {
>> +			opp-hz = /bits/ 64 <1800000000>;
>> +			opp-microvolt = <950000 950000 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +	};
>> +
>> +	cluster1_opp_table: opp-table-cluster1 {
>> +		compatible = "operating-points-v2";
>> +		opp-shared;
>> +
>> +		opp-408000000 {
>> +			opp-hz = /bits/ 64 <408000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +			opp-suspend;
>> +		};
>> +		opp-600000000 {
>> +			opp-hz = /bits/ 64 <600000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-816000000 {
>> +			opp-hz = /bits/ 64 <816000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1008000000 {
>> +			opp-hz = /bits/ 64 <1008000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
> 
> same comment
> 
>> +		opp-1200000000 {
>> +			opp-hz = /bits/ 64 <1200000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1416000000 {
>> +			opp-hz = /bits/ 64 <1416000000>;
>> +			opp-microvolt = <725000 725000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1608000000 {
>> +			opp-hz = /bits/ 64 <1608000000>;
>> +			opp-microvolt = <762500 762500 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1800000000 {
>> +			opp-hz = /bits/ 64 <1800000000>;
>> +			opp-microvolt = <850000 850000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2016000000 {
>> +			opp-hz = /bits/ 64 <2016000000>;
>> +			opp-microvolt = <925000 925000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2208000000 {
>> +			opp-hz = /bits/ 64 <2208000000>;
>> +			opp-microvolt = <987500 987500 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2256000000 {
>> +			opp-hz = /bits/ 64 <2256000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2304000000 {
>> +			opp-hz = /bits/ 64 <2304000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2352000000 {
>> +			opp-hz = /bits/ 64 <2352000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2400000000 {
>> +			opp-hz = /bits/ 64 <2400000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
> 
> Same comment
> 
>> +	};
>> +
>> +	cluster2_opp_table: opp-table-cluster2 {
>> +		compatible = "operating-points-v2";
>> +		opp-shared;
>> +
>> +		opp-408000000 {
>> +			opp-hz = /bits/ 64 <408000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +			opp-suspend;
>> +		};
>> +		opp-600000000 {
>> +			opp-hz = /bits/ 64 <600000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-816000000 {
>> +			opp-hz = /bits/ 64 <816000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1008000000 {
>> +			opp-hz = /bits/ 64 <1008000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1200000000 {
>> +			opp-hz = /bits/ 64 <1200000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1416000000 {
>> +			opp-hz = /bits/ 64 <1416000000>;
>> +			opp-microvolt = <725000 725000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1608000000 {
>> +			opp-hz = /bits/ 64 <1608000000>;
>> +			opp-microvolt = <762500 762500 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1800000000 {
>> +			opp-hz = /bits/ 64 <1800000000>;
>> +			opp-microvolt = <850000 850000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2016000000 {
>> +			opp-hz = /bits/ 64 <2016000000>;
>> +			opp-microvolt = <925000 925000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2208000000 {
>> +			opp-hz = /bits/ 64 <2208000000>;
>> +			opp-microvolt = <987500 987500 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2256000000 {
>> +			opp-hz = /bits/ 64 <2256000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2304000000 {
>> +			opp-hz = /bits/ 64 <2304000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2352000000 {
>> +			opp-hz = /bits/ 64 <2352000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2400000000 {
>> +			opp-hz = /bits/ 64 <2400000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
> 
> Same comment
> 
>> +	};
>> +
>>   	firmware {
>>   		optee: optee {
>>   			compatible = "linaro,optee-tz";
>> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ