lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2a4efeea-cc70-ca0a-81fd-84d8b54586c0@arm.com>
Date:   Fri, 16 Apr 2021 19:00:32 +0200
From:   Dietmar Eggemann <dietmar.eggemann@....com>
To:     Ruifeng Zhang <ruifeng.zhang0110@...il.com>
Cc:     Valentin Schneider <valentin.schneider@....com>,
        linux@...linux.org.uk, sudeep.holla@....com,
        Greg KH <gregkh@...uxfoundation.org>,
        "Rafael J. Wysocki" <rafael@...nel.org>, a.p.zijlstra@...llo.nl,
        mingo@...nel.org, ruifeng.zhang1@...soc.com, nianfu.bai@...soc.com,
        linux-arm-kernel@...ts.infradead.org,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v2 0/1] arm: topology: parse the topology from the dt

On 16/04/2021 13:04, Ruifeng Zhang wrote:
> Dietmar Eggemann <dietmar.eggemann@....com> 于2021年4月16日周五 下午6:39写道:
>>
>> On 16/04/2021 11:32, Valentin Schneider wrote:
>>> On 16/04/21 15:47, Ruifeng Zhang wrote:

[...]

>> I'm confused. Do you have the MT bit set to 1 then? So the issue that
>> the mpidr handling in arm32's store_cpu_topology() is not correct does
>> not exist?
> 
> I have reconfirmed it, the MT bit has been set to 1. I am sorry for
> the previous messages.
> The mpidr parse by store_cpu_topology is correct, at least for the sc9863a.

Nice! This is sorted then.

[...]

>> Is this what you need for your arm32 kernel system? Adding the
>> possibility to parse cpu-map to create Phantom Domains?
> 
> Yes, I need parse DT cpu-map to create different Phantom Domains.
> With it, the dts should be change to:
>                 cpu-map {
>                         cluster0 {
>                                 core0 {
>                                         cpu = <&CPU0>;
>                                 };
>                                 core1 {
>                                         cpu = <&CPU1>;
>                                 };
>                                 core2 {
>                                         cpu = <&CPU2>;
>                                 };
>                                 core3 {
>                                         cpu = <&CPU3>;
>                                 };
>                         };
> 
>                         cluster1 {
>                                 core0 {
>                                         cpu = <&CPU4>;
>                                 };
>                                 core1 {
>                                         cpu = <&CPU5>;
>                                 };
>                                 core2 {
>                                         cpu = <&CPU6>;
>                                 };
>                                 core3 {
>                                         cpu = <&CPU7>;
>                                 };
>                         };
>                 };
> 

I'm afraid that this is now a much weaker case to get this into
mainline.

I'm able to run with an extra cpu-map entry:

diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index 012d40a7228c..f60d9b448253 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -35,6 +35,29 @@ cpus {
                #address-cells = <1>;
                #size-cells = <0>;
 
+               cpu-map {
+                       cluster0 {
+                               core0 {
+                                       cpu = <&cpu0>;
+                               };
+                               core1 {
+                                       cpu = <&cpu1>;
+                               };
+                       };
+
+                       cluster1 {
+                               core0 {
+                                       cpu = <&cpu2>;
+                               };
+                               core1 {
+                                       cpu = <&cpu3>;
+                               };
+                               core2 {
+                                       cpu = <&cpu4>;
+                               };
+                       };
+               };
+
                cpu0: cpu@0 {
 
a condensed version (see below) of your patch on my Arm32 TC2.
The move of update_cpu_capacity() in store_cpu_topology() is only
necessary when I use the old clock-frequency based cpu_efficiency
approach for asymmetric CPU capacity (TC2 is a15/a7):

diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index f60d9b448253..e0679cca40ed 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -64,7 +64,7 @@ cpu0: cpu@0 {
                        reg = <0>;
                        cci-control-port = <&cci_control1>;
                        cpu-idle-states = <&CLUSTER_SLEEP_BIG>;
-                       capacity-dmips-mhz = <1024>;
+                       clock-frequency = <1000000000>;
                        dynamic-power-coefficient = <990>;
                };
 
@@ -74,7 +74,7 @@ cpu1: cpu@1 {
                        reg = <1>;
                        cci-control-port = <&cci_control1>;
                        cpu-idle-states = <&CLUSTER_SLEEP_BIG>;
-                       capacity-dmips-mhz = <1024>;
+                       clock-frequency = <1000000000>;
                        dynamic-power-coefficient = <990>;
                };
 
@@ -84,7 +84,7 @@ cpu2: cpu@2 {
                        reg = <0x100>;
                        cci-control-port = <&cci_control2>;
                        cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>;
-                       capacity-dmips-mhz = <516>;
+                       clock-frequency = <800000000>;
                        dynamic-power-coefficient = <133>;
                };
 
@@ -94,7 +94,7 @@ cpu3: cpu@3 {
                        reg = <0x101>;
                        cci-control-port = <&cci_control2>;
                        cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>;
-                       capacity-dmips-mhz = <516>;
+                       clock-frequency = <800000000>;
                        dynamic-power-coefficient = <133>;
                };
 
@@ -104,7 +104,7 @@ cpu4: cpu@4 {
                        reg = <0x102>;
                        cci-control-port = <&cci_control2>;
                        cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>;
-                       capacity-dmips-mhz = <516>;
+                       clock-frequency = <800000000>;
                        dynamic-power-coefficient = <133>;
                };



diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index ef0058de432b..bff56c12c3a6 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -82,7 +82,7 @@ static bool cap_from_dt = true;
  * 'average' CPU is of middle capacity. Also see the comments near
  * table_efficiency[] and update_cpu_capacity().
  */
-static void __init parse_dt_topology(void)
+static void __init get_coretype_capacity(void)
 {
        const struct cpu_efficiency *cpu_eff;
        struct device_node *cn = NULL;
@@ -173,7 +173,7 @@ static void update_cpu_capacity(unsigned int cpu)
 }
 
 #else
-static inline void parse_dt_topology(void) {}
+static inline void get_coretype_capacity(void) {}
 static inline void update_cpu_capacity(unsigned int cpuid) {}
 #endif
 
@@ -221,14 +221,13 @@ void store_cpu_topology(unsigned int cpuid)
                cpuid_topo->package_id = -1;
        }
 
-       update_cpu_capacity(cpuid);
-
        pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
                cpuid, cpu_topology[cpuid].thread_id,
                cpu_topology[cpuid].core_id,
                cpu_topology[cpuid].package_id, mpidr);
 
 topology_populated:
+       update_cpu_capacity(cpuid);
        update_siblings_masks(cpuid);
 }
 
@@ -241,5 +240,6 @@ void __init init_cpu_topology(void)
        reset_cpu_topology();
        smp_wmb();
 
+       get_coretype_capacity();
        parse_dt_topology();
 }
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index de8587cc119e..a2335da28f2a 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -295,7 +295,6 @@ static void parsing_done_workfn(struct work_struct *work)
 core_initcall(free_raw_capacity);
 #endif
 
-#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
 /*
  * This function returns the logic cpu number of the node.
  * There are basically three kinds of return values:
@@ -441,7 +440,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
        return 0;
 }
 
-static int __init parse_dt_topology(void)
+int __init parse_dt_topology(void)
 {
        struct device_node *cn, *map;
        int ret = 0;
@@ -481,7 +480,6 @@ static int __init parse_dt_topology(void)
        of_node_put(cn);
        return ret;
 }
-#endif
 
 /*
  * cpu topology table
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 0f6cd6b73a61..cfa5a5072aa0 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -66,6 +66,7 @@ extern struct cpu_topology cpu_topology[NR_CPUS];
 #define topology_llc_cpumask(cpu)      (&cpu_topology[cpu].llc_sibling)
 void init_cpu_topology(void);
 void store_cpu_topology(unsigned int cpuid);
+int __init parse_dt_topology(void);
 const struct cpumask *cpu_coregroup_mask(int cpu);
 void update_siblings_masks(unsigned int cpu);
 void remove_cpu_topology(unsigned int cpuid);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ