lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 11 May 2022 09:38:54 +0000
From:   王擎 <wangqing@...o.com>
To:     Dietmar Eggemann <dietmar.eggemann@....com>,
        Sudeep Holla <sudeep.holla@....com>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        "Rafael J. Wysocki" <rafael@...nel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: [PATCH] arch_topology: support parsing cluster_id from DT


>> From: Wang Qing <wangqing@...o.com>
>> 
>> Use nested cluster structures in DT to support describing multi-level
>> cluster topologies.
>> 
>> Note: the clusters described in DT are currently not physical
>> boundaries. Since changing "cluster" to "socket" is too involved and
>> error-prone, this patch has no effect on one-level cluster topologies,
>> but it can handle multi-level cluster topologies to support CLUSTER_SCHED.
>> 
>> Signed-off-by: Wang Qing <wangqing@...o.com>
>> ---
>>  drivers/base/arch_topology.c | 25 +++++++++++++++++--------
>>  1 file changed, 17 insertions(+), 8 deletions(-)
>> 
>> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
>> index 1d6636ebaac5..f2ea8113d619 100644
>> --- a/drivers/base/arch_topology.c
>> +++ b/drivers/base/arch_topology.c
>> @@ -491,7 +491,7 @@ static int __init get_cpu_for_node(struct device_node *node)
>>  }
>>  
>>  static int __init parse_core(struct device_node *core, int package_id,
>> -                          int core_id)
>> +                          int cluster_id, int core_id)
>>  {
>>        char name[20];
>>        bool leaf = true;
>> @@ -507,6 +507,7 @@ static int __init parse_core(struct device_node *core, int package_id,
>>                        cpu = get_cpu_for_node(t);
>>                        if (cpu >= 0) {
>>                                cpu_topology[cpu].package_id = package_id;
>> +                             cpu_topology[cpu].cluster_id = cluster_id;
>>                                cpu_topology[cpu].core_id = core_id;
>>                                cpu_topology[cpu].thread_id = i;
>>                        } else if (cpu != -ENODEV) {
>> @@ -528,6 +529,7 @@ static int __init parse_core(struct device_node *core, int package_id,
>>                }
>>  
>>                cpu_topology[cpu].package_id = package_id;
>> +             cpu_topology[cpu].cluster_id = cluster_id;
>>                cpu_topology[cpu].core_id = core_id;
>>        } else if (leaf && cpu != -ENODEV) {
>>                pr_err("%pOF: Can't get CPU for leaf core\n", core);
>> @@ -544,13 +546,15 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>>        bool has_cores = false;
>>        struct device_node *c;
>>        static int package_id __initdata;
>> +     static int cluster_id __initdata;
>
>Starting with cluster_id = 0 breaks existing platforms like
>./arch/arm64/boot/dts/arm/juno.dts. For them it still has to be set to -1.

Yes, I noticed this problem; please help review V2.

>
>You get e.g.:
>
># cat /sys/kernel/debug/sched/domains/cpu1/domain*/name
>CLS
>DIE
>
>instead of:
>
># cat /sys/kernel/debug/sched/domains/cpu1/domain*/name
>MC
>DIE
>
>
>>        int core_id = 0;
>>        int i, ret;
>>  
>>        /*
>> -      * First check for child clusters; we currently ignore any
>> -      * information about the nesting of clusters and present the
>> -      * scheduler with a flat list of them.
>> +      * nesting of clusters :
>> +      * level 1:  package_id
>> +      * level 2:  cluster_id
>> +      * level 3+: ignore
>>         */
>>        i = 0;
>>        do {
>> @@ -559,6 +563,14 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>>                if (c) {
>>                        leaf = false;
>>                        ret = parse_cluster(c, depth + 1);
>> +                     if (depth == 0) {
>> +                             package_id++;
>> +                             cluster_id = 0;
>
>-                               cluster_id = 0;
>+                               cluster_id = -1;

This modification is not enough.

>
>Would have to be cluster_id = -1. 0 is a valid second-level cluster id.
>Otherwise you're not removing the CLS data from CPU4 to CPU7 in the
>`Armv9 with L2 complexes` cpu-map example I used for testing:
>
>                cpu-map {
>                        cluster0 {
>                                cluster0 {
>                                        core0 {
>                                                cpu = <&cpu0>;
>                                        };
>                                        core1 {
>                                                cpu = <&cpu1>;
>                                        };
>                                };
>                                cluster1 {
>                                        core0 {
>                                                cpu = <&cpu2>;
>                                        };
>                                        core1 {
>                                                cpu = <&cpu3>;
>                                        };
>                                };
>                        };
>                        cluster1 {
>                                core0 {
>                                        cpu = <&cpu4>;
>                                };
>                                core1 {
>                                        cpu = <&cpu5>;
>                                };
>                                core2 {
>                                        cpu = <&cpu6>;
>                                };
>                        };
>                        cluster2 {
>                                core0 {
>                                        cpu = <&cpu7>;
>                                };
>                        };
>                };
>
>> +                     } else if (depth == 1)
>> +                             cluster_id++;
>> +                     else
>> +                             pr_err("Ignore nested clusters with more than two levels!\n");
>> +
>>                        of_node_put(c);
>>                        if (ret != 0)
>>                                return ret;
>> @@ -582,7 +594,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>>                        }
>>  
>>                        if (leaf) {
>> -                             ret = parse_core(c, package_id, core_id++);
>> +                             ret = parse_core(c, package_id, cluster_id, core_id++);
>>                        } else {
>>                                pr_err("%pOF: Non-leaf cluster with core %s\n",
>>                                       cluster, name);
>> @@ -599,9 +611,6 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>>        if (leaf && !has_cores)
>>                pr_warn("%pOF: empty cluster\n", cluster);
>>  
>> -     if (leaf)
>> -             package_id++;
>> -
>>        return 0;
>>  }
>
>Looks like you also need to adapt update_siblings_masks() to only set
>cpu in &cpu_topo->thread_sibling and &cpuid_topo->thread_sibling when
>`cpu_topo->thread_id != -1`.
>
>@@ -723,11 +723,11 @@ void update_siblings_masks(unsigned int cpuid)
>                cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
>                cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
>
>-               if (cpuid_topo->core_id != cpu_topo->core_id)
>-                       continue;
>-
>-               cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
>-               cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
>+               if (cpu_topo->thread_id != -1 &&
>+                   cpuid_topo->core_id == cpu_topo->core_id) {
>+                       cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
>+                       cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
>+

This seems like another problem?

Thanks,
Qing

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ