lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f7d6d1ec-e95d-9c7a-db7c-e1445f6ff8fe@arm.com>
Date:   Tue, 10 May 2022 19:21:41 +0200
From:   Dietmar Eggemann <dietmar.eggemann@....com>
To:     Qing Wang <wangqing@...o.com>, Sudeep Holla <sudeep.holla@....com>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        "Rafael J. Wysocki" <rafael@...nel.org>,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH] arch_topology: support parsing cluster_id from DT

On 05/05/2022 10:35, Qing Wang wrote:
> From: Wang Qing <wangqing@...o.com>
> 
> Use nested cluster structures in DT to support describing multi-level
> cluster topologies.
> 
> Note: the clusters described in DT are currently not physical
> boundaries. Since changing "cluster" to "socket" is too involved and
> error prone, this patch has no effect on one-level cluster topologies,
> but it supports multi-level cluster topologies to enable CLUSTER_SCHED.
> 
> Signed-off-by: Wang Qing <wangqing@...o.com>
> ---
>  drivers/base/arch_topology.c | 25 +++++++++++++++++--------
>  1 file changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index 1d6636ebaac5..f2ea8113d619 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -491,7 +491,7 @@ static int __init get_cpu_for_node(struct device_node *node)
>  }
>  
>  static int __init parse_core(struct device_node *core, int package_id,
> -			     int core_id)
> +			     int cluster_id, int core_id)
>  {
>  	char name[20];
>  	bool leaf = true;
> @@ -507,6 +507,7 @@ static int __init parse_core(struct device_node *core, int package_id,
>  			cpu = get_cpu_for_node(t);
>  			if (cpu >= 0) {
>  				cpu_topology[cpu].package_id = package_id;
> +				cpu_topology[cpu].cluster_id = cluster_id;
>  				cpu_topology[cpu].core_id = core_id;
>  				cpu_topology[cpu].thread_id = i;
>  			} else if (cpu != -ENODEV) {
> @@ -528,6 +529,7 @@ static int __init parse_core(struct device_node *core, int package_id,
>  		}
>  
>  		cpu_topology[cpu].package_id = package_id;
> +		cpu_topology[cpu].cluster_id = cluster_id;
>  		cpu_topology[cpu].core_id = core_id;
>  	} else if (leaf && cpu != -ENODEV) {
>  		pr_err("%pOF: Can't get CPU for leaf core\n", core);
> @@ -544,13 +546,15 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>  	bool has_cores = false;
>  	struct device_node *c;
>  	static int package_id __initdata;
> +	static int cluster_id __initdata;

Starting with cluster_id = 0 breaks existing platforms like
./arch/arm64/boot/dts/arm/juno.dts. For them, cluster_id still has to be
set to -1.

You get e.g.:

# cat /sys/kernel/debug/sched/domains/cpu1/domain*/name
CLS
DIE

instead of:

# cat /sys/kernel/debug/sched/domains/cpu1/domain*/name
MC
DIE


>  	int core_id = 0;
>  	int i, ret;
>  
>  	/*
> -	 * First check for child clusters; we currently ignore any
> -	 * information about the nesting of clusters and present the
> -	 * scheduler with a flat list of them.
> +	 * nesting of clusters :
> +	 * level 1:  package_id
> +	 * level 2:  cluster_id
> +	 * level 3+: ignore
>  	 */
>  	i = 0;
>  	do {
> @@ -559,6 +563,14 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>  		if (c) {
>  			leaf = false;
>  			ret = parse_cluster(c, depth + 1);
> +			if (depth == 0) {
> +				package_id++;
> +				cluster_id = 0;

-                               cluster_id = 0;
+                               cluster_id = -1;

This would have to be cluster_id = -1, since 0 is a valid second-level
cluster id. Otherwise you're not removing the CLS data from CPU4 to CPU7
in the `Armv9 with L2 complexes` cpu-map example I used for testing:

                cpu-map {
                        cluster0 {
                                cluster0 {
                                        core0 {
                                                cpu = <&cpu0>;
                                        };
                                        core1 {
                                                cpu = <&cpu1>;
                                        };
                                };
                                cluster1 {
                                        core0 {
                                                cpu = <&cpu2>;
                                        };
                                        core1 {
                                                cpu = <&cpu3>;
                                        };
                                };
                        };
                        cluster1 {
                                core0 {
                                        cpu = <&cpu4>;
                                };
                                core1 {
                                        cpu = <&cpu5>;
                                };
                                core2 {
                                        cpu = <&cpu6>;
                                };
                        };
                        cluster2 {
                                core0 {
                                        cpu = <&cpu7>;
                                };
                        };
                };

> +			} else if (depth == 1)
> +				cluster_id++;
> +			else
> +				pr_err("Ignore nested clusters with more than two levels!\n");
> +
>  			of_node_put(c);
>  			if (ret != 0)
>  				return ret;
> @@ -582,7 +594,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>  			}
>  
>  			if (leaf) {
> -				ret = parse_core(c, package_id, core_id++);
> +				ret = parse_core(c, package_id, cluster_id, core_id++);
>  			} else {
>  				pr_err("%pOF: Non-leaf cluster with core %s\n",
>  				       cluster, name);
> @@ -599,9 +611,6 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>  	if (leaf && !has_cores)
>  		pr_warn("%pOF: empty cluster\n", cluster);
>  
> -	if (leaf)
> -		package_id++;
> -
>  	return 0;
>  }

It looks like you also need to adapt update_siblings_masks() so that it
only sets cpuid in &cpu_topo->thread_sibling and cpu in
&cpuid_topo->thread_sibling when `cpu_topo->thread_id != -1`.

@@ -723,11 +723,11 @@ void update_siblings_masks(unsigned int cpuid)
                cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
                cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

-               if (cpuid_topo->core_id != cpu_topo->core_id)
-                       continue;
-
-               cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
-               cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+               if (cpu_topo->thread_id != -1 &&
+                   cpuid_topo->core_id == cpu_topo->core_id) {
+                       cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+                       cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+               }

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ