lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <861e15234270eb3678390da2b0ddf3a7162d98dd.camel@linux.intel.com>
Date: Mon, 29 Sep 2025 15:18:53 -0700
From: Tim Chen <tim.c.chen@...ux.intel.com>
To: "Chen, Yu C" <yu.c.chen@...el.com>, Peter Zijlstra
 <peterz@...radead.org>,  Ingo Molnar <mingo@...hat.com>
Cc: Juri Lelli <juri.lelli@...hat.com>, Dietmar Eggemann	
 <dietmar.eggemann@....com>, Ben Segall <bsegall@...gle.com>, Mel Gorman	
 <mgorman@...e.de>, Valentin Schneider <vschneid@...hat.com>, Tim Chen	
 <tim.c.chen@...el.com>, Vincent Guittot <vincent.guittot@...aro.org>, Len
 Brown	 <len.brown@...el.com>, linux-kernel@...r.kernel.org, K Prateek Nayak
	 <kprateek.nayak@....com>, "Gautham R . Shenoy" <gautham.shenoy@....com>, 
 Zhao Liu <zhao1.liu@...el.com>, Vinicius Costa Gomes
 <vinicius.gomes@...el.com>, Arjan Van De Ven	 <arjan.van.de.ven@...el.com>
Subject: Re: [PATCH v4 1/2] sched: Create architecture specific sched domain
 distances

On Sat, 2025-09-27 at 20:34 +0800, Chen, Yu C wrote:
> [snip]
> 
> > @@ -1591,10 +1591,12 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
> >   enum numa_topology_type sched_numa_topology_type;
> >   
> >   static int			sched_domains_numa_levels;
> > +static int			sched_numa_node_levels;
> 
> I agree that the benefit of maintaining two NUMA distances - one for the
> sched_domain and another for the NUMA balancing/page allocation policy - is
> to avoid complicating the sched_domain hierarchy while preserving the
> advantages of NUMA locality.
> 
> Meanwhile, I wonder if we could also add a "orig" prefix to the original
> NUMA distance. This way, we can quickly understand its meaning later.
> For example,
> sched_orig_node_levels
> sched_orig_node_distance

I am not sure adding orig will make the meaning any clearer.
I can add comments to note that

sched_numa_node_distance mean the node distance between numa nodes
sched_numa_nodel_levels  mean the number of unique distances between numa nodes

> 
> >   static int			sched_domains_curr_level;
> >   
> >   int				sched_max_numa_distance;
> >   static int			*sched_domains_numa_distance;
> > +static int			*sched_numa_node_distance;
> >   static struct cpumask		***sched_domains_numa_masks;
> >   #endif /* CONFIG_NUMA */
> >   
> > @@ -1808,10 +1810,10 @@ bool find_numa_distance(int distance)
> >   		return true;
> >   
> >   	rcu_read_lock();
> > -	distances = rcu_dereference(sched_domains_numa_distance);
> > +	distances = rcu_dereference(sched_numa_node_distance);
> >   	if (!distances)
> >   		goto unlock;
> > -	for (i = 0; i < sched_domains_numa_levels; i++) {
> > +	for (i = 0; i < sched_numa_node_levels; i++) {
> >   		if (distances[i] == distance) {
> >   			found = true;
> >   			break;
> > @@ -1887,14 +1889,48 @@ static void init_numa_topology_type(int offline_node)
> >   
> >   #define NR_DISTANCE_VALUES (1 << DISTANCE_BITS)
> >   
> > -void sched_init_numa(int offline_node)
> > +/*
> > + * An architecture could modify its NUMA distance, to change
> > + * grouping of NUMA nodes and number of NUMA levels when creating
> > + * NUMA level sched domains.
> > + *
> > + * A NUMA level is created for each unique
> > + * arch_sched_node_distance.
> > + */
> > +static bool __modified_sched_node_dist = true;
> > +
> > +int __weak arch_sched_node_distance(int from, int to)
> >   {
> > -	struct sched_domain_topology_level *tl;
> > -	unsigned long *distance_map;
> > +	if (__modified_sched_node_dist)
> > +		__modified_sched_node_dist = false;
> > +
> > +	return node_distance(from, to);
> > +}
> > +
> > +static bool modified_sched_node_distance(void)
> > +{
> > +	/*
> > +	 * Call arch_sched_node_distance()
> > +	 * to determine if arch_sched_node_distance
> > +	 * has been modified from node_distance()
> > +	 * to arch specific distance.
> > +	 */
> > +	arch_sched_node_distance(0, 0);
> > +	return __modified_sched_node_dist;
> > +}
> > +
> 
> If our goal is to figure out whether the arch_sched_node_distance()
> has been overridden, how about the following alias?
> 
> int __weak arch_sched_node_distance(int from, int to)
> {
> 	return __node_distance(from, to);
> }
> int arch_sched_node_distance_original(int from, int to) __weak 
> __alias(arch_sched_node_distance);
> 
> static bool arch_sched_node_distance_is_overridden(void)
> {
> 	return arch_sched_node_distance != arch_sched_node_distance_original;
> }
> 
> so arch_sched_node_distance_is_overridden() can replace 
> modified_sched_node_distance()
> 

I think that the alias version will still point to the replaced function and not
the originally defined one.

How about not using __weak and just explicitly define arch_sched_node_distance
as a function pointer.  Change the code like below.

Tim

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index d6b772990ec2..12db78af09d5 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -545,7 +545,7 @@ static int avg_remote_numa_distance(void)
 	return sched_avg_remote_distance;
 }
 
-int arch_sched_node_distance(int from, int to)
+static int x86_arch_sched_node_distance(int from, int to)
 {
 	int d = node_distance(from, to);
 
@@ -918,6 +918,9 @@ static int do_boot_cpu(u32 apicid, unsigned int cpu, struct task_struct *idle)
 	/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
 	if (apic->wakeup_secondary_cpu_64)
 		start_ip = real_mode_header->trampoline_start64;
+#endif
+#ifdef CONFIG_NUMA
+	arch_sched_node_distance = x86_arch_sched_node_distance;
 #endif
 	idle->thread.sp = (unsigned long)task_pt_regs(idle);
 	initial_code = (unsigned long)start_secondary;
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 2d2d29553df8..3549c4a19816 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -56,7 +56,7 @@ static inline int cpu_numa_flags(void)
 {
 	return SD_NUMA;
 }
-extern int arch_sched_node_distance(int from, int to);
+extern int (*arch_sched_node_distance)(int, int);
 #endif
 
 extern int arch_asym_cpu_priority(int cpu);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index f25e4402c63e..7cfb7422e9d4 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1897,26 +1897,17 @@ static void init_numa_topology_type(int offline_node)
  * A NUMA level is created for each unique
  * arch_sched_node_distance.
  */
-static bool __modified_sched_node_dist = true;
 
-int __weak arch_sched_node_distance(int from, int to)
+static int default_sched_node_distance(int from, int to)
 {
-	if (__modified_sched_node_dist)
-		__modified_sched_node_dist = false;
-
 	return node_distance(from, to);
 }
 
+int (*arch_sched_node_distance)(int, int) = default_sched_node_distance;
+
 static bool modified_sched_node_distance(void)
 {
-	/*
-	 * Call arch_sched_node_distance()
-	 * to determine if arch_sched_node_distance
-	 * has been modified from node_distance()
-	 * to arch specific distance.
-	 */
-	arch_sched_node_distance(0, 0);
-	return __modified_sched_node_dist;
+	return arch_sched_node_distance != default_sched_node_distance;
 }
 
 static int numa_node_dist(int i, int j)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ