Message-Id: <fa45f8eb38ab06d02847d57195c6304af3107c70.1754712565.git.tim.c.chen@linux.intel.com>
Date: Sat, 9 Aug 2025 13:07:47 +0800
From: Chen Yu <yu.c.chen@...el.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
K Prateek Nayak <kprateek.nayak@....com>,
"Gautham R . Shenoy" <gautham.shenoy@....com>
Cc: Vincent Guittot <vincent.guittot@...aro.org>,
Juri Lelli <juri.lelli@...hat.com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>,
Libo Chen <libo.chen@...cle.com>,
Madadi Vineeth Reddy <vineethr@...ux.ibm.com>,
Hillf Danton <hdanton@...a.com>,
Shrikanth Hegde <sshegde@...ux.ibm.com>,
Jianyong Wu <jianyong.wu@...look.com>,
Yangyu Chen <cyy@...self.name>,
Tingyin Duan <tingyin.duan@...il.com>,
Vern Hao <vernhao@...cent.com>,
Len Brown <len.brown@...el.com>,
Tim Chen <tim.c.chen@...ux.intel.com>,
Aubrey Li <aubrey.li@...el.com>,
Zhao Liu <zhao1.liu@...el.com>,
Chen Yu <yu.chen.surf@...il.com>,
Chen Yu <yu.c.chen@...el.com>,
linux-kernel@...r.kernel.org
Subject: [RFC PATCH v4 21/28] sched: Introduce a static key to enable cache-aware scheduling only for multiple LLCs
Enable cache-aware scheduling only when every node in the system has
more than one LLC; otherwise, keep it disabled. For example, if node0,
node1, and node2 each have 4 LLCs, while node3 has only 1 LLC
(possibly due to CPU hotplug), cache-aware scheduling should be
disabled.
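A minimal, self-contained sketch of the policy above (the helper name
and the per-node LLC counts are illustrative only, not part of this
patch):

#include <stdbool.h>
#include <stdio.h>

/* True only when every node reports more than one LLC. */
static bool all_nodes_have_multiple_llcs(const unsigned int *llcs_per_node,
					 unsigned int nr_nodes)
{
	unsigned int i;

	for (i = 0; i < nr_nodes; i++) {
		/* A single-LLC node is enough to disable the feature. */
		if (llcs_per_node[i] <= 1)
			return false;
	}
	return nr_nodes > 0;
}

int main(void)
{
	/* node0..node2 have 4 LLCs each, node3 has 1 (e.g. after hotplug). */
	unsigned int llcs[] = { 4, 4, 4, 1 };

	printf("cache-aware scheduling: %s\n",
	       all_nodes_have_multiple_llcs(llcs, 4) ? "enabled" : "disabled");
	return 0;
}

The patch tracks the same condition incrementally while the sched
domains are built: multi_llcs_node starts at 1, is cleared as soon as
a node with a single LLC is found, and the static key is only bumped
if it is still set at the end of build_sched_domains().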
Suggested-by: Libo Chen <libo.chen@...cle.com>
Co-developed-by: Tim Chen <tim.c.chen@...ux.intel.com>
Signed-off-by: Tim Chen <tim.c.chen@...ux.intel.com>
Signed-off-by: Chen Yu <yu.c.chen@...el.com>
---
kernel/sched/fair.c | 31 ++++++++++++++++++++++---------
kernel/sched/sched.h | 1 +
kernel/sched/topology.c | 22 ++++++++++++++++++++--
3 files changed, 43 insertions(+), 11 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6e61f9e1f628..194ec594561b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1175,6 +1175,8 @@ static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
#define EPOCH_PERIOD (HZ/100) /* 10 ms */
#define EPOCH_OLD 5 /* 50 ms */
+DEFINE_STATIC_KEY_FALSE(sched_cache_present);
+
static int llc_id(int cpu)
{
if (cpu < 0)
@@ -1318,7 +1320,8 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
unsigned long epoch;
int mm_sched_llc = -1;
- if (!sched_feat(SCHED_CACHE))
+ if (!sched_feat(SCHED_CACHE) ||
+ !static_branch_likely(&sched_cache_present))
return;
if (p->sched_class != &fair_sched_class)
@@ -1366,7 +1369,8 @@ static void task_tick_cache(struct rq *rq, struct task_struct *p)
struct callback_head *work = &p->cache_work;
struct mm_struct *mm = p->mm;
- if (!sched_feat(SCHED_CACHE))
+ if (!sched_feat(SCHED_CACHE) ||
+ !static_branch_likely(&sched_cache_present))
return;
if (!mm || !mm->pcpu_sched)
@@ -9063,7 +9067,8 @@ static int select_cache_cpu(struct task_struct *p, int prev_cpu)
struct mm_struct *mm = p->mm;
int cpu;
- if (!sched_feat(SCHED_CACHE) || !sched_feat(SCHED_CACHE_WAKE))
+ if (!sched_feat(SCHED_CACHE) || !sched_feat(SCHED_CACHE_WAKE) ||
+ !static_branch_likely(&sched_cache_present))
return prev_cpu;
if (!mm || p->nr_cpus_allowed == 1)
@@ -10024,6 +10029,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
#ifdef CONFIG_SCHED_CACHE
if (sched_feat(SCHED_CACHE) && sched_feat(SCHED_CACHE_LB) &&
+ static_branch_likely(&sched_cache_present) &&
get_migrate_hint(env->src_cpu, env->dst_cpu, p) == mig_forbid)
return 0;
#endif
@@ -10109,7 +10115,8 @@ static struct list_head
LIST_HEAD(no_pref_llc);
LIST_HEAD(pref_other_llc);
- if (!sched_feat(SCHED_CACHE) || !sched_feat(SCHED_CACHE_LB))
+ if (!sched_feat(SCHED_CACHE) || !sched_feat(SCHED_CACHE_LB) ||
+ !static_branch_likely(&sched_cache_present))
return tasks;
if (cpus_share_cache(env->dst_cpu, env->src_cpu))
@@ -10295,6 +10302,7 @@ static int detach_tasks(struct lb_env *env)
* they are tasks that prefer the current LLC.
*/
if (sched_feat(SCHED_CACHE) && sched_feat(SCHED_CACHE_LB) &&
+ static_branch_likely(&sched_cache_present) &&
p->preferred_llc != -1 &&
llc_id(env->src_cpu) == p->preferred_llc)
break;
@@ -10952,7 +10960,8 @@ static inline bool llc_balance(struct lb_env *env, struct sg_lb_stats *sgs,
struct sched_domain *child = env->sd->child;
int llc;
- if (!sched_feat(SCHED_CACHE) || !sched_feat(SCHED_CACHE_LB))
+ if (!sched_feat(SCHED_CACHE) || !sched_feat(SCHED_CACHE_LB) ||
+ !static_branch_likely(&sched_cache_present))
return false;
if (env->sd->flags & SD_SHARE_LLC)
@@ -11064,7 +11073,8 @@ static void update_sg_if_llc(struct lb_env *env, struct sg_lb_stats *sgs,
struct sched_domain_shared *sd_share;
if (!sched_feat(SCHED_CACHE) || env->idle == CPU_NEWLY_IDLE ||
- !sched_feat(SCHED_CACHE_LB))
+ !sched_feat(SCHED_CACHE_LB) ||
+ !static_branch_likely(&sched_cache_present))
return;
/* only care the sched domain that spans 1 LLC */
@@ -11126,7 +11136,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
*sg_overutilized = 1;
#ifdef CONFIG_SCHED_CACHE
- if (sched_feat(SCHED_CACHE) && sched_feat(SCHED_CACHE_LB)) {
+ if (sched_feat(SCHED_CACHE) && sched_feat(SCHED_CACHE_LB) &&
+ static_branch_likely(&sched_cache_present)) {
int j;
for (j = 0; j < max_llcs; ++j)
@@ -12412,7 +12423,8 @@ imbalanced_active_balance(struct lb_env *env)
static inline bool
break_llc_locality(struct lb_env *env)
{
- if (!sched_feat(SCHED_CACHE) || !sched_feat(SCHED_CACHE_LB))
+ if (!sched_feat(SCHED_CACHE) || !sched_feat(SCHED_CACHE_LB) ||
+ !static_branch_likely(&sched_cache_present))
return 0;
if (cpus_share_cache(env->src_cpu, env->dst_cpu))
@@ -12914,7 +12926,8 @@ static int active_load_balance_cpu_stop(void *data)
#ifdef CONFIG_SCHED_CACHE
int llc = llc_idx(target_cpu);
- if (!sched_feat(SCHED_CACHE) || !sched_feat(SCHED_CACHE_LB))
+ if (!sched_feat(SCHED_CACHE) || !sched_feat(SCHED_CACHE_LB) ||
+ !static_branch_likely(&sched_cache_present))
goto out_unlock;
if (llc < 0)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4464b92767ad..3e60618a88e9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2857,6 +2857,7 @@ extern unsigned int sysctl_numa_balancing_hot_threshold;
#ifdef CONFIG_SCHED_CACHE
extern unsigned int sysctl_llc_aggr_cap;
extern unsigned int sysctl_llc_aggr_imb;
+extern struct static_key_false sched_cache_present;
#endif
#ifdef CONFIG_SCHED_HRTICK
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 91a2b7f65fee..8483c02b4d28 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2476,6 +2476,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
int i, ret = -ENOMEM;
bool has_asym = false;
bool has_cluster = false;
+ bool llc_has_parent_sd = false;
+ unsigned int multi_llcs_node = 1;
#ifdef CONFIG_SCHED_CACHE
if (max_llcs < 0) {
@@ -2545,6 +2547,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
struct sched_domain __rcu *top_p;
unsigned int nr_llcs;
+ if (!llc_has_parent_sd)
+ llc_has_parent_sd = true;
/*
* For a single LLC per node, allow an
* imbalance up to 12.5% of the node. This is
@@ -2566,10 +2570,19 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
* between LLCs and memory channels.
*/
nr_llcs = sd->span_weight / child->span_weight;
- if (nr_llcs == 1)
+ /*
+ * multi_llcs_node stays 1 only if every
+ * node has multiple LLCs. If at least one
+ * node has a single LLC, multi_llcs_node
+ * is cleared to 0.
+ */
+ if (nr_llcs == 1) {
imb = sd->span_weight >> 3;
- else
+ multi_llcs_node = 0;
+ } else {
imb = nr_llcs;
+ multi_llcs_node &= 1;
+ }
imb = max(1U, imb);
sd->imb_numa_nr = imb;
@@ -2617,6 +2630,11 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
if (has_cluster)
static_branch_inc_cpuslocked(&sched_cluster_active);
+#ifdef CONFIG_SCHED_CACHE
+ if (llc_has_parent_sd && multi_llcs_node && !sched_asym_cpucap_active())
+ static_branch_inc_cpuslocked(&sched_cache_present);
+#endif
+
if (rq && sched_debug_verbose)
pr_info("root domain span: %*pbl\n", cpumask_pr_args(cpu_map));
--
2.25.1