Message-Id: <ce9c071a11620b3ae7c155849483f8cbdbe0837e.1764801860.git.tim.c.chen@linux.intel.com>
Date: Wed,  3 Dec 2025 15:07:30 -0800
From: Tim Chen <tim.c.chen@...ux.intel.com>
To: Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...hat.com>,
	K Prateek Nayak <kprateek.nayak@....com>,
	"Gautham R . Shenoy" <gautham.shenoy@....com>,
	Vincent Guittot <vincent.guittot@...aro.org>
Cc: Tim Chen <tim.c.chen@...ux.intel.com>,
	Juri Lelli <juri.lelli@...hat.com>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Ben Segall <bsegall@...gle.com>,
	Mel Gorman <mgorman@...e.de>,
	Valentin Schneider <vschneid@...hat.com>,
	Madadi Vineeth Reddy <vineethr@...ux.ibm.com>,
	Hillf Danton <hdanton@...a.com>,
	Shrikanth Hegde <sshegde@...ux.ibm.com>,
	Jianyong Wu <jianyong.wu@...look.com>,
	Yangyu Chen <cyy@...self.name>,
	Tingyin Duan <tingyin.duan@...il.com>,
	Vern Hao <vernhao@...cent.com>,
	Vern Hao <haoxing990@...il.com>,
	Len Brown <len.brown@...el.com>,
	Aubrey Li <aubrey.li@...el.com>,
	Zhao Liu <zhao1.liu@...el.com>,
	Chen Yu <yu.chen.surf@...il.com>,
	Chen Yu <yu.c.chen@...el.com>,
	Adam Li <adamli@...amperecomputing.com>,
	Aaron Lu <ziqianlu@...edance.com>,
	Tim Chen <tim.c.chen@...el.com>,
	linux-kernel@...r.kernel.org
Subject: [PATCH v2 11/23] sched/cache: Prioritize tasks preferring destination LLC during balancing

During LLC load balancing, first check for tasks that prefer the
destination LLC and balance them to it before others.

Mark source sched groups that contain tasks preferring non-local LLCs
with the group_llc_balance flag. This ensures the load balancer later
pulls or pushes these tasks toward their preferred LLCs.
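
For reference, the new flag feeds into group classification roughly as
follows (a condensed sketch of the update_sg_lb_stats() and
group_classify() hunks below, not a literal excerpt):

	/* update_sg_lb_stats(): mark groups with tasks preferring dst_cpu's LLC */
	if (llc_balance(env, sgs, group))
		sgs->group_llc_balance = 1;

	/* group_classify(): checked after group_overloaded, before group_imbalanced */
	if (sgs->group_llc_balance)
		return group_llc_balance;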

The load balancer selects the busiest sched_group and migrates tasks
to less busy groups to distribute load across CPUs.

With cache-aware scheduling enabled, the busiest sched_group is the one
with the most tasks preferring the destination LLC. If a group has the
group_llc_balance flag set, cache-aware load balancing is triggered.

Introduce the helper function update_llc_busiest() to identify the
sched_group with the most tasks preferring the destination LLC.
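
Among candidate groups classified as group_llc_balance, the pick then
reduces to comparing nr_pref_llc (a condensed sketch of the
update_sd_pick_busiest() and update_llc_busiest() hunks below):

	/* update_sd_pick_busiest() */
	case group_llc_balance:
		/* Select the group with the most tasks preferring dst LLC */
		return update_llc_busiest(env, busiest, sgs);

	/* Pick the candidate with more tasks that want to run on dst_cpu's LLC */
	static bool update_llc_busiest(struct lb_env *env,
				       struct sg_lb_stats *busiest,
				       struct sg_lb_stats *sgs)
	{
		return sgs->nr_pref_llc > busiest->nr_pref_llc;
	}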

Suggested-by: K Prateek Nayak <kprateek.nayak@....com>
Co-developed-by: Chen Yu <yu.c.chen@...el.com>
Signed-off-by: Chen Yu <yu.c.chen@...el.com>
Signed-off-by: Tim Chen <tim.c.chen@...ux.intel.com>
---

Notes:
    v1->v2:
       Fix comparison in can_migrate_llc(), which uses an uninitialized
       env->src_cpu. Use the candidate group's first CPU instead. (Aaron Lu)
    
       Fix a race condition during bootup with build_sched_domains(),
       where the per-CPU sd_llc_id is reset to -1. (lkp/0day)

       Squash the setting of group_llc_balance and its use into a
       single patch. (Peter Zijlstra)
    
       Change group_llc_balance to be lower priority than group_overloaded
       and fold it into the normal load-balance path. (Peter Zijlstra)
    
       Remove the sched group's SD_SHARE_LLC check in llc_balance(), because
       task migration across NUMA nodes to a preferred LLC should be allowed,
       and those domains do not have the SD_SHARE_LLC flag.

 kernel/sched/fair.c | 66 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6e4c1ae1bdda..db555c11b5b8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9531,6 +9531,11 @@ enum group_type {
 	 * from balancing the load across the system.
 	 */
 	group_imbalanced,
+	/*
+	 * There are tasks running on a non-preferred LLC; they may be moved
+	 * to their preferred LLC without creating too much imbalance.
+	 */
+	group_llc_balance,
 	/*
 	 * The CPU is overloaded and can't provide expected CPU cycles to all
 	 * tasks.
@@ -10440,6 +10445,7 @@ struct sg_lb_stats {
 	enum group_type group_type;
 	unsigned int group_asym_packing;	/* Tasks should be moved to preferred CPU */
 	unsigned int group_smt_balance;		/* Task on busy SMT be moved */
+	unsigned int group_llc_balance;		/* Tasks should be moved to preferred LLC */
 	unsigned long group_misfit_task_load;	/* A CPU has a task too big for its capacity */
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int nr_numa_running;
@@ -10698,6 +10704,9 @@ group_type group_classify(unsigned int imbalance_pct,
 	if (group_is_overloaded(imbalance_pct, sgs))
 		return group_overloaded;
 
+	if (sgs->group_llc_balance)
+		return group_llc_balance;
+
 	if (sg_imbalanced(group))
 		return group_imbalanced;
 
@@ -10890,11 +10899,55 @@ static void record_sg_llc_stats(struct lb_env *env,
 	if (unlikely(READ_ONCE(sd_share->capacity) != sgs->group_capacity))
 		WRITE_ONCE(sd_share->capacity, sgs->group_capacity);
 }
+
+/*
+ * Do LLC balancing on a sched group that spans an LLC and has tasks
+ * preferring to run on the idle dst_cpu's LLC.
+ */
+static inline bool llc_balance(struct lb_env *env, struct sg_lb_stats *sgs,
+			       struct sched_group *group)
+{
+	if (!sched_cache_enabled())
+		return false;
+
+	if (env->sd->flags & SD_SHARE_LLC)
+		return false;
+
+	if (sgs->nr_pref_llc &&
+	    can_migrate_llc(cpumask_first(sched_group_span(group)),
+			    env->dst_cpu, 0, true) == mig_llc)
+		return true;
+
+	return false;
+}
+
+static bool update_llc_busiest(struct lb_env *env,
+			       struct sg_lb_stats *busiest,
+			       struct sg_lb_stats *sgs)
+{
+	/*
+	 * There are more tasks that want to run on dst_cpu's LLC.
+	 */
+	return sgs->nr_pref_llc > busiest->nr_pref_llc;
+}
 #else
 static inline void record_sg_llc_stats(struct lb_env *env, struct sg_lb_stats *sgs,
 				       struct sched_group *group)
 {
 }
+
+static inline bool llc_balance(struct lb_env *env, struct sg_lb_stats *sgs,
+			       struct sched_group *group)
+{
+	return false;
+}
+
+static bool update_llc_busiest(struct lb_env *env,
+			       struct sg_lb_stats *busiest,
+			       struct sg_lb_stats *sgs)
+{
+	return false;
+}
 #endif
 
 /**
@@ -10993,6 +11046,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 		/* Check for loaded SMT group to be balanced to dst CPU */
 		if (smt_balance(env, sgs, group))
 			sgs->group_smt_balance = 1;
+
+		/* Check if tasks in this group can be moved to their preferred LLC */
+		if (llc_balance(env, sgs, group))
+			sgs->group_llc_balance = 1;
 	}
 
 	sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
@@ -11056,6 +11113,10 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 		/* Select the overloaded group with highest avg_load. */
 		return sgs->avg_load > busiest->avg_load;
 
+	case group_llc_balance:
+		/* Select the group with the most tasks preferring dst LLC */
+		return update_llc_busiest(env, busiest, sgs);
+
 	case group_imbalanced:
 		/*
 		 * Select the 1st imbalanced group as we don't have any way to
@@ -11318,6 +11379,7 @@ static bool update_pick_idlest(struct sched_group *idlest,
 			return false;
 		break;
 
+	case group_llc_balance:
 	case group_imbalanced:
 	case group_asym_packing:
 	case group_smt_balance:
@@ -11450,6 +11512,7 @@ sched_balance_find_dst_group(struct sched_domain *sd, struct task_struct *p, int
 			return NULL;
 		break;
 
+	case group_llc_balance:
 	case group_imbalanced:
 	case group_asym_packing:
 	case group_smt_balance:
@@ -11949,7 +12012,8 @@ static struct sched_group *sched_balance_find_src_group(struct lb_env *env)
 	 * group's child domain.
 	 */
 	if (sds.prefer_sibling && local->group_type == group_has_spare &&
-	    sibling_imbalance(env, &sds, busiest, local) > 1)
+	    (busiest->group_type == group_llc_balance ||
+	    sibling_imbalance(env, &sds, busiest, local) > 1))
 		goto force_balance;
 
 	if (busiest->group_type != group_overloaded) {
-- 
2.32.0

