Message-ID: <20250314163614.1356125-2-vincent.guittot@linaro.org>
Date: Fri, 14 Mar 2025 17:36:08 +0100
From: Vincent Guittot <vincent.guittot@...aro.org>
To: mingo@...hat.com,
peterz@...radead.org,
juri.lelli@...hat.com,
dietmar.eggemann@....com,
rostedt@...dmis.org,
bsegall@...gle.com,
mgorman@...e.de,
vschneid@...hat.com,
lukasz.luba@....com,
rafael.j.wysocki@...el.com,
pierre.gondois@....com,
linux-kernel@...r.kernel.org
Cc: qyousef@...alina.io,
hongyan.xia2@....com,
christian.loehle@....com,
luis.machado@....com,
qperret@...gle.com,
Vincent Guittot <vincent.guittot@...aro.org>
Subject: [PATCH 1/7 v6] sched/fair: Filter false overloaded_group case for EAS
With EAS, a group should be set overloaded only if at least 1 CPU in the
group is overutilized. It can happen that a CPU is fully utilized by tasks
because uclamp has clamped the compute capacity of the CPU; in that case
the CPU is not overutilized and, as a result, the group should not be set
overloaded either.
Because group_overloaded has a higher priority than group_misfit, such a
group can be selected as the busiest group instead of a group with a
misfit task, which prevents load_balance() from selecting the CPU with
the misfit task and pulling the latter onto a fitting CPU.
Signed-off-by: Vincent Guittot <vincent.guittot@...aro.org>
Tested-by: Pierre Gondois <pierre.gondois@....com>
---
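For illustration only, a minimal userspace sketch of the idea (not kernel
code): sched_energy_enabled(), the struct fields and the group_type
ordering below are simplified stand-ins modelled on kernel/sched/fair.c.
It shows why a group that is merely uclamp-capped must not be classified
group_overloaded, so that a group carrying a misfit task can win the
busiest-group selection:

#include <stdbool.h>
#include <stdio.h>

/* Higher value == higher priority when picking the busiest group,
 * mirroring the relevant part of enum group_type ordering. */
enum group_type {
	group_has_spare = 0,
	group_misfit_task,
	group_overloaded,	/* beats group_misfit_task */
};

struct sg_stats {
	unsigned int sum_nr_running;
	unsigned int group_weight;
	unsigned int group_overutilized;	/* field added by this patch */
	unsigned long group_misfit_task_load;
};

/* Stand-in for sched_energy_enabled(): assume an EAS platform. */
static bool sched_energy_enabled(void) { return true; }

/* Simplified version of the patched group_is_overloaded() check. */
static bool group_is_overloaded(const struct sg_stats *sgs)
{
	/* With EAS and uclamp, at least one CPU must be overutilized
	 * before the group may be considered overloaded. */
	if (sched_energy_enabled() && !sgs->group_overutilized)
		return false;

	return sgs->sum_nr_running > sgs->group_weight;
}

/* Very reduced classification; the real group_classify() handles
 * more group types. */
static enum group_type classify(const struct sg_stats *sgs)
{
	if (group_is_overloaded(sgs))
		return group_overloaded;
	if (sgs->group_misfit_task_load)
		return group_misfit_task;
	return group_has_spare;
}

int main(void)
{
	/* Group A: 4 tasks on 2 uclamp-capped CPUs, none overutilized. */
	struct sg_stats a = { .sum_nr_running = 4, .group_weight = 2,
			      .group_overutilized = 0 };
	/* Group B: has a misfit task waiting for a bigger CPU. */
	struct sg_stats b = { .sum_nr_running = 2, .group_weight = 2,
			      .group_misfit_task_load = 512 };

	/* Without the filter, A would be group_overloaded and would beat B;
	 * with it, B (group_misfit_task) is picked as the busiest group. */
	printf("A=%d B=%d\n", classify(&a), classify(&b));
	return 0;
}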
kernel/sched/fair.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9dafb374d76d..2eba6258a518 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9933,6 +9933,7 @@ struct sg_lb_stats {
unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */
unsigned int group_smt_balance; /* Task on busy SMT be moved */
unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
+ unsigned int group_overutilized; /* At least one CPU is overutilized in the group */
#ifdef CONFIG_NUMA_BALANCING
unsigned int nr_numa_running;
unsigned int nr_preferred_running;
@@ -10165,6 +10166,13 @@ group_has_capacity(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
static inline bool
group_is_overloaded(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
{
+ /*
+ * With EAS and uclamp, 1 CPU in the group must be overutilized to
+ * consider the group overloaded.
+ */
+ if (sched_energy_enabled() && !sgs->group_overutilized)
+ return false;
+
if (sgs->sum_nr_running <= sgs->group_weight)
return false;
@@ -10348,14 +10356,12 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd)
* @group: sched_group whose statistics are to be updated.
* @sgs: variable to hold the statistics for this group.
* @sg_overloaded: sched_group is overloaded
- * @sg_overutilized: sched_group is overutilized
*/
static inline void update_sg_lb_stats(struct lb_env *env,
struct sd_lb_stats *sds,
struct sched_group *group,
struct sg_lb_stats *sgs,
- bool *sg_overloaded,
- bool *sg_overutilized)
+ bool *sg_overloaded)
{
int i, nr_running, local_group, sd_flags = env->sd->flags;
bool balancing_at_rd = !env->sd->parent;
@@ -10377,7 +10383,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->sum_nr_running += nr_running;
if (cpu_overutilized(i))
- *sg_overutilized = 1;
+ sgs->group_overutilized = 1;
/*
* No need to call idle_cpu() if nr_running is not 0
@@ -11048,13 +11054,15 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
update_group_capacity(env->sd, env->dst_cpu);
}
- update_sg_lb_stats(env, sds, sg, sgs, &sg_overloaded, &sg_overutilized);
+ update_sg_lb_stats(env, sds, sg, sgs, &sg_overloaded);
if (!local_group && update_sd_pick_busiest(env, sds, sg, sgs)) {
sds->busiest = sg;
sds->busiest_stat = *sgs;
}
+ sg_overutilized |= sgs->group_overutilized;
+
/* Now, start updating sd_lb_stats */
sds->total_load += sgs->group_load;
sds->total_capacity += sgs->group_capacity;
--
2.43.0