[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250904041516.3046-14-kprateek.nayak@amd.com>
Date: Thu, 4 Sep 2025 04:15:09 +0000
From: K Prateek Nayak <kprateek.nayak@....com>
To: Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>, Vincent Guittot
<vincent.guittot@...aro.org>, Anna-Maria Behnsen <anna-maria@...utronix.de>,
Frederic Weisbecker <frederic@...nel.org>, Thomas Gleixner
<tglx@...utronix.de>, <linux-kernel@...r.kernel.org>
CC: Dietmar Eggemann <dietmar.eggemann@....com>, Steven Rostedt
<rostedt@...dmis.org>, Ben Segall <bsegall@...gle.com>, Mel Gorman
<mgorman@...e.de>, Valentin Schneider <vschneid@...hat.com>, K Prateek Nayak
<kprateek.nayak@....com>, "Gautham R. Shenoy" <gautham.shenoy@....com>,
Swapnil Sapkal <swapnil.sapkal@....com>
Subject: [RFC PATCH 13/19] sched/fair: Extract the main _nohz_idle_balance() loop into a helper
With the distributed nohz idle CPU tracking using the
"nohz_shared_list", the trick to do the load balancing for the CPU
in charge of the nohz idle balance at the very end using
for_each_cpu_wrap() will no longer work.
Extract the main loop and the loop body doing the load balancing on
behalf of an idle target into two separate helpers -
sched_balance_nohz_idle() and sched_balance_idle_rq() respectively
keeping consistent with the naming convention in fair.c.
This will help transitioning _nohz_idle_balance() to use the
"nohz_shared_list" in the subsequent commit.
No functional changes intended.
Signed-off-by: K Prateek Nayak <kprateek.nayak@....com>
---
kernel/sched/fair.c | 161 ++++++++++++++++++++++++++++----------------
1 file changed, 102 insertions(+), 59 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d13de6feb467..ad6e97be97d5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -12580,6 +12580,97 @@ static bool update_nohz_stats(struct rq *rq)
return rq->has_blocked_load;
}
+/*
+ * sched_balance_idle_rq(): Balance a target idle rq.
+ * @rq: rq of the idle CPU that may require stats update or balancing.
+ * @flags: nohz flags of the balancing CPU.
+ * @next_balance: Pointer to variable storing the time in jiffies to
+ * trigger the next nohz idle balancing.
+ *
+ * Returns: Update flags for caller to take appropriate actions.
+ * NOHZ_STATS_KICK indicates rq still has blocked load after the update.
+ * NOHZ_NEXT_KICK indicates that *next_balance was updated to rq->next_balance.
+ */
+static unsigned int sched_balance_idle_rq(struct rq *rq,
+ unsigned int flags,
+ unsigned long *next_balance)
+{
+ unsigned int update_flags = 0;
+
+ /* If rq has blocked load, indicate via NOHZ_STATS_KICK. */
+ if ((flags & NOHZ_STATS_KICK) && update_nohz_stats(rq))
+ update_flags |= NOHZ_STATS_KICK;
+
+ /* If time for next balance is due, do the balance. */
+ if (time_after_eq(jiffies, rq->next_balance)) {
+ struct rq_flags rf;
+
+ /*
+ * The idle rq's clock is refreshed under its lock even when
+ * NOHZ_BALANCE_KICK is not set, mirroring the loop body being
+ * replaced (NOTE(review): presumably to keep the idle CPU's rq
+ * clock from going stale - confirm against the original code).
+ */
+ rq_lock_irqsave(rq, &rf);
+ update_rq_clock(rq);
+ rq_unlock_irqrestore(rq, &rf);
+
+ if (flags & NOHZ_BALANCE_KICK)
+ sched_balance_domains(rq, CPU_IDLE);
+ }
+
+ /* Indicate update to next_balance via NOHZ_NEXT_KICK. */
+ if (time_after(*next_balance, rq->next_balance)) {
+ *next_balance = rq->next_balance;
+ update_flags |= NOHZ_NEXT_KICK;
+ }
+
+ return update_flags;
+}
+
+/*
+ * sched_balance_nohz_idle(): Core nohz idle balancing loop.
+ * @balancing_cpu: CPU doing the balancing on behalf of all nohz idle CPUs.
+ * @flags: nohz flags of the balancing CPU.
+ * @start: Time in jiffies when nohz indicators were cleared.
+ *
+ * Returns:
+ * < 0 - The balancing CPU turned busy while balancing.
+ * 0 - All CPUs were balanced; No blocked load if @flags had NOHZ_STATS_KICK.
+ * > 0 - One or more idle CPUs still have blocked load if @flags had
+ * NOHZ_STATS_KICK.
+ */
+static int sched_balance_nohz_idle(int balancing_cpu, unsigned int flags, unsigned long start)
+{
+ /* Earliest time when we have to do rebalance again */
+ unsigned long next_balance = start + 60*HZ;
+ unsigned int update_flags = 0;
+ int target_cpu;
+
+ /*
+ * Start with the next CPU after the balancing CPU so we end with the
+ * balancing CPU itself, giving the other idle CPUs a chance to pull
+ * load first.
+ */
+ for_each_cpu_wrap(target_cpu, nohz.idle_cpus_mask, balancing_cpu + 1) {
+ if (!idle_cpu(target_cpu))
+ continue;
+
+ /*
+ * If balancing CPU gets work to do, stop the load balancing
+ * work being done for other CPUs. Next load balancing owner
+ * will pick it up.
+ *
+ * NOTE(review): a negative return makes the caller set
+ * nohz.has_blocked unconditionally ("if (ret)"), whereas the
+ * loop being replaced only set has_blocked_load on this path
+ * when NOHZ_STATS_KICK was set - confirm this is intended
+ * given the "No functional changes intended" claim.
+ */
+ if (!idle_cpu(balancing_cpu) && need_resched())
+ return -EBUSY;
+
+ update_flags |= sched_balance_idle_rq(cpu_rq(target_cpu), flags, &next_balance);
+ }
+
+ /*
+ * next_balance will be updated only when there is a need.
+ * When the CPU is attached to null domain for ex, it will not be
+ * updated.
+ */
+ if (likely(update_flags & NOHZ_NEXT_KICK))
+ nohz.next_balance = next_balance;
+
+ return update_flags & NOHZ_STATS_KICK;
+}
+
/*
* Internal function that runs load balance for all idle CPUs. The load balance
* can be a simple update of blocked load or a complete load balance with
@@ -12587,14 +12678,8 @@ static bool update_nohz_stats(struct rq *rq)
*/
static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags)
{
- /* Earliest time when we have to do rebalance again */
unsigned long now = jiffies;
- unsigned long next_balance = now + 60*HZ;
- bool has_blocked_load = false;
- int update_next_balance = 0;
- int this_cpu = this_rq->cpu;
- int balance_cpu;
- struct rq *rq;
+ int ret;
WARN_ON_ONCE((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);
@@ -12619,60 +12704,18 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags)
*/
smp_mb();
- /*
- * Start with the next CPU after this_cpu so we will end with this_cpu and let a
- * chance for other idle cpu to pull load.
- */
- for_each_cpu_wrap(balance_cpu, nohz.idle_cpus_mask, this_cpu+1) {
- if (!idle_cpu(balance_cpu))
- continue;
-
- /*
- * If this CPU gets work to do, stop the load balancing
- * work being done for other CPUs. Next load
- * balancing owner will pick it up.
- */
- if (!idle_cpu(this_cpu) && need_resched()) {
- if (flags & NOHZ_STATS_KICK)
- has_blocked_load = true;
- if (flags & NOHZ_NEXT_KICK)
- WRITE_ONCE(nohz.needs_update, 1);
- goto abort;
- }
-
- rq = cpu_rq(balance_cpu);
-
- if (flags & NOHZ_STATS_KICK)
- has_blocked_load |= update_nohz_stats(rq);
-
- /*
- * If time for next balance is due,
- * do the balance.
- */
- if (time_after_eq(jiffies, rq->next_balance)) {
- struct rq_flags rf;
-
- rq_lock_irqsave(rq, &rf);
- update_rq_clock(rq);
- rq_unlock_irqrestore(rq, &rf);
-
- if (flags & NOHZ_BALANCE_KICK)
- sched_balance_domains(rq, CPU_IDLE);
- }
-
- if (time_after(next_balance, rq->next_balance)) {
- next_balance = rq->next_balance;
- update_next_balance = 1;
- }
- }
+ ret = sched_balance_nohz_idle(cpu_of(this_rq), flags, now);
/*
- * next_balance will be updated only when there is a need.
- * When the CPU is attached to null domain for ex, it will not be
- * updated.
+ * The balancing CPU turned busy. Set nohz.{needs_update,has_blocked}
+ * indicators to ensure next CPU observing them triggers nohz idle
+ * balance again.
*/
- if (likely(update_next_balance))
- nohz.next_balance = next_balance;
+ if (ret < 0) {
+ if (flags & NOHZ_NEXT_KICK)
+ WRITE_ONCE(nohz.needs_update, 1);
+ goto abort;
+ }
if (flags & NOHZ_STATS_KICK)
WRITE_ONCE(nohz.next_blocked,
@@ -12680,7 +12723,7 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags)
abort:
/* There is still blocked load, enable periodic update */
- if (has_blocked_load)
+ if (ret)
WRITE_ONCE(nohz.has_blocked, 1);
}
--
2.34.1
Powered by blists - more mailing lists