Message-ID: <20241113134908.GB402105@pauld.westford.csb>
Date: Wed, 13 Nov 2024 08:49:08 -0500
From: Phil Auld <pauld@...hat.com>
To: Juri Lelli <juri.lelli@...hat.com>
Cc: Waiman Long <longman@...hat.com>, Tejun Heo <tj@...nel.org>,
Johannes Weiner <hannes@...xchg.org>,
Michal Koutny <mkoutny@...e.com>, Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>,
Qais Yousef <qyousef@...alina.io>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
"Joel Fernandes (Google)" <joel@...lfernandes.org>,
Suleiman Souhlal <suleiman@...gle.com>,
Aashish Sharma <shraash@...gle.com>,
Shin Kawamura <kawasin@...gle.com>,
Vineeth Remanan Pillai <vineeth@...byteword.org>,
linux-kernel@...r.kernel.org, cgroups@...r.kernel.org
Subject: Re: [PATCH 2/2] sched/deadline: Correctly account for allocated
bandwidth during hotplug
Hi Juri,
On Wed, Nov 13, 2024 at 12:57:23PM +0000 Juri Lelli wrote:
> For hotplug operations, DEADLINE needs to check that there is still enough
> bandwidth left after removing the CPU that is going offline. We however
> fail to do so currently.
>
> Restore the correct behavior by restructuring dl_bw_manage() a bit, so
> that overflow conditions (not enough bandwidth left) are properly
> checked. Also account for dl_server bandwidth, i.e. discount such
> bandwidht in the calculation since NORMAL tasks will be anyway moved
"bandwidth" :)
> away from the CPU as a result of the hotplug operation.
>
LGTM.
Reviewed-by: Phil Auld <pauld@...hat.com>
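
For anyone following along, the admission test this all boils down to
is __dl_overflow(). A rough sketch of its logic (paraphrased, not the
verbatim kernel code; cap is the root domain capacity in
SCHED_CAPACITY_SCALE units and the bandwidths are fixed-point ratios):

	#include <stdbool.h>
	#include <stdint.h>

	#define SCHED_CAPACITY_SHIFT	10	/* as in the kernel */

	/* Does total_bw - old_bw + new_bw exceed the allowed fraction
	 * of capacity cap?  max_bw == -1 means admission control is
	 * disabled, so nothing can ever overflow. */
	static bool dl_overflow_sketch(uint64_t max_bw, unsigned long cap,
				       uint64_t total_bw, uint64_t old_bw,
				       uint64_t new_bw)
	{
		if (max_bw == (uint64_t)-1)
			return false;
		return ((max_bw * cap) >> SCHED_CAPACITY_SHIFT) <
		       total_bw - old_bw + new_bw;
	}

The deactivate case below calls it as __dl_overflow(dl_b, cap,
fair_server_bw, 0): nothing new is admitted (new_bw == 0) and the
going-away CPU's fair server contribution is discounted (old_bw),
since the NORMAL tasks it would serve are migrated off anyway.
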
> Signed-off-by: Juri Lelli <juri.lelli@...hat.com>
> ---
>  kernel/sched/core.c     |  2 +-
>  kernel/sched/deadline.c | 33 ++++++++++++++++++++++++---------
>  kernel/sched/sched.h    |  2 +-
>  3 files changed, 26 insertions(+), 11 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 43e453ab7e20..d1049e784510 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -8057,7 +8057,7 @@ static void cpuset_cpu_active(void)
>  static int cpuset_cpu_inactive(unsigned int cpu)
>  {
>  	if (!cpuhp_tasks_frozen) {
> -		int ret = dl_bw_check_overflow(cpu);
> +		int ret = dl_bw_deactivate(cpu);
>
>  		if (ret)
>  			return ret;
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index e53208a50279..609685c5df05 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -3467,29 +3467,31 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
>  }
>
>  enum dl_bw_request {
> -	dl_bw_req_check_overflow = 0,
> +	dl_bw_req_deactivate = 0,
>  	dl_bw_req_alloc,
>  	dl_bw_req_free
>  };
>
>  static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
>  {
> -	unsigned long flags;
> +	unsigned long flags, cap;
>  	struct dl_bw *dl_b;
>  	bool overflow = 0;
> +	u64 fair_server_bw = 0;
>
>  	rcu_read_lock_sched();
>  	dl_b = dl_bw_of(cpu);
>  	raw_spin_lock_irqsave(&dl_b->lock, flags);
>
> -	if (req == dl_bw_req_free) {
> +	cap = dl_bw_capacity(cpu);
> +	switch (req) {
> +	case dl_bw_req_free:
>  		__dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu));
> -	} else {
> -		unsigned long cap = dl_bw_capacity(cpu);
> -
> +		break;
> +	case dl_bw_req_alloc:
>  		overflow = __dl_overflow(dl_b, cap, 0, dl_bw);
>
> -		if (req == dl_bw_req_alloc && !overflow) {
> +		if (!overflow) {
>  			/*
>  			 * We reserve space in the destination
>  			 * root_domain, as we can't fail after this point.
> @@ -3498,6 +3500,19 @@ static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
>  			 */
>  			__dl_add(dl_b, dl_bw, dl_bw_cpus(cpu));
>  		}
> +		break;
> +	case dl_bw_req_deactivate:
> +		/*
> +		 * cpu is going offline and NORMAL tasks will be moved away
> +		 * from it. We can thus discount dl_server bandwidth
> +		 * contribution as it won't need to be servicing tasks after
> +		 * the cpu is off.
> +		 */
> +		if (cpu_rq(cpu)->fair_server.dl_server)
> +			fair_server_bw = cpu_rq(cpu)->fair_server.dl_bw;
> +
> +		overflow = __dl_overflow(dl_b, cap, fair_server_bw, 0);
> +		break;
>  	}
>
>  	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
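
To make the discount concrete, a made-up example (assuming the default
50ms/1s, i.e. 5%, fair server and the default 95% admission limit): a
2-CPU root domain runs one 40% DEADLINE task plus the two fair servers,
so in one-CPU units

	total_bw = 40% + 5% + 5% = 50%

	offline one CPU:  cap = 1 CPU, allowed = 95%
	check:            total_bw - 5% (that CPU's server) = 45% < 95%

so no overflow and the hotplug proceeds. Without discounting old_bw,
the offlined CPU's fair server would keep counting against the shrunken
capacity even though it no longer has anything to serve.
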
> @@ -3506,9 +3521,9 @@ static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
>  	return overflow ? -EBUSY : 0;
>  }
>
> -int dl_bw_check_overflow(int cpu)
> +int dl_bw_deactivate(int cpu)
>  {
> -	return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0);
> +	return dl_bw_manage(dl_bw_req_deactivate, cpu, 0);
>  }
>
>  int dl_bw_alloc(int cpu, u64 dl_bw)
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index b1c3588a8f00..1fee840f1bab 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -362,7 +362,7 @@ extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
>  extern bool __checkparam_dl(const struct sched_attr *attr);
>  extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
>  extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
> -extern int dl_bw_check_overflow(int cpu);
> +extern int dl_bw_deactivate(int cpu);
>  extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec);
>  /*
>   * SCHED_DEADLINE supports servers (nested scheduling) with the following
> -- 
> 2.47.0
>
>
--