Message-ID: <f8baea06-eeda-439a-3699-1cad7cde659e@redhat.com>
Date: Wed, 29 Mar 2023 10:31:15 -0400
From: Waiman Long <longman@...hat.com>
To: Juri Lelli <juri.lelli@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...nel.org>,
Qais Yousef <qyousef@...alina.io>, Tejun Heo <tj@...nel.org>,
Zefan Li <lizefan.x@...edance.com>,
Johannes Weiner <hannes@...xchg.org>,
Hao Luo <haoluo@...gle.com>
Cc: Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
linux-kernel@...r.kernel.org, luca.abeni@...tannapisa.it,
claudio@...dence.eu.com, tommaso.cucinotta@...tannapisa.it,
bristot@...hat.com, mathieu.poirier@...aro.org,
cgroups@...r.kernel.org,
Vincent Guittot <vincent.guittot@...aro.org>,
Wei Wang <wvw@...gle.com>, Rick Yiu <rickyiu@...gle.com>,
Quentin Perret <qperret@...gle.com>,
Heiko Carstens <hca@...ux.ibm.com>,
Vasily Gorbik <gor@...ux.ibm.com>,
Alexander Gordeev <agordeev@...ux.ibm.com>,
Sudeep Holla <sudeep.holla@....com>
Subject: Re: [PATCH 5/6] cgroup/cpuset: Free DL BW in case can_attach() fails
On 3/29/23 10:25, Waiman Long wrote:
>
> On 3/29/23 08:55, Juri Lelli wrote:
>> From: Dietmar Eggemann <dietmar.eggemann@....com>
>>
>> cpuset_can_attach() can fail. Postpone DL BW allocation until all tasks
>> have been checked. DL BW is not allocated per-task but as a sum over
>> all DL tasks migrating.
>>
>> If multiple controllers are attached to the cgroup next to the cuset
> Typo: "cuset" => "cpuset"
>> controller a non-cpuset can_attach() can fail. In this case free DL BW
>> in cpuset_cancel_attach().
>>
>> Finally, update cpuset DL task count (nr_deadline_tasks) only in
>> cpuset_attach().
>>
>> Suggested-by: Waiman Long <longman@...hat.com>
>> Signed-off-by: Dietmar Eggemann <dietmar.eggemann@....com>
>> Signed-off-by: Juri Lelli <juri.lelli@...hat.com>
>> ---
>> include/linux/sched.h | 2 +-
>> kernel/cgroup/cpuset.c | 55 ++++++++++++++++++++++++++++++++++++++----
>> kernel/sched/core.c | 17 ++-----------
>> 3 files changed, 53 insertions(+), 21 deletions(-)
>>
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index 6f3d84e0ed08..50cbbfefbe11 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -1847,7 +1847,7 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags)
>> }
>> extern int cpuset_cpumask_can_shrink(const struct cpumask *cur,
>> const struct cpumask *trial);
>> -extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus);
>> +extern int task_can_attach(struct task_struct *p);
>> extern int dl_bw_alloc(int cpu, u64 dl_bw);
>> extern void dl_bw_free(int cpu, u64 dl_bw);
>> #ifdef CONFIG_SMP
>> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
>> index eb0854ef9757..f8ebec66da51 100644
>> --- a/kernel/cgroup/cpuset.c
>> +++ b/kernel/cgroup/cpuset.c
>> @@ -198,6 +198,8 @@ struct cpuset {
>> * know when to rebuild associated root domain bandwidth information.
>> */
>> int nr_deadline_tasks;
>> + int nr_migrate_dl_tasks;
>> + u64 sum_migrate_dl_bw;
>> /* Invalid partition error code, not lock protected */
>> enum prs_errcode prs_err;
>> @@ -2464,16 +2466,23 @@ static int fmeter_getrate(struct fmeter *fmp)
>> static struct cpuset *cpuset_attach_old_cs;
>> +static void reset_migrate_dl_data(struct cpuset *cs)
>> +{
>> + cs->nr_migrate_dl_tasks = 0;
>> + cs->sum_migrate_dl_bw = 0;
>> +}
>> +
>> /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
>> static int cpuset_can_attach(struct cgroup_taskset *tset)
>> {
>> struct cgroup_subsys_state *css;
>> - struct cpuset *cs;
>> + struct cpuset *cs, *oldcs;
>> struct task_struct *task;
>> int ret;
>> /* used later by cpuset_attach() */
>> cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
>> + oldcs = cpuset_attach_old_cs;
>> cs = css_cs(css);
>> mutex_lock(&cpuset_mutex);
>> @@ -2491,7 +2500,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
>> goto out_unlock;
>> cgroup_taskset_for_each(task, css, tset) {
>> - ret = task_can_attach(task, cs->effective_cpus);
>> + ret = task_can_attach(task);
>> if (ret)
>> goto out_unlock;
>> ret = security_task_setscheduler(task);
>> @@ -2499,11 +2508,31 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
>> goto out_unlock;
>> if (dl_task(task)) {
>> - cs->nr_deadline_tasks++;
>> - cpuset_attach_old_cs->nr_deadline_tasks--;
>> + cs->nr_migrate_dl_tasks++;
>> + cs->sum_migrate_dl_bw += task->dl.dl_bw;
>> + }
>> + }
>> +
>> + if (!cs->nr_migrate_dl_tasks)
>> + goto out_succes;
>> +
>> + if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) {
>> + int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
>> +
>> + if (unlikely(cpu >= nr_cpu_ids)) {
>> + reset_migrate_dl_data(cs);
>> + ret = -EINVAL;
>> + goto out_unlock;
>> + }
>> +
>> + ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
>> + if (ret) {
>> + reset_migrate_dl_data(cs);
>> + goto out_unlock;
>> }
>> }
>> +out_succes:
>> /*
>> * Mark attach is in progress. This makes validate_change() fail
>> * changes which zero cpus/mems_allowed.
>> @@ -2518,11 +2547,21 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
>> static void cpuset_cancel_attach(struct cgroup_taskset *tset)
>> {
>> struct cgroup_subsys_state *css;
>> + struct cpuset *cs;
>> cgroup_taskset_first(tset, &css);
>> + cs = css_cs(css);
>> mutex_lock(&cpuset_mutex);
>> - css_cs(css)->attach_in_progress--;
>> + cs->attach_in_progress--;
>> +
>> + if (cs->nr_migrate_dl_tasks) {
>> + int cpu = cpumask_any(cs->effective_cpus);
>> +
>> + dl_bw_free(cpu, cs->sum_migrate_dl_bw);
>> + reset_migrate_dl_data(cs);
>> + }
>> +
Another nit that I have is that you may also have to record the cpu on
which the DL bandwidth is allocated in cpuset_can_attach() and free the
bandwidth back to that same cpu in cpuset_cancel_attach(); otherwise
there can be an underflow if a different cpu is chosen there.
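For illustration only, a minimal (untested) sketch of that idea; the
field name migrate_dl_cpu is just a placeholder:

        struct cpuset {
                ...
                int nr_migrate_dl_tasks;
                u64 sum_migrate_dl_bw;
                int migrate_dl_cpu;     /* cpu the DL BW was charged to */
                ...
        };

        /* cpuset_can_attach(), after dl_bw_alloc() succeeds */
        ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
        if (ret) {
                reset_migrate_dl_data(cs);
                goto out_unlock;
        }
        cs->migrate_dl_cpu = cpu;

        /* cpuset_cancel_attach() then frees on the recorded cpu */
        if (cs->nr_migrate_dl_tasks) {
                dl_bw_free(cs->migrate_dl_cpu, cs->sum_migrate_dl_bw);
                reset_migrate_dl_data(cs);
        }

That way dl_bw_free() always goes back to the same root domain that
dl_bw_alloc() charged, even if cpumask_any(cs->effective_cpus) would
pick a different cpu by the time the attach is cancelled.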
Cheers,
Longman