[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <fe1702b4-1b22-6293-11a2-2de82ff729a6@redhat.com>
Date: Fri, 18 Aug 2023 14:47:21 -0400
From: Waiman Long <longman@...hat.com>
To: Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
Daniel Bristot de Oliveira <bristot@...hat.com>,
Valentin Schneider <vschneid@...hat.com>
Cc: linux-kernel@...r.kernel.org, Phil Auld <pauld@...hat.com>,
Brent Rowsell <browsell@...hat.com>,
Peter Hunt <pehunt@...hat.com>
Subject: Re: [PATCH v3] sched/core: Use empty mask to reset cpumasks in
sched_setaffinity()
On 8/3/23 22:32, Waiman Long wrote:
> Since commit 8f9ea86fdf99 ("sched: Always preserve the user requested
> cpumask"), user provided CPU affinity via sched_setaffinity(2) is
> preserved even if the task is being moved to a different cpuset. However,
> that affinity is also being inherited by any subsequently created child
> processes which may not want or be aware of that affinity.
>
> One way to solve this problem is to provide a way to back off from
> that user provided CPU affinity. This patch implements such a scheme
> by using an empty cpumask to signal a reset of the cpumasks to the
> default as allowed by the current cpuset.
>
> Before this patch, passing in an empty cpumask to sched_setaffinity(2)
> will always return an -EINVAL error. With this patch, an alternative
> error of -ENODEV will be returned if sched_setaffinity(2)
> has been called before to set up user_cpus_ptr. In this case, the
> user_cpus_ptr that stores the user provided affinity will be cleared and
> the task's CPU affinity will be reset to that of the current cpuset. This
> alternative error code of -ENODEV signals that no CPU is specified
> and, at the same time, that the CPU affinity has been reset to the
> cpuset default as a side effect.
>
> If sched_setaffinity(2) has not been called previously, an -EINVAL error
> will be returned with an empty cpumask just like before. Tests or
> tools that rely on the behavior that an empty cpumask will return an
> error code will not be affected.
>
> We will have to update the sched_setaffinity(2) manpage to document
> this possible side effect of passing in an empty cpumask.
>
> Signed-off-by: Waiman Long <longman@...hat.com>
Ping.
Are there other concerns about this patch? I haven't seen any error
reports from the kernel test robot so far.
Cheers,
Longman
> ---
> kernel/sched/core.c | 42 +++++++++++++++++++++++++++++++++---------
> 1 file changed, 33 insertions(+), 9 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index c52c2eba7c73..3ef7397f2a61 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -8317,7 +8317,12 @@ __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx)
> }
>
> cpuset_cpus_allowed(p, cpus_allowed);
> - cpumask_and(new_mask, ctx->new_mask, cpus_allowed);
> +
> + /* Default to cpus_allowed with NULL new_mask */
> + if (ctx->new_mask)
> + cpumask_and(new_mask, ctx->new_mask, cpus_allowed);
> + else
> + cpumask_copy(new_mask, cpus_allowed);
>
> ctx->new_mask = new_mask;
> ctx->flags |= SCA_CHECK;
> @@ -8366,6 +8371,7 @@ __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx)
>
> long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
> {
> + bool reset_cpumasks = cpumask_empty(in_mask);
> struct affinity_context ac;
> struct cpumask *user_mask;
> struct task_struct *p;
> @@ -8403,15 +8409,26 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
> goto out_put_task;
>
> /*
> - * With non-SMP configs, user_cpus_ptr/user_mask isn't used and
> - * alloc_user_cpus_ptr() returns NULL.
> + * If an empty cpumask is passed in and user_cpus_ptr is set,
> + * clear user_cpus_ptr and reset the current cpu affinity to the
> + * default for the current cpuset. If user_cpus_ptr isn't set,
> + * -EINVAL will be returned as before.
> */
> - user_mask = alloc_user_cpus_ptr(NUMA_NO_NODE);
> - if (user_mask) {
> - cpumask_copy(user_mask, in_mask);
> - } else if (IS_ENABLED(CONFIG_SMP)) {
> - retval = -ENOMEM;
> - goto out_put_task;
> + if (reset_cpumasks && p->user_cpus_ptr) {
> + in_mask = NULL; /* To be updated in __sched_setaffinity */
> + user_mask = NULL;
> + } else {
> + /*
> + * With non-SMP configs, user_cpus_ptr/user_mask isn't used
> + * and alloc_user_cpus_ptr() returns NULL.
> + */
> + user_mask = alloc_user_cpus_ptr(NUMA_NO_NODE);
> + if (user_mask) {
> + cpumask_copy(user_mask, in_mask);
> + } else if (IS_ENABLED(CONFIG_SMP)) {
> + retval = -ENOMEM;
> + goto out_put_task;
> + }
> }
>
> ac = (struct affinity_context){
> @@ -8423,6 +8440,13 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
> retval = __sched_setaffinity(p, &ac);
> kfree(ac.user_mask);
>
> + /*
> + * Force an error return (-ENODEV), if no error yet, for the empty
> + * cpumask case to avoid breaking existing tests.
> + */
> + if (reset_cpumasks && !retval)
> + retval = -ENODEV;
> +
> out_put_task:
> put_task_struct(p);
> return retval;
Powered by blists - more mailing lists