[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220826010119.1265764-5-longman@redhat.com>
Date: Thu, 25 Aug 2022 21:01:18 -0400
From: Waiman Long <longman@...hat.com>
To: Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
Daniel Bristot de Oliveira <bristot@...hat.com>,
Valentin Schneider <vschneid@...hat.com>,
Tejun Heo <tj@...nel.org>, Zefan Li <lizefan.x@...edance.com>,
Johannes Weiner <hannes@...xchg.org>,
Will Deacon <will@...nel.org>
Cc: linux-kernel@...r.kernel.org,
Linus Torvalds <torvalds@...ux-foundation.org>,
Lai Jiangshan <jiangshanlai@...il.com>,
Waiman Long <longman@...hat.com>
Subject: [PATCH v6 4/5] sched: Handle set_cpus_allowed_ptr() & sched_setaffinity() race
Racing is possible between set_cpus_allowed_ptr() and sched_setaffinity()
or between multiple sched_setaffinity() calls from different CPUs. To
resolve these race conditions, we need to update both user_cpus_ptr
and cpus_mask in a single lock critical section instead of separated
ones. This requires moving the user_cpus_ptr update to
affine_move_task() before doing task_rq_unlock().
A new argument puser_mask is added to affine_move_task(),
__set_cpus_allowed_ptr_locked() and __set_cpus_allowed_ptr() to do that.
Ideally, user_cpus_ptr should only be updated if the sched_setaffinity()
is successful. However, this patch will update user_cpus_ptr when the
first call to __set_cpus_allowed_ptr() is successful. However, if there
is racing between sched_setaffinity() and cpuset update, the subsequent
calls to __set_cpus_allowed_ptr() may fail but the user_cpus_ptr will
still be updated in this corner case.
Signed-off-by: Waiman Long <longman@...hat.com>
---
kernel/sched/core.c | 66 ++++++++++++++++++++++++++++-----------------
1 file changed, 42 insertions(+), 24 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1c2f548e5369..6cd1177fbcea 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2199,7 +2199,7 @@ __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32
static int __set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask,
- u32 flags);
+ u32 flags, struct cpumask **puser_mask);
static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
{
@@ -2249,7 +2249,7 @@ void migrate_enable(void)
*/
preempt_disable();
if (p->cpus_ptr != &p->cpus_mask)
- __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
+ __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE, NULL);
/*
* Mustn't clear migration_disabled() until cpus_ptr points back at the
* regular cpus_mask, otherwise things that race (eg.
@@ -2618,6 +2618,15 @@ void release_user_cpus_ptr(struct task_struct *p)
kfree(clear_user_cpus_ptr(p));
}
+static inline void swap_user_cpus_ptr(struct task_struct *p,
+ struct cpumask **puser_mask)
+{
+ if (!puser_mask)
+ return;
+
+ swap(p->user_cpus_ptr, *puser_mask);
+}
+
/*
* This function is wildly self concurrent; here be dragons.
*
@@ -2693,9 +2702,12 @@ void release_user_cpus_ptr(struct task_struct *p)
* Note that the above is safe vs a concurrent migrate_enable(), as any
* pending affinity completion is preceded by an uninstallation of
* p->migration_pending done with p->pi_lock held.
+ *
+ * The puser_mask pointer, if defined, will cause its swap with the current
+ * user_cpus_ptr value if operation succeeds.
*/
static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
- int dest_cpu, unsigned int flags)
+ int dest_cpu, unsigned int flags, struct cpumask **puser_mask)
__releases(rq->lock)
__releases(p->pi_lock)
{
@@ -2722,6 +2734,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
complete = true;
}
+ swap_user_cpus_ptr(p, puser_mask);
task_rq_unlock(rq, p, rf);
if (push_task) {
@@ -2793,6 +2806,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
if (flags & SCA_MIGRATE_ENABLE)
p->migration_flags &= ~MDF_PUSH;
+ swap_user_cpus_ptr(p, puser_mask);
task_rq_unlock(rq, p, rf);
if (!stop_pending) {
@@ -2813,6 +2827,8 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
complete = true;
}
}
+
+ swap_user_cpus_ptr(p, puser_mask);
task_rq_unlock(rq, p, rf);
if (complete)
@@ -2843,7 +2859,8 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
const struct cpumask *new_mask,
u32 flags,
struct rq *rq,
- struct rq_flags *rf)
+ struct rq_flags *rf,
+ struct cpumask **puser_mask)
__releases(rq->lock)
__releases(p->pi_lock)
{
@@ -2908,7 +2925,7 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
__do_set_cpus_allowed(p, new_mask, flags);
- return affine_move_task(rq, p, rf, dest_cpu, flags);
+ return affine_move_task(rq, p, rf, dest_cpu, flags, puser_mask);
out:
task_rq_unlock(rq, p, rf);
@@ -2926,7 +2943,8 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
* call is not atomic; no spinlocks may be held.
*/
static int __set_cpus_allowed_ptr(struct task_struct *p,
- const struct cpumask *new_mask, u32 flags)
+ const struct cpumask *new_mask, u32 flags,
+ struct cpumask **puser_mask)
{
struct cpumask *alloc_mask = NULL;
struct rq_flags rf;
@@ -2934,8 +2952,11 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
int ret;
rq = task_rq_lock(p, &rf);
- if (p->user_cpus_ptr) {
+ /*
+ * user_cpus_ptr masking is skipped if puser_mask is defined.
+ */
+ if (p->user_cpus_ptr && !puser_mask) {
/*
* A scratch cpumask is allocated on the percpu runqueues
* to enable additional masking with user_cpus_ptr. This
@@ -2958,7 +2979,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
}
- ret = __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf);
+ ret = __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf,
+ puser_mask);
if (unlikely(alloc_mask))
kfree(alloc_mask);
return ret;
@@ -2966,7 +2988,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
- return __set_cpus_allowed_ptr(p, new_mask, 0);
+ return __set_cpus_allowed_ptr(p, new_mask, 0, NULL);
}
EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
@@ -3004,7 +3026,7 @@ static int restrict_cpus_allowed_ptr(struct task_struct *p,
goto err_unlock;
}
- return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf);
+ return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf, NULL);
err_unlock:
task_rq_unlock(rq, p, &rf);
@@ -3551,7 +3573,7 @@ void sched_set_stop_task(int cpu, struct task_struct *stop)
static inline int __set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask,
- u32 flags)
+ u32 flags, struct cpumask **puser_mask)
{
return set_cpus_allowed_ptr(p, new_mask);
}
@@ -8109,29 +8131,25 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask, bool save
}
cpumask_copy(user_mask, mask);
}
-again:
- retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
+
+ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK,
+ user_mask ? &user_mask : NULL);
if (retval)
goto out_free_new_mask;
- cpuset_cpus_allowed(p, cpus_allowed);
- if (!cpumask_subset(new_mask, cpus_allowed)) {
+ for (;;) {
+ cpuset_cpus_allowed(p, cpus_allowed);
+ if (cpumask_subset(new_mask, cpus_allowed))
+ break;
+
/*
* We must have raced with a concurrent cpuset update.
* Just reset the cpumask to the cpuset's cpus_allowed.
*/
cpumask_copy(new_mask, cpus_allowed);
- goto again;
+ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK, NULL);
}
- if (save_mask) {
- unsigned long flags;
-
- /* Use pi_lock to synchronize changes to user_cpus_ptr */
- raw_spin_lock_irqsave(&p->pi_lock, flags);
- swap(p->user_cpus_ptr, user_mask);
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
- }
out_free_new_mask:
kfree(user_mask);
free_cpumask_var(new_mask);
--
2.31.1
Powered by blists - more mailing lists