Message-ID: <20150528135355.GK3644@twins.programming.kicks-ass.net>
Date: Thu, 28 May 2015 15:53:55 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Mike Galbraith <umgwanakikbuti@...il.com>
Cc: LKML <linux-kernel@...r.kernel.org>, Ingo Molnar <mingo@...e.hu>,
ktkhai@...allels.com
Subject: Re: sched_setscheduler() vs idle_balance() race
On Thu, May 28, 2015 at 09:43:52AM +0200, Mike Galbraith wrote:
> Hi Peter,
>
> I'm not seeing what prevents pull_task() from yanking a task out from
> under __sched_setscheduler(). A box sprinkling smoldering 3.0 kernel
> wreckage all over my bugzilla mbox isn't seeing it either ;-)
Say, how easy can that thing be reproduced?
The below is compile-tested only, but it might just work if I didn't
miss anything :-)
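
For reference, the window I think Mike is hitting looks roughly like
this (a sketch only; whether the remote side is pull_task() as on 3.0
or the current detach/attach path doesn't really matter):

  CPU0: __sched_setscheduler(p)          CPU1: idle_balance()

    task_rq_lock(p)
    dequeue_task(rq, p)
    __setscheduler(rq, p, attr)
    enqueue_task(rq, p)
    check_class_changed()
      switched_from() / switched_to()
        ...drops rq->lock...             load_balance()
                                           pull_task(p)  /* @p changes rq */
        ...retakes rq->lock...
      /* rest of the function still uses the old rq, task_rq(p) != rq */

So the below calls switched_from() while @p is still dequeued, where it
is invisible to the load-balancer, and documents that once switched_to()
/ prio_changed() have run we must not rely on @p's rq anymore.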
---
kernel/sched/core.c | 137 +++++++++++++++++++++++++++++-----------------------
1 file changed, 77 insertions(+), 60 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4eec60757b16..28f1ddc0bef2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1000,22 +1000,6 @@ inline int task_curr(const struct task_struct *p)
return cpu_curr(task_cpu(p)) == p;
}
-/*
- * Can drop rq->lock because from sched_class::switched_from() methods drop it.
- */
-static inline void check_class_changed(struct rq *rq, struct task_struct *p,
- const struct sched_class *prev_class,
- int oldprio)
-{
- if (prev_class != p->sched_class) {
- if (prev_class->switched_from)
- prev_class->switched_from(rq, p);
- /* Possble rq->lock 'hole'. */
- p->sched_class->switched_to(rq, p);
- } else if (oldprio != p->prio || dl_task(p))
- p->sched_class->prio_changed(rq, p, oldprio);
-}
-
void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
{
const struct sched_class *class;
@@ -3075,12 +3059,38 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
p->prio = prio;
+ if (prev_class != p->sched_class) {
+ prev_class->switched_from(rq, p);
+ /*
+ * switched_from() is allowed to drop @rq->lock, which opens a
+ * race against load-balancing; however, since @p is not
+ * currently enqueued it is invisible to the load-balancer.
+ *
+ * Double check @p is still where we thought it was.
+ */
+ WARN_ON_ONCE(task_rq(p) != rq);
+ }
+
if (running)
p->sched_class->set_curr_task(rq);
if (queued)
enqueue_task(rq, p, enqueue_flag);
- check_class_changed(rq, p, prev_class, oldprio);
+ /*
+ * Both switched_to() and prio_changed() are allowed to drop @rq->lock,
+ * which opens a race against load-balancing. Since @p is now enqueued
+ * it can indeed be subject to that race.
+ *
+ * This means that any balancing done by these functions must double
+ * check a task's rq.
+ */
+ if (prev_class != p->sched_class)
+ p->sched_class->switched_to(rq, p);
+ else if (oldprio != p->prio || dl_task(p))
+ p->sched_class->prio_changed(rq, p, oldprio);
+ /*
+ * It further means we should not rely on @p's rq from here on.
+ */
out_unlock:
__task_rq_unlock(rq);
}
@@ -3420,7 +3430,7 @@ static bool dl_param_changed(struct task_struct *p,
static int __sched_setscheduler(struct task_struct *p,
const struct sched_attr *attr,
- bool user)
+ bool user, bool pi)
{
int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
MAX_RT_PRIO - 1 - attr->sched_priority;
@@ -3606,18 +3616,20 @@ static int __sched_setscheduler(struct task_struct *p,
p->sched_reset_on_fork = reset_on_fork;
oldprio = p->prio;
- /*
- * Take priority boosted tasks into account. If the new
- * effective priority is unchanged, we just store the new
- * normal parameters and do not touch the scheduler class and
- * the runqueue. This will be done when the task deboost
- * itself.
- */
- new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
- if (new_effective_prio == oldprio) {
- __setscheduler_params(p, attr);
- task_rq_unlock(rq, p, &flags);
- return 0;
+ if (pi) {
+ /*
+ * Take priority boosted tasks into account. If the new
+ * effective priority is unchanged, we just store the new
+ * normal parameters and do not touch the scheduler class and
+ * the runqueue. This will be done when the task deboosts
+ * itself.
+ */
+ new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+ if (new_effective_prio == oldprio) {
+ __setscheduler_params(p, attr);
+ task_rq_unlock(rq, p, &flags);
+ return 0;
+ }
}
queued = task_on_rq_queued(p);
@@ -3628,7 +3640,19 @@ static int __sched_setscheduler(struct task_struct *p,
put_prev_task(rq, p);
prev_class = p->sched_class;
- __setscheduler(rq, p, attr, true);
+ __setscheduler(rq, p, attr, pi);
+
+ if (prev_class != p->sched_class) {
+ prev_class->switched_from(rq, p);
+ /*
+ * switched_from() is allowed to drop @rq->lock, which opens a
+ * race against load-balancing; however, since @p is not
+ * currently enqueued it is invisible to the load-balancer.
+ *
+ * Double check @p is still where we thought it was.
+ */
+ WARN_ON_ONCE(task_rq(p) != rq);
+ }
if (running)
p->sched_class->set_curr_task(rq);
@@ -3640,10 +3664,25 @@ static int __sched_setscheduler(struct task_struct *p,
enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
}
- check_class_changed(rq, p, prev_class, oldprio);
+ /*
+ * Both switched_to() and prio_changed() are allowed to drop @rq->lock,
+ * which opens a race against load-balancing. Since @p is now enqueued
+ * it can indeed be subject to that race.
+ *
+ * This means that any balancing done by these functions must double
+ * check a task's rq.
+ */
+ if (prev_class != p->sched_class)
+ p->sched_class->switched_to(rq, p);
+ else if (oldprio != p->prio || dl_task(p))
+ p->sched_class->prio_changed(rq, p, oldprio);
+ /*
+ * It further means we should not rely on @p's rq from here on.
+ */
task_rq_unlock(rq, p, &flags);
- rt_mutex_adjust_pi(p);
+ if (pi)
+ rt_mutex_adjust_pi(p);
return 0;
}
@@ -3664,7 +3703,7 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
attr.sched_policy = policy;
}
- return __sched_setscheduler(p, &attr, check);
+ return __sched_setscheduler(p, &attr, check, true);
}
/**
* sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
@@ -3685,7 +3724,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
{
- return __sched_setscheduler(p, attr, true);
+ return __sched_setscheduler(p, attr, true, true);
}
EXPORT_SYMBOL_GPL(sched_setattr);
@@ -7346,32 +7385,12 @@ EXPORT_SYMBOL(___might_sleep);
#endif
#ifdef CONFIG_MAGIC_SYSRQ
-static void normalize_task(struct rq *rq, struct task_struct *p)
+void normalize_rt_tasks(void)
{
- const struct sched_class *prev_class = p->sched_class;
+ struct task_struct *g, *p;
struct sched_attr attr = {
.sched_policy = SCHED_NORMAL,
};
- int old_prio = p->prio;
- int queued;
-
- queued = task_on_rq_queued(p);
- if (queued)
- dequeue_task(rq, p, 0);
- __setscheduler(rq, p, &attr, false);
- if (queued) {
- enqueue_task(rq, p, 0);
- resched_curr(rq);
- }
-
- check_class_changed(rq, p, prev_class, old_prio);
-}
-
-void normalize_rt_tasks(void)
-{
- struct task_struct *g, *p;
- unsigned long flags;
- struct rq *rq;
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
@@ -7398,9 +7417,7 @@ void normalize_rt_tasks(void)
continue;
}
- rq = task_rq_lock(p, &flags);
- normalize_task(rq, p);
- task_rq_unlock(rq, p, &flags);
+ __sched_setscheduler(p, &attr, false, false);
}
read_unlock(&tasklist_lock);
}
--