[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20140212140620.GD9987@twins.programming.kicks-ass.net>
Date: Wed, 12 Feb 2014 15:06:20 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Kirill Tkhai <tkhai@...dex.ru>
Cc: "mingo@...nel.org" <mingo@...nel.org>,
"hpa@...or.com" <hpa@...or.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"tglx@...utronix.de" <tglx@...utronix.de>,
"linux-tip-commits@...r.kernel.org"
<linux-tip-commits@...r.kernel.org>,
Steven Rostedt <rostedt@...dmis.org>,
Juri Lelli <juri.lelli@...il.com>,
Dan Carpenter <dan.carpenter@...cle.com>
Subject: Re: [tip:sched/core] sched: Push put_prev_task() into
pick_next_task( )
On Wed, Feb 12, 2014 at 11:00:53AM +0400, Kirill Tkhai wrote:
> > @@ -4748,7 +4743,7 @@ static void migrate_tasks(unsigned int dead_cpu)
> > if (rq->nr_running == 1)
> > break;
> >
> > - next = pick_next_task(rq);
> > + next = pick_next_task(rq, NULL);
>
> pick_next_task() firstly checks for prev->sched_class, doesn't it ???
>
> The same for pick_next_task_rt().
OK, how about something like this?
---
Subject: sched: Fix hotplug task migration
From: Peter Zijlstra <peterz@...radead.org>
Date: Wed, 12 Feb 2014 10:49:30 +0100
Dan Carpenter reported:
> kernel/sched/rt.c:1347 pick_next_task_rt() warn: variable dereferenced before check 'prev' (see line 1338)
> kernel/sched/deadline.c:1011 pick_next_task_dl() warn: variable dereferenced before check 'prev' (see line 1005)
Kirill also spotted that migrate_tasks() will have an instant NULL
deref because pick_next_task() will immediately deref prev.
Instead of fixing all the corner cases because migrate_tasks() can
pass in a NULL prev task in the unlikely case of hot-un-plug, provide
a fake task such that we can remove all the NULL checks from the far
more common paths.
A further problem; not previously spotted; is that because we pushed
pre_schedule() and idle_balance() into pick_next_task() we now need to
avoid those getting called and pulling more tasks on our dying CPU.
We avoid pull_{dl,rt}_task() by setting fake_task.prio to MAX_PRIO+1.
We also note that since we call pick_next_task() exactly the amount of
times we have runnable tasks present, we should never land in
idle_balance().
Fixes: 38033c37faab ("sched: Push down pre_schedule() and idle_balance()")
Cc: Steven Rostedt <rostedt@...dmis.org>
Cc: Juri Lelli <juri.lelli@...il.com>
Cc: Ingo Molnar <mingo@...nel.org>
Reported-by: Kirill Tkhai <tkhai@...dex.ru>
Reported-by: Dan Carpenter <dan.carpenter@...cle.com>
Signed-off-by: Peter Zijlstra <peterz@...radead.org>
Link: http://lkml.kernel.org/r/20140212094930.GB3545@laptop.programming.kicks-ass.net
---
kernel/sched/core.c | 18 +++++++++++++++++-
kernel/sched/deadline.c | 3 +--
kernel/sched/fair.c | 5 ++---
kernel/sched/idle_task.c | 3 +--
kernel/sched/rt.c | 3 +--
kernel/sched/stop_task.c | 3 +--
6 files changed, 23 insertions(+), 12 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4681,6 +4681,22 @@ static void calc_load_migrate(struct rq
atomic_long_add(delta, &calc_load_tasks);
}
+static void put_prev_task_fake(struct rq *rq, struct task_struct *prev)
+{
+}
+
+static const struct sched_class fake_sched_class = {
+ .put_prev_task = put_prev_task_fake,
+};
+
+static struct task_struct fake_task = {
+ /*
+ * Avoid pull_{rt,dl}_task()
+ */
+ .prio = MAX_PRIO + 1,
+ .sched_class = &fake_sched_class,
+};
+
/*
* Migrate all tasks from the rq, sleeping tasks will be migrated by
* try_to_wake_up()->select_task_rq().
@@ -4721,7 +4737,7 @@ static void migrate_tasks(unsigned int d
if (rq->nr_running == 1)
break;
- next = pick_next_task(rq, NULL);
+ next = pick_next_task(rq, &fake_task);
BUG_ON(!next);
next->sched_class->put_prev_task(rq, next);
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1008,8 +1008,7 @@ struct task_struct *pick_next_task_dl(st
if (unlikely(!dl_rq->dl_nr_running))
return NULL;
- if (prev)
- prev->sched_class->put_prev_task(rq, prev);
+ prev->sched_class->put_prev_task(rq, prev);
dl_se = pick_next_dl_entity(rq, dl_rq);
BUG_ON(!dl_se);
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4690,7 +4690,7 @@ pick_next_task_fair(struct rq *rq, struc
if (!cfs_rq->nr_running)
goto idle;
- if (!prev || prev->sched_class != &fair_sched_class)
+ if (prev->sched_class != &fair_sched_class)
goto simple;
/*
@@ -4766,8 +4766,7 @@ pick_next_task_fair(struct rq *rq, struc
if (!cfs_rq->nr_running)
goto idle;
- if (prev)
- prev->sched_class->put_prev_task(rq, prev);
+ prev->sched_class->put_prev_task(rq, prev);
do {
se = pick_next_entity(cfs_rq, NULL);
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -26,8 +26,7 @@ static void check_preempt_curr_idle(stru
static struct task_struct *
pick_next_task_idle(struct rq *rq, struct task_struct *prev)
{
- if (prev)
- prev->sched_class->put_prev_task(rq, prev);
+ prev->sched_class->put_prev_task(rq, prev);
schedstat_inc(rq, sched_goidle);
#ifdef CONFIG_SMP
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1344,8 +1344,7 @@ pick_next_task_rt(struct rq *rq, struct
if (rt_rq_throttled(rt_rq))
return NULL;
- if (prev)
- prev->sched_class->put_prev_task(rq, prev);
+ prev->sched_class->put_prev_task(rq, prev);
p = _pick_next_task_rt(rq);
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -31,8 +31,7 @@ pick_next_task_stop(struct rq *rq, struc
if (!stop || !stop->on_rq)
return NULL;
- if (prev)
- prev->sched_class->put_prev_task(rq, prev);
+ prev->sched_class->put_prev_task(rq, prev);
stop->se.exec_start = rq_clock_task(rq);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists