[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20080429094608.GB23198@elte.hu>
Date: Tue, 29 Apr 2008 11:46:08 +0200
From: Ingo Molnar <mingo@...e.hu>
To: Peter Zijlstra <peterz@...radead.org>
Cc: ego@...ibm.com, Dhaval Giani <dhaval@...ux.vnet.ibm.com>,
Srivatsa Vaddagiri <vatsa@...ibm.com>,
Balbir Singh <balbir@...ibm.com>, linux-kernel@...r.kernel.org
Subject: Re: [BUG-REPORT] hrtick_start_fair and CPU-Hotplug
* Peter Zijlstra <peterz@...radead.org> wrote:
> > This looks like it's not cancelled at all and it migrates to
> > another cpu. I'll see what I can come up with.
thanks - i've queued up the patch below. Once this goes upstream it
would be a backport candidate as well.
Ingo
-------------------------->
Subject: sched: fix hrtick_start_fair and CPU-Hotplug
From: Peter Zijlstra <peterz@...radead.org>
Date: Tue, 29 Apr 2008 10:02:46 +0200
Gautham R Shenoy reported:
> While running the usual CPU-Hotplug stress tests on linux-2.6.25,
> I noticed the following in the console logs.
>
> This is a wee bit difficult to reproduce. In the past 10 runs I hit this
> only once.
>
> ------------[ cut here ]------------
>
> WARNING: at kernel/sched.c:962 hrtick+0x2e/0x65()
>
> Just wondering if we are doing a good job at handling the cancellation
> of any per-cpu scheduler timers during CPU-Hotplug.
This looks like it's indeed not cancelled at all and it migrates to
another cpu. Fix it via a proper hotplug notifier mechanism.
Reported-by: Gautham R Shenoy <ego@...ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Signed-off-by: Ingo Molnar <mingo@...e.hu>
---
kernel/sched.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 65 insertions(+), 1 deletion(-)
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -1238,6 +1238,7 @@ static inline void resched_rq(struct rq
enum {
HRTICK_SET, /* re-programm hrtick_timer */
HRTICK_RESET, /* not a new slice */
+ HRTICK_BLOCK, /* stop hrtick operations */
};
/*
@@ -1249,6 +1250,8 @@ static inline int hrtick_enabled(struct
{
if (!sched_feat(HRTICK))
return 0;
+ if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags)))
+ return 0;
return hrtimer_is_hres_active(&rq->hrtick_timer);
}
@@ -1331,7 +1334,63 @@ static enum hrtimer_restart hrtick(struc
return HRTIMER_NORESTART;
}
-static inline void init_rq_hrtick(struct rq *rq)
+static void hotplug_hrtick_disable(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rq->lock, flags);
+ rq->hrtick_flags = 0;
+ __set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
+ spin_unlock_irqrestore(&rq->lock, flags);
+
+ hrtick_clear(rq);
+}
+
+static void hotplug_hrtick_enable(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rq->lock, flags);
+ __clear_bit(HRTICK_BLOCK, &rq->hrtick_flags);
+ spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static int
+hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ int cpu = (int)(long)hcpu; /* hcpu holds the CPU number by value; go via long so the cast is not size-mismatched on 64-bit */
+
+ switch (action) {
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ hotplug_hrtick_disable(cpu); /* sets HRTICK_BLOCK and clears the hrtick */
+ return NOTIFY_OK;
+
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ hotplug_hrtick_enable(cpu); /* clears HRTICK_BLOCK again */
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE; /* any other action is not handled here */
+}
+
+static void init_hrtick(void)
+{
+ hotcpu_notifier(hotplug_hrtick, 0);
+}
+
+static void init_rq_hrtick(struct rq *rq)
{
rq->hrtick_flags = 0;
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -1368,6 +1427,10 @@ static inline void init_rq_hrtick(struct
void hrtick_resched(void)
{
}
+
+static inline void init_hrtick(void)
+{
+}
#endif
/*
@@ -8020,6 +8083,7 @@ void __init sched_init_smp(void)
put_online_cpus();
/* XXX: Theoretical race here - CPU may be hotplugged now */
hotcpu_notifier(update_sched_domains, 0);
+ init_hrtick();
/* Move init over to a non-isolated CPU */
if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists