[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.DEB.2.20.1610241711220.4983@nanos>
Date: Mon, 24 Oct 2016 17:13:03 +0200 (CEST)
From: Thomas Gleixner <tglx@...utronix.de>
To: Linus Torvalds <torvalds@...ux-foundation.org>
cc: LKML <linux-kernel@...r.kernel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Ingo Molnar <mingo@...nel.org>,
"H. Peter Anvin" <hpa@...or.com>
Subject: Re: [GIT pull] timer updates for 4.9
On Mon, 24 Oct 2016, Thomas Gleixner wrote:
> Can you please check in the disassembly whether gcc really reloads
> timer->flags? Mine does not...
>
> Another thing you might try is to enable debugobjects. As this happens only
> at shutdown time the problem might be caused by something else which
> wreckages timers in some subtle way.
Find below a combo patch which cures your findings and the NOHZ related
wreckage, which just makes timers fire early, but does not corrupt stuff in
a way which could explain your splat.
Thanks,
tglx
8<---------------------
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -878,7 +878,7 @@ static inline struct timer_base *get_tim
#ifdef CONFIG_NO_HZ_COMMON
static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
{
#ifdef CONFIG_SMP
if ((tflags & TIMER_PINNED) || !base->migration_enabled)
@@ -891,25 +891,27 @@ static inline struct timer_base *
static inline void forward_timer_base(struct timer_base *base)
{
+ unsigned long jnow = READ_ONCE(jiffies);
+
/*
* We only forward the base when it's idle and we have a delta between
* base clock and jiffies.
*/
- if (!base->is_idle || (long) (jiffies - base->clk) < 2)
+ if (!base->is_idle || (long) (jnow - base->clk) < 2)
return;
/*
* If the next expiry value is > jiffies, then we fast forward to
* jiffies otherwise we forward to the next expiry value.
*/
- if (time_after(base->next_expiry, jiffies))
- base->clk = jiffies;
+ if (time_after(base->next_expiry, jnow))
+ base->clk = jnow;
else
base->clk = base->next_expiry;
}
#else
static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
{
return get_timer_this_cpu_base(tflags);
}
@@ -917,14 +919,6 @@ static inline struct timer_base *
static inline void forward_timer_base(struct timer_base *base) { }
#endif
-static inline struct timer_base *
-get_target_base(struct timer_base *base, unsigned tflags)
-{
- struct timer_base *target = __get_target_base(base, tflags);
-
- forward_timer_base(target);
- return target;
-}
/*
* We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
@@ -943,7 +937,14 @@ static struct timer_base *lock_timer_bas
{
for (;;) {
struct timer_base *base;
- u32 tf = timer->flags;
+ u32 tf;
+
+ /*
+ * We need to use READ_ONCE() here, otherwise the compiler
+ * might re-read @tf between the check for TIMER_MIGRATING
+ * and spin_lock().
+ */
+ tf = READ_ONCE(timer->flags);
if (!(tf & TIMER_MIGRATING)) {
base = get_timer_base(tf);
@@ -964,6 +965,8 @@ static inline int
unsigned long clk = 0, flags;
int ret = 0;
+ BUG_ON(!timer->function);
+
/*
* This is a common optimization triggered by the networking code - if
* the timer is re-modified to have the same timeout or ends up in the
@@ -972,13 +975,16 @@ static inline int
if (timer_pending(timer)) {
if (timer->expires == expires)
return 1;
+
/*
- * Take the current timer_jiffies of base, but without holding
- * the lock!
+ * We lock timer base and calculate the bucket index right
+ * here. If the timer ends up in the same bucket, then we
+ * just update the expiry time and avoid the whole
+ * dequeue/enqueue dance.
*/
- base = get_timer_base(timer->flags);
- clk = base->clk;
+ base = lock_timer_base(timer, &flags);
+ clk = base->clk;
idx = calc_wheel_index(expires, clk);
/*
@@ -988,14 +994,14 @@ static inline int
*/
if (idx == timer_get_idx(timer)) {
timer->expires = expires;
- return 1;
+ ret = 1;
+ goto out_unlock;
}
+ } else {
+ base = lock_timer_base(timer, &flags);
}
timer_stats_timer_set_start_info(timer);
- BUG_ON(!timer->function);
-
- base = lock_timer_base(timer, &flags);
ret = detach_if_pending(timer, base, false);
if (!ret && pending_only)
@@ -1025,12 +1031,16 @@ static inline int
}
}
+ /* Try to forward a stale timer base clock */
+ forward_timer_base(base);
+
timer->expires = expires;
/*
* If 'idx' was calculated above and the base time did not advance
- * between calculating 'idx' and taking the lock, only enqueue_timer()
- * and trigger_dyntick_cpu() is required. Otherwise we need to
- * (re)calculate the wheel index via internal_add_timer().
+ * between calculating 'idx' and possibly switching the base, only
+ * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise
+ * we need to (re)calculate the wheel index via
+ * internal_add_timer().
*/
if (idx != UINT_MAX && clk == base->clk) {
enqueue_timer(base, timer, idx);
@@ -1510,12 +1520,16 @@ u64 get_next_timer_interrupt(unsigned lo
is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
base->next_expiry = nextevt;
/*
- * We have a fresh next event. Check whether we can forward the base:
- */
- if (time_after(nextevt, jiffies))
- base->clk = jiffies;
- else if (time_after(nextevt, base->clk))
- base->clk = nextevt;
+ * We have a fresh next event. Check whether we can forward the
+ * base. We can only do that when @basej is past base->clk
+ * otherwise we might rewind base->clk.
+ */
+ if (time_after(basej, base->clk)) {
+ if (time_after(nextevt, basej))
+ base->clk = basej;
+ else if (time_after(nextevt, base->clk))
+ base->clk = nextevt;
+ }
if (time_before_eq(nextevt, basej)) {
expires = basem;
Powered by blists - more mailing lists