lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.DEB.2.20.1705261911230.11993@hypnos.tec.linutronix.de>
Date:   Fri, 26 May 2017 19:16:07 +0200 (CEST)
From:   Anna-Maria Gleixner <anna-maria@...utronix.de>
To:     Sebastian Andrzej Siewior <bigeasy@...utronix.de>
cc:     Haris Okanovic <haris.okanovic@...com>,
        linux-rt-users@...r.kernel.org, linux-kernel@...r.kernel.org,
        tglx@...utronix.de, julia.cartwright@...com, gratian.crisan@...com
Subject: [PATCH] Revert "timers: Don't wake ktimersoftd on every tick"

This reverts commit 032f93cae150a.

The problem is that the look ahead optimization from the tick timer
interrupt context can race with the softirq thread expiring timer. As
a consequence the temporary hlist heads which hold the to expire
timers are overwritten and the timers which are already removed from
the wheel bucket for expiry are now dangling w/o a list head.

That means those timers never get expired. If one of those timers is
canceled the removal operation will result in a hlist corruption.

Signed-off-by: Anna-Maria Gleixner <anna-maria@...utronix.de>
---
 kernel/time/timer.c | 96 ++++++++++++++++-------------------------------------
 1 file changed, 29 insertions(+), 67 deletions(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index cdff4411f8f6..08a5ab762495 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -206,8 +206,6 @@ struct timer_base {
 	bool			is_idle;
 	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
 	struct hlist_head	vectors[WHEEL_SIZE];
-	struct hlist_head	expired_lists[LVL_DEPTH];
-	int			expired_count;
 } ____cacheline_aligned;
 
 static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
@@ -1355,8 +1353,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
 	}
 }
 
-static inline void __expire_timers(struct timer_base *base,
-				   struct hlist_head *head)
+static void expire_timers(struct timer_base *base, struct hlist_head *head)
 {
 	while (!hlist_empty(head)) {
 		struct timer_list *timer;
@@ -1387,38 +1384,21 @@ static inline void __expire_timers(struct timer_base *base,
 	}
 }
 
-static void expire_timers(struct timer_base *base)
-{
-	struct hlist_head *head;
-
-	while (base->expired_count--) {
-		head = base->expired_lists + base->expired_count;
-		__expire_timers(base, head);
-	}
-	base->expired_count = 0;
-}
-
-static void __collect_expired_timers(struct timer_base *base)
+static int __collect_expired_timers(struct timer_base *base,
+				    struct hlist_head *heads)
 {
 	unsigned long clk = base->clk;
 	struct hlist_head *vec;
-	int i;
+	int i, levels = 0;
 	unsigned int idx;
 
-	/*
-	 * expire_timers() must be called at least once before we can
-	 * collect more timers
-	 */
-	if (WARN_ON(base->expired_count))
-		return;
-
 	for (i = 0; i < LVL_DEPTH; i++) {
 		idx = (clk & LVL_MASK) + i * LVL_SIZE;
 
 		if (__test_and_clear_bit(idx, base->pending_map)) {
 			vec = base->vectors + idx;
-			hlist_move_list(vec,
-				&base->expired_lists[base->expired_count++]);
+			hlist_move_list(vec, heads++);
+			levels++;
 		}
 		/* Is it time to look at the next level? */
 		if (clk & LVL_CLK_MASK)
@@ -1426,6 +1406,7 @@ static void __collect_expired_timers(struct timer_base *base)
 		/* Shift clock for the next level granularity */
 		clk >>= LVL_CLK_SHIFT;
 	}
+	return levels;
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -1618,7 +1599,8 @@ void timer_clear_idle(void)
 	base->is_idle = false;
 }
 
-static void collect_expired_timers(struct timer_base *base)
+static int collect_expired_timers(struct timer_base *base,
+				  struct hlist_head *heads)
 {
 	/*
 	 * NOHZ optimization. After a long idle sleep we need to forward the
@@ -1635,49 +1617,20 @@ static void collect_expired_timers(struct timer_base *base)
 		if (time_after(next, jiffies)) {
 			/* The call site will increment clock! */
 			base->clk = jiffies - 1;
-			return;
+			return 0;
 		}
 		base->clk = next;
 	}
-	__collect_expired_timers(base);
+	return __collect_expired_timers(base, heads);
 }
 #else
-static inline void collect_expired_timers(struct timer_base *base)
+static inline int collect_expired_timers(struct timer_base *base,
+					 struct hlist_head *heads)
 {
-	__collect_expired_timers(base);
+	return __collect_expired_timers(base, heads);
 }
 #endif
 
-static int find_expired_timers(struct timer_base *base)
-{
-	const unsigned long int end_clk = jiffies;
-
-	while (!base->expired_count && time_after_eq(end_clk, base->clk)) {
-		collect_expired_timers(base);
-		base->clk++;
-	}
-
-	return base->expired_count;
-}
-
-/* Called from CPU tick routine to quickly collect expired timers */
-static int tick_find_expired(struct timer_base *base)
-{
-	int count;
-
-	raw_spin_lock(&base->lock);
-
-	if (unlikely(time_after(jiffies, base->clk + HZ))) {
-		/* defer to ktimersoftd; don't spend too long in irq context */
-		count = -1;
-	} else
-		count = find_expired_timers(base);
-
-	raw_spin_unlock(&base->lock);
-
-	return count;
-}
-
 /*
  * Called from the timer interrupt handler to charge one tick to the current
  * process.  user_tick is 1 if the tick is user time, 0 for system.
@@ -1704,11 +1657,22 @@ void update_process_times(int user_tick)
  */
 static inline void __run_timers(struct timer_base *base)
 {
+	struct hlist_head heads[LVL_DEPTH];
+	int levels;
+
+	if (!time_after_eq(jiffies, base->clk))
+		return;
+
 	raw_spin_lock_irq(&base->lock);
 
-	while (find_expired_timers(base))
-		expire_timers(base);
+	while (time_after_eq(jiffies, base->clk)) {
+
+		levels = collect_expired_timers(base, heads);
+		base->clk++;
 
+		while (levels--)
+			expire_timers(base, heads + levels);
+	}
 	raw_spin_unlock_irq(&base->lock);
 	wakeup_timer_waiters(base);
 }
@@ -1736,12 +1700,12 @@ void run_local_timers(void)
 
 	hrtimer_run_queues();
 	/* Raise the softirq only if required. */
-	if (time_before(jiffies, base->clk) || !tick_find_expired(base)) {
+	if (time_before(jiffies, base->clk)) {
 		if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
 			return;
 		/* CPU is awake, so check the deferrable base. */
 		base++;
-		if (time_before(jiffies, base->clk) || !tick_find_expired(base))
+		if (time_before(jiffies, base->clk))
 			return;
 	}
 	raise_softirq(TIMER_SOFTIRQ);
@@ -1911,7 +1875,6 @@ int timers_dead_cpu(unsigned int cpu)
 		raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 		BUG_ON(old_base->running_timer);
-		BUG_ON(old_base->expired_count);
 
 		for (i = 0; i < WHEEL_SIZE; i++)
 			migrate_timer_list(new_base, old_base->vectors + i);
@@ -1938,7 +1901,6 @@ static void __init init_timer_cpu(int cpu)
 #ifdef CONFIG_PREEMPT_RT_FULL
 		init_swait_queue_head(&base->wait_for_running_timer);
 #endif
-		base->expired_count = 0;
 	}
 }
 
-- 
2.11.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ