lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 26 May 2017 14:27:35 -0500
From:   Haris Okanovic <haris.okanovic@...com>
To:     Anna-Maria Gleixner <anna-maria@...utronix.de>
Cc:     Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
        linux-rt-users@...r.kernel.org, linux-kernel@...r.kernel.org,
        tglx@...utronix.de, julia.cartwright@...com, gratian.crisan@...com
Subject: Re: [PATCH] Revert "timers: Don't wake ktimersoftd on every tick"

Anna-Maria,

Look-ahead is implemented by tick_find_expired() and expiry by
__run_timers(); both hold timer_base::lock (a raw spinlock) while
running, so the two routines shouldn't be able to run simultaneously
on the same timer_base. Are you sure the race isn't in another code
path?

Do you have a test which demonstrates this? I'd like to investigate. 
Your .config file would also be useful.

I'm looking at the v4.9.27-rt17 source.

Thanks,
Haris


On 05/26/2017 12:16 PM, Anna-Maria Gleixner wrote:
> This reverts commit 032f93cae150a.
>
> The problem is that the look-ahead optimization in the tick timer
> interrupt context can race with the softirq thread expiring timers. As
> a consequence, the temporary hlist heads which hold the timers to be
> expired are overwritten, and the timers which were already removed from
> the wheel bucket for expiry are left dangling without a list head.
>
> That means those timers never get expired. If one of those timers is
> canceled, the removal operation will result in hlist corruption.
>
> Signed-off-by: Anna-Maria Gleixner <anna-maria@...utronix.de>
> ---
>  kernel/time/timer.c | 96 ++++++++++++++++-------------------------------------
>  1 file changed, 29 insertions(+), 67 deletions(-)
>
> diff --git a/kernel/time/timer.c b/kernel/time/timer.c
> index cdff4411f8f6..08a5ab762495 100644
> --- a/kernel/time/timer.c
> +++ b/kernel/time/timer.c
> @@ -206,8 +206,6 @@ struct timer_base {
>  	bool			is_idle;
>  	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
>  	struct hlist_head	vectors[WHEEL_SIZE];
> -	struct hlist_head	expired_lists[LVL_DEPTH];
> -	int			expired_count;
>  } ____cacheline_aligned;
>
>  static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
> @@ -1355,8 +1353,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
>  	}
>  }
>
> -static inline void __expire_timers(struct timer_base *base,
> -				   struct hlist_head *head)
> +static void expire_timers(struct timer_base *base, struct hlist_head *head)
>  {
>  	while (!hlist_empty(head)) {
>  		struct timer_list *timer;
> @@ -1387,38 +1384,21 @@ static inline void __expire_timers(struct timer_base *base,
>  	}
>  }
>
> -static void expire_timers(struct timer_base *base)
> -{
> -	struct hlist_head *head;
> -
> -	while (base->expired_count--) {
> -		head = base->expired_lists + base->expired_count;
> -		__expire_timers(base, head);
> -	}
> -	base->expired_count = 0;
> -}
> -
> -static void __collect_expired_timers(struct timer_base *base)
> +static int __collect_expired_timers(struct timer_base *base,
> +				    struct hlist_head *heads)
>  {
>  	unsigned long clk = base->clk;
>  	struct hlist_head *vec;
> -	int i;
> +	int i, levels = 0;
>  	unsigned int idx;
>
> -	/*
> -	 * expire_timers() must be called at least once before we can
> -	 * collect more timers
> -	 */
> -	if (WARN_ON(base->expired_count))
> -		return;
> -
>  	for (i = 0; i < LVL_DEPTH; i++) {
>  		idx = (clk & LVL_MASK) + i * LVL_SIZE;
>
>  		if (__test_and_clear_bit(idx, base->pending_map)) {
>  			vec = base->vectors + idx;
> -			hlist_move_list(vec,
> -				&base->expired_lists[base->expired_count++]);
> +			hlist_move_list(vec, heads++);
> +			levels++;
>  		}
>  		/* Is it time to look at the next level? */
>  		if (clk & LVL_CLK_MASK)
> @@ -1426,6 +1406,7 @@ static void __collect_expired_timers(struct timer_base *base)
>  		/* Shift clock for the next level granularity */
>  		clk >>= LVL_CLK_SHIFT;
>  	}
> +	return levels;
>  }
>
>  #ifdef CONFIG_NO_HZ_COMMON
> @@ -1618,7 +1599,8 @@ void timer_clear_idle(void)
>  	base->is_idle = false;
>  }
>
> -static void collect_expired_timers(struct timer_base *base)
> +static int collect_expired_timers(struct timer_base *base,
> +				  struct hlist_head *heads)
>  {
>  	/*
>  	 * NOHZ optimization. After a long idle sleep we need to forward the
> @@ -1635,49 +1617,20 @@ static void collect_expired_timers(struct timer_base *base)
>  		if (time_after(next, jiffies)) {
>  			/* The call site will increment clock! */
>  			base->clk = jiffies - 1;
> -			return;
> +			return 0;
>  		}
>  		base->clk = next;
>  	}
> -	__collect_expired_timers(base);
> +	return __collect_expired_timers(base, heads);
>  }
>  #else
> -static inline void collect_expired_timers(struct timer_base *base)
> +static inline int collect_expired_timers(struct timer_base *base,
> +					 struct hlist_head *heads)
>  {
> -	__collect_expired_timers(base);
> +	return __collect_expired_timers(base, heads);
>  }
>  #endif
>
> -static int find_expired_timers(struct timer_base *base)
> -{
> -	const unsigned long int end_clk = jiffies;
> -
> -	while (!base->expired_count && time_after_eq(end_clk, base->clk)) {
> -		collect_expired_timers(base);
> -		base->clk++;
> -	}
> -
> -	return base->expired_count;
> -}
> -
> -/* Called from CPU tick routine to quickly collect expired timers */
> -static int tick_find_expired(struct timer_base *base)
> -{
> -	int count;
> -
> -	raw_spin_lock(&base->lock);
> -
> -	if (unlikely(time_after(jiffies, base->clk + HZ))) {
> -		/* defer to ktimersoftd; don't spend too long in irq context */
> -		count = -1;
> -	} else
> -		count = find_expired_timers(base);
> -
> -	raw_spin_unlock(&base->lock);
> -
> -	return count;
> -}
> -
>  /*
>   * Called from the timer interrupt handler to charge one tick to the current
>   * process.  user_tick is 1 if the tick is user time, 0 for system.
> @@ -1704,11 +1657,22 @@ void update_process_times(int user_tick)
>   */
>  static inline void __run_timers(struct timer_base *base)
>  {
> +	struct hlist_head heads[LVL_DEPTH];
> +	int levels;
> +
> +	if (!time_after_eq(jiffies, base->clk))
> +		return;
> +
>  	raw_spin_lock_irq(&base->lock);
>
> -	while (find_expired_timers(base))
> -		expire_timers(base);
> +	while (time_after_eq(jiffies, base->clk)) {
> +
> +		levels = collect_expired_timers(base, heads);
> +		base->clk++;
>
> +		while (levels--)
> +			expire_timers(base, heads + levels);
> +	}
>  	raw_spin_unlock_irq(&base->lock);
>  	wakeup_timer_waiters(base);
>  }
> @@ -1736,12 +1700,12 @@ void run_local_timers(void)
>
>  	hrtimer_run_queues();
>  	/* Raise the softirq only if required. */
> -	if (time_before(jiffies, base->clk) || !tick_find_expired(base)) {
> +	if (time_before(jiffies, base->clk)) {
>  		if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
>  			return;
>  		/* CPU is awake, so check the deferrable base. */
>  		base++;
> -		if (time_before(jiffies, base->clk) || !tick_find_expired(base))
> +		if (time_before(jiffies, base->clk))
>  			return;
>  	}
>  	raise_softirq(TIMER_SOFTIRQ);
> @@ -1911,7 +1875,6 @@ int timers_dead_cpu(unsigned int cpu)
>  		raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
>
>  		BUG_ON(old_base->running_timer);
> -		BUG_ON(old_base->expired_count);
>
>  		for (i = 0; i < WHEEL_SIZE; i++)
>  			migrate_timer_list(new_base, old_base->vectors + i);
> @@ -1938,7 +1901,6 @@ static void __init init_timer_cpu(int cpu)
>  #ifdef CONFIG_PREEMPT_RT_FULL
>  		init_swait_queue_head(&base->wait_for_running_timer);
>  #endif
> -		base->expired_count = 0;
>  	}
>  }
>
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ