lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20080929224440.GG6697@linux.vnet.ibm.com>
Date:	Mon, 29 Sep 2008 15:44:40 -0700
From:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To:	Thomas Gleixner <tglx@...utronix.de>
Cc:	Linus Torvalds <torvalds@...ux-foundation.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	LKML <linux-kernel@...r.kernel.org>, Ingo Molnar <mingo@...e.hu>
Subject: Re: [GIT pull] timer fixes for .27

On Tue, Sep 30, 2008 at 12:15:11AM +0200, Thomas Gleixner wrote:
> Linus,
> 
> Please pull the latest timers-fixes-for-linus git tree from:
> 
>    git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git timers-fixes-for-linus
> 
> The patches fix hard to trigger bugs in the CPU offline code of
> hrtimers which were noticed by Paul McKenney recently. In the worst
> case they can leave migrated hrtimers in a stale state.

Acked-by: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
Tested-by: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>

>  Thanks,
> 
> 	tglx
> 
> ------------------>
> Thomas Gleixner (4):
>       hrtimer: migrate pending list on cpu offline
>       hrtimer: fix migration of CB_IRQSAFE_NO_SOFTIRQ hrtimers
>       hrtimer: mark migration state
>       hrtimer: prevent migration of per CPU hrtimers
> 
> 
>  include/linux/hrtimer.h      |   18 ++++++--
>  kernel/hrtimer.c             |   95 +++++++++++++++++++++++++++++++++++++----
>  kernel/sched.c               |    4 +-
>  kernel/time/tick-sched.c     |    2 +-
>  kernel/trace/trace_sysprof.c |    2 +-
>  5 files changed, 103 insertions(+), 18 deletions(-)
> 
> diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
> index 6d93dce..2f245fe 100644
> --- a/include/linux/hrtimer.h
> +++ b/include/linux/hrtimer.h
> @@ -47,14 +47,22 @@ enum hrtimer_restart {
>   *	HRTIMER_CB_IRQSAFE:		Callback may run in hardirq context
>   *	HRTIMER_CB_IRQSAFE_NO_RESTART:	Callback may run in hardirq context and
>   *					does not restart the timer
> - *	HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:	Callback must run in hardirq context
> - *					Special mode for tick emultation
> + *	HRTIMER_CB_IRQSAFE_PERCPU:	Callback must run in hardirq context
> + *					Special mode for tick emulation and
> + *					scheduler timer. Such timers are per
> + *					cpu and not allowed to be migrated on
> + *					cpu unplug.
> + *	HRTIMER_CB_IRQSAFE_UNLOCKED:	Callback should run in hardirq context
> + *					with timer->base lock unlocked
> + *					used for timers which call wakeup to
> + *					avoid lock order problems with rq->lock
>   */
>  enum hrtimer_cb_mode {
>  	HRTIMER_CB_SOFTIRQ,
>  	HRTIMER_CB_IRQSAFE,
>  	HRTIMER_CB_IRQSAFE_NO_RESTART,
> -	HRTIMER_CB_IRQSAFE_NO_SOFTIRQ,
> +	HRTIMER_CB_IRQSAFE_PERCPU,
> +	HRTIMER_CB_IRQSAFE_UNLOCKED,
>  };
> 
>  /*
> @@ -67,9 +75,10 @@ enum hrtimer_cb_mode {
>   * 0x02		callback function running
>   * 0x04		callback pending (high resolution mode)
>   *
> - * Special case:
> + * Special cases:
>   * 0x03		callback function running and enqueued
>   *		(was requeued on another CPU)
> + * 0x09		timer was migrated on CPU hotunplug
>   * The "callback function running and enqueued" status is only possible on
>   * SMP. It happens for example when a posix timer expired and the callback
>   * queued a signal. Between dropping the lock which protects the posix timer
> @@ -87,6 +96,7 @@ enum hrtimer_cb_mode {
>  #define HRTIMER_STATE_ENQUEUED	0x01
>  #define HRTIMER_STATE_CALLBACK	0x02
>  #define HRTIMER_STATE_PENDING	0x04
> +#define HRTIMER_STATE_MIGRATE	0x08
> 
>  /**
>   * struct hrtimer - the basic hrtimer structure
> diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
> index b8e4dce..cdec83e 100644
> --- a/kernel/hrtimer.c
> +++ b/kernel/hrtimer.c
> @@ -672,13 +672,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
>  			 */
>  			BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
>  			return 1;
> -		case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
> +		case HRTIMER_CB_IRQSAFE_PERCPU:
> +		case HRTIMER_CB_IRQSAFE_UNLOCKED:
>  			/*
>  			 * This is solely for the sched tick emulation with
>  			 * dynamic tick support to ensure that we do not
>  			 * restart the tick right on the edge and end up with
>  			 * the tick timer in the softirq ! The calling site
> -			 * takes care of this.
> +			 * takes care of this. Also used for hrtimer sleeper !
>  			 */
>  			debug_hrtimer_deactivate(timer);
>  			return 1;
> @@ -1245,7 +1246,8 @@ static void __run_hrtimer(struct hrtimer *timer)
>  	timer_stats_account_hrtimer(timer);
> 
>  	fn = timer->function;
> -	if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
> +	if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
> +	    timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
>  		/*
>  		 * Used for scheduler timers, avoid lock inversion with
>  		 * rq->lock and tasklist_lock.
> @@ -1452,7 +1454,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
>  	sl->timer.function = hrtimer_wakeup;
>  	sl->task = task;
>  #ifdef CONFIG_HIGH_RES_TIMERS
> -	sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
> +	sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
>  #endif
>  }
> 
> @@ -1591,29 +1593,95 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
> 
>  #ifdef CONFIG_HOTPLUG_CPU
> 
> -static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
> -				struct hrtimer_clock_base *new_base)
> +static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
> +				struct hrtimer_clock_base *new_base, int dcpu)
>  {
>  	struct hrtimer *timer;
>  	struct rb_node *node;
> +	int raise = 0;
> 
>  	while ((node = rb_first(&old_base->active))) {
>  		timer = rb_entry(node, struct hrtimer, node);
>  		BUG_ON(hrtimer_callback_running(timer));
>  		debug_hrtimer_deactivate(timer);
> -		__remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0);
> +
> +		/*
> +		 * Should not happen. Per CPU timers should be
> +		 * canceled _before_ the migration code is called
> +		 */
> +		if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
> +			__remove_hrtimer(timer, old_base,
> +					 HRTIMER_STATE_INACTIVE, 0);
> +			WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
> +			     timer, timer->function, dcpu);
> +			continue;
> +		}
> +
> +		/*
> +		 * Mark it as STATE_MIGRATE not INACTIVE otherwise the
> +		 * timer could be seen as !active and just vanish away
> +		 * under us on another CPU
> +		 */
> +		__remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
>  		timer->base = new_base;
>  		/*
>  		 * Enqueue the timer. Allow reprogramming of the event device
>  		 */
>  		enqueue_hrtimer(timer, new_base, 1);
> +
> +#ifdef CONFIG_HIGH_RES_TIMERS
> +		/*
> +		 * Happens with high res enabled when the timer was
> +		 * already expired and the callback mode is
> +		 * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The
> +		 * enqueue code does not move them to the soft irq
> +		 * pending list for performance/latency reasons, but
> +		 * in the migration state, we need to do that
> +		 * otherwise we end up with a stale timer.
> +		 */
> +		if (timer->state == HRTIMER_STATE_MIGRATE) {
> +			timer->state = HRTIMER_STATE_PENDING;
> +			list_add_tail(&timer->cb_entry,
> +				      &new_base->cpu_base->cb_pending);
> +			raise = 1;
> +		}
> +#endif
> +		/* Clear the migration state bit */
> +		timer->state &= ~HRTIMER_STATE_MIGRATE;
> +	}
> +	return raise;
> +}
> +
> +#ifdef CONFIG_HIGH_RES_TIMERS
> +static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
> +				   struct hrtimer_cpu_base *new_base)
> +{
> +	struct hrtimer *timer;
> +	int raise = 0;
> +
> +	while (!list_empty(&old_base->cb_pending)) {
> +		timer = list_entry(old_base->cb_pending.next,
> +				   struct hrtimer, cb_entry);
> +
> +		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0);
> +		timer->base = &new_base->clock_base[timer->base->index];
> +		list_add_tail(&timer->cb_entry, &new_base->cb_pending);
> +		raise = 1;
>  	}
> +	return raise;
> +}
> +#else
> +static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
> +				   struct hrtimer_cpu_base *new_base)
> +{
> +	return 0;
>  }
> +#endif
> 
>  static void migrate_hrtimers(int cpu)
>  {
>  	struct hrtimer_cpu_base *old_base, *new_base;
> -	int i;
> +	int i, raise = 0;
> 
>  	BUG_ON(cpu_online(cpu));
>  	old_base = &per_cpu(hrtimer_bases, cpu);
> @@ -1626,14 +1694,21 @@ static void migrate_hrtimers(int cpu)
>  	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
> 
>  	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
> -		migrate_hrtimer_list(&old_base->clock_base[i],
> -				     &new_base->clock_base[i]);
> +		if (migrate_hrtimer_list(&old_base->clock_base[i],
> +					 &new_base->clock_base[i], cpu))
> +			raise = 1;
>  	}
> 
> +	if (migrate_hrtimer_pending(old_base, new_base))
> +		raise = 1;
> +
>  	spin_unlock(&old_base->lock);
>  	spin_unlock(&new_base->lock);
>  	local_irq_enable();
>  	put_cpu_var(hrtimer_bases);
> +
> +	if (raise)
> +		hrtimer_raise_softirq();
>  }
>  #endif /* CONFIG_HOTPLUG_CPU */
> 
> diff --git a/kernel/sched.c b/kernel/sched.c
> index 13dd2db..ad1962d 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -201,7 +201,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
>  	hrtimer_init(&rt_b->rt_period_timer,
>  			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>  	rt_b->rt_period_timer.function = sched_rt_period_timer;
> -	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
> +	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
>  }
> 
>  static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
> @@ -1119,7 +1119,7 @@ static void init_rq_hrtick(struct rq *rq)
> 
>  	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>  	rq->hrtick_timer.function = hrtick;
> -	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
> +	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
>  }
>  #else
>  static inline void hrtick_clear(struct rq *rq)
> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> index 39019b3..cb02324 100644
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -625,7 +625,7 @@ void tick_setup_sched_timer(void)
>  	 */
>  	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
>  	ts->sched_timer.function = tick_sched_timer;
> -	ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
> +	ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
> 
>  	/* Get the next period (per cpu) */
>  	ts->sched_timer.expires = tick_init_jiffy_update();
> diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
> index bb948e5..db58fb6 100644
> --- a/kernel/trace/trace_sysprof.c
> +++ b/kernel/trace/trace_sysprof.c
> @@ -202,7 +202,7 @@ static void start_stack_timer(int cpu)
> 
>  	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>  	hrtimer->function = stack_trace_timer_fn;
> -	hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
> +	hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
> 
>  	hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
>  }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ