[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <214021433348760@web25g.yandex.ru>
Date: Wed, 03 Jun 2015 19:26:00 +0300
From: Kirill Tkhai <tkhai@...dex.ru>
To: Peter Zijlstra <peterz@...radead.org>,
"umgwanakikbuti@...il.com" <umgwanakikbuti@...il.com>,
"mingo@...e.hu" <mingo@...e.hu>
Cc: "ktkhai@...allels.com" <ktkhai@...allels.com>,
"rostedt@...dmis.org" <rostedt@...dmis.org>,
"tglx@...utronix.de" <tglx@...utronix.de>,
"juri.lelli@...il.com" <juri.lelli@...il.com>,
"pang.xunlei@...aro.org" <pang.xunlei@...aro.org>,
"oleg@...hat.com" <oleg@...hat.com>,
"wanpeng.li@...ux.intel.com" <wanpeng.li@...ux.intel.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 8/9] hrtimer: Allow hrtimer::function() to free the timer
[sorry for died formatting]
03.06.2015, 16:55, "Peter Zijlstra" <peterz@...radead.org>:
> Currently an hrtimer callback function cannot free its own timer
> because __run_hrtimer() still needs to clear HRTIMER_STATE_CALLBACK
> after it. Freeing the timer would result in a clear use-after-free.
>
> Solve this by using a scheme similar to regular timers; track the
> current running timer in hrtimer_clock_base::running.
>
> Suggested-by: Thomas Gleixner <tglx@...utronix.de>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> ---
> include/linux/hrtimer.h | 35 ++++++++++++++---------------------
> kernel/time/hrtimer.c | 48 ++++++++++++++++++++++--------------------------
> 2 files changed, 36 insertions(+), 47 deletions(-)
>
> --- a/include/linux/hrtimer.h
> +++ b/include/linux/hrtimer.h
> @@ -53,34 +53,25 @@ enum hrtimer_restart {
> *
> * 0x00 inactive
> * 0x01 enqueued into rbtree
> - * 0x02 callback function running
> - * 0x04 timer is migrated to another cpu
> + * 0x02 timer is migrated to another cpu
> *
> - * Special cases:
> - * 0x03 callback function running and enqueued
> - * (was requeued on another CPU)
> - * 0x05 timer was migrated on CPU hotunplug
> + * The callback state is not part of the timer->state because clearing it would
> + * mean touching the timer after the callback, this makes it impossible to free
> + * the timer from the callback function.
> *
> - * The "callback function running and enqueued" status is only possible on
> - * SMP. It happens for example when a posix timer expired and the callback
> + * Therefore we track the callback state in timer->base->running == timer.
> + *
> + * On SMP it is possible to have a "callback function running and enqueued"
> + * status. It happens for example when a posix timer expired and the callback
> * queued a signal. Between dropping the lock which protects the posix timer
> * and reacquiring the base lock of the hrtimer, another CPU can deliver the
> - * signal and rearm the timer. We have to preserve the callback running state,
> - * as otherwise the timer could be removed before the softirq code finishes the
> - * the handling of the timer.
> - *
> - * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state
> - * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This
> - * also affects HRTIMER_STATE_MIGRATE where the preservation is not
> - * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is
> - * enqueued on the new cpu.
> + * signal and rearm the timer.
> *
> * All state transitions are protected by cpu_base->lock.
> */
> #define HRTIMER_STATE_INACTIVE 0x00
> #define HRTIMER_STATE_ENQUEUED 0x01
> -#define HRTIMER_STATE_CALLBACK 0x02
> -#define HRTIMER_STATE_MIGRATE 0x04
> +#define HRTIMER_STATE_MIGRATE 0x02
>
> /**
> * struct hrtimer - the basic hrtimer structure
> @@ -153,6 +144,7 @@ struct hrtimer_clock_base {
> struct timerqueue_head active;
> ktime_t (*get_time)(void);
> ktime_t offset;
> + struct hrtimer *running;
> } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
>
> enum hrtimer_base_type {
> @@ -402,7 +394,8 @@ extern u64 hrtimer_get_next_event(void);
> */
> static inline int hrtimer_active(const struct hrtimer *timer)
> {
> - return timer->state != HRTIMER_STATE_INACTIVE;
> + return timer->state != HRTIMER_STATE_INACTIVE ||
> + timer->base->running == timer;
> }
It seems to be not good, because hrtimer_active() check stops
to be atomic. So the things like hrtimer_try_to_cancel() race
with a callback of self-rearming timer and may return a false
positive result.
> /*
> @@ -419,7 +412,7 @@ static inline int hrtimer_is_queued(stru
> */
> static inline int hrtimer_callback_running(struct hrtimer *timer)
> {
> - return timer->state & HRTIMER_STATE_CALLBACK;
> + return timer->base->running == timer;
> }
>
> /* Forward a hrtimer so it expires after now: */
> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -111,6 +111,13 @@ static inline int hrtimer_clockid_to_bas
> #ifdef CONFIG_SMP
>
> /*
> + * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
> + * such that hrtimer_callback_running() can unconditionally dereference
> + * timer->base.
> + */
> +static struct hrtimer_clock_base migration_base;
> +
> +/*
> * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
> * means that all timers which are tied to this base via timer->base are
> * locked, and the base itself is locked too.
> @@ -119,8 +126,8 @@ static inline int hrtimer_clockid_to_bas
> * be found on the lists/queues.
> *
> * When the timer's base is locked, and the timer removed from list, it is
> - * possible to set timer->base = NULL and drop the lock: the timer remains
> - * locked.
> + * possible to set timer->base = &migration_base and drop the lock: the timer
> + * remains locked.
> */
> static
> struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
> @@ -130,7 +137,7 @@ struct hrtimer_clock_base *lock_hrtimer_
>
> for (;;) {
> base = timer->base;
> - if (likely(base != NULL)) {
> + if (likely(base != &migration_base)) {
> raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
> if (likely(base == timer->base))
> return base;
> @@ -194,8 +201,8 @@ switch_hrtimer_base(struct hrtimer *time
> if (unlikely(hrtimer_callback_running(timer)))
> return base;
>
> - /* See the comment in lock_timer_base() */
> - timer->base = NULL;
> + /* See the comment in lock_hrtimer_base() */
> + timer->base = &migration_base;
> raw_spin_unlock(&base->cpu_base->lock);
> raw_spin_lock(&new_base->cpu_base->lock);
>
> @@ -840,11 +847,7 @@ static int enqueue_hrtimer(struct hrtime
>
> base->cpu_base->active_bases |= 1 << base->index;
>
> - /*
> - * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
> - * state of a possibly running callback.
> - */
> - timer->state |= HRTIMER_STATE_ENQUEUED;
> + timer->state = HRTIMER_STATE_ENQUEUED;
>
> return timerqueue_add(&base->active, &timer->node);
> }
> @@ -894,7 +897,6 @@ static inline int
> remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
> {
> if (hrtimer_is_queued(timer)) {
> - unsigned long state;
> int reprogram;
>
> /*
> @@ -908,13 +910,8 @@ remove_hrtimer(struct hrtimer *timer, st
> debug_deactivate(timer);
> timer_stats_hrtimer_clear_start_info(timer);
> reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
> - /*
> - * We must preserve the CALLBACK state flag here,
> - * otherwise we could move the timer base in
> - * switch_hrtimer_base.
> - */
> - state = timer->state & HRTIMER_STATE_CALLBACK;
> - __remove_hrtimer(timer, base, state, reprogram);
> +
> + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, reprogram);
> return 1;
> }
> return 0;
> @@ -1124,7 +1121,8 @@ static void __run_hrtimer(struct hrtimer
> WARN_ON(!irqs_disabled());
>
> debug_deactivate(timer);
> - __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
> + base->running = timer;
> + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
> timer_stats_account_hrtimer(timer);
> fn = timer->function;
>
> @@ -1140,7 +1138,7 @@ static void __run_hrtimer(struct hrtimer
> raw_spin_lock(&cpu_base->lock);
>
> /*
> - * Note: We clear the CALLBACK bit after enqueue_hrtimer and
> + * Note: We clear the running state after enqueue_hrtimer and
> * we do not reprogramm the event hardware. Happens either in
> * hrtimer_start_range_ns() or in hrtimer_interrupt()
> *
> @@ -1152,9 +1150,8 @@ static void __run_hrtimer(struct hrtimer
> !(timer->state & HRTIMER_STATE_ENQUEUED))
> enqueue_hrtimer(timer, base);
>
> - WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
> -
> - timer->state &= ~HRTIMER_STATE_CALLBACK;
> + WARN_ON_ONCE(base->running != timer);
> + base->running = NULL;
> }
>
> static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
> @@ -1523,11 +1520,10 @@ static void migrate_hrtimer_list(struct
> * hrtimer_interrupt after we migrated everything to
> * sort out already expired timers and reprogram the
> * event device.
> + *
> + * Sets timer->state = HRTIMER_STATE_ENQUEUED.
> */
> enqueue_hrtimer(timer, new_base);
> -
> - /* Clear the migration state bit */
> - timer->state &= ~HRTIMER_STATE_MIGRATE;
> }
> }
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists