lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150604104902.GH3644@twins.programming.kicks-ass.net>
Date:	Thu, 4 Jun 2015 12:49:02 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Kirill Tkhai <tkhai@...dex.ru>
Cc:	"umgwanakikbuti@...il.com" <umgwanakikbuti@...il.com>,
	"mingo@...e.hu" <mingo@...e.hu>,
	"ktkhai@...allels.com" <ktkhai@...allels.com>,
	"rostedt@...dmis.org" <rostedt@...dmis.org>,
	"tglx@...utronix.de" <tglx@...utronix.de>,
	"juri.lelli@...il.com" <juri.lelli@...il.com>,
	"pang.xunlei@...aro.org" <pang.xunlei@...aro.org>,
	"oleg@...hat.com" <oleg@...hat.com>,
	"wanpeng.li@...ux.intel.com" <wanpeng.li@...ux.intel.com>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 8/9] hrtimer: Allow hrtimer::function() to free the timer

On Thu, Jun 04, 2015 at 12:07:03PM +0300, Kirill Tkhai wrote:
> > --- a/include/linux/hrtimer.h
> > +++ b/include/linux/hrtimer.h
> > @@ -391,11 +391,25 @@ extern u64 hrtimer_get_next_event(void);
> >   * A timer is active, when it is enqueued into the rbtree or the
> >   * callback function is running or it's in the state of being migrated
> >   * to another cpu.
> > + *
> > + * See __run_hrtimer().
> >   */
> > -static inline int hrtimer_active(const struct hrtimer *timer)
> > +static inline bool hrtimer_active(const struct hrtimer *timer)
> >  {
> > -	return timer->state != HRTIMER_STATE_INACTIVE ||
> > -		timer->base->running == timer;
> > +	if (timer->state != HRTIMER_STATE_INACTIVE)
> > +		return true;
> > +
> > +	smp_rmb(); /* C matches A */
> > +
> > +	if (timer->base->running == timer)
> > +		return true;
> > +
> > +	smp_rmb(); /* D matches B */
> > +
> > +	if (timer->state != HRTIMER_STATE_INACTIVE)
> > +		return true;
> > +
> > +	return false;
> 
> This races with two sequential timer handlers. hrtimer_active()
> is preemptible everywhere, and no guarantees that all three "if"
> conditions check the same timer tick.

Indeed.

> How about transformation of hrtimer_bases.lock: raw_spinlock_t --> seqlock_t?

Ingo will like that because it means we already need to touch cpu_base.

But I think there's a problem there on timer migration, the timer can
migrate between bases while we do the seq read loop and then you can get
false positives on the different seqcount numbers.

We could of course do something like the below, but hrtimer_is_active()
is turning into quite the monster.

Needs more comments at the very least, its fully of trickery.

---
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -59,7 +59,9 @@ enum hrtimer_restart {
  * mean touching the timer after the callback, this makes it impossible to free
  * the timer from the callback function.
  *
- * Therefore we track the callback state in timer->base->running == timer.
+ * Therefore we track the callback state in:
+ *
+ * 	timer->base->cpu_base->running == timer
  *
  * On SMP it is possible to have a "callback function running and enqueued"
  * status. It happens for example when a posix timer expired and the callback
@@ -144,7 +146,6 @@ struct hrtimer_clock_base {
 	struct timerqueue_head	active;
 	ktime_t			(*get_time)(void);
 	ktime_t			offset;
-	struct hrtimer		*running;
 } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
 
 enum  hrtimer_base_type {
@@ -159,6 +160,8 @@ enum  hrtimer_base_type {
  * struct hrtimer_cpu_base - the per cpu clock bases
  * @lock:		lock protecting the base and associated clock bases
  *			and timers
+ * @seq:		seqcount around __run_hrtimer
+ * @running:		pointer to the currently running hrtimer
  * @cpu:		cpu number
  * @active_bases:	Bitfield to mark bases with active timers
  * @clock_was_set_seq:	Sequence counter of clock was set events
@@ -180,6 +183,8 @@ enum  hrtimer_base_type {
  */
 struct hrtimer_cpu_base {
 	raw_spinlock_t			lock;
+	seqcount_t			seq;
+	struct hrtimer			*running;
 	unsigned int			cpu;
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
@@ -394,8 +399,24 @@ extern u64 hrtimer_get_next_event(void);
  */
 static inline int hrtimer_active(const struct hrtimer *timer)
 {
-	return timer->state != HRTIMER_STATE_INACTIVE ||
-		timer->base->running == timer;
+	struct hrtimer_cpu_base *cpu_base;
+	unsigned int seq;
+	bool active;
+
+	do {
+		active = false;
+		cpu_base = READ_ONCE(timer->base->cpu_base);
+		seqcount_lockdep_reader_access(&cpu_base->seq);
+		seq = raw_read_seqcount(&cpu_base->seq);
+
+		if (timer->state != HRTIMER_STATE_INACTIVE ||
+		    cpu_base->running == timer)
+			active = true;
+
+	} while (read_seqcount_retry(&cpu_base->seq, seq) ||
+		 cpu_base != READ_ONCE(timer->base->cpu_base));
+
+	return active;
 }
 
 /*
@@ -412,7 +433,7 @@ static inline int hrtimer_is_queued(stru
  */
 static inline int hrtimer_callback_running(struct hrtimer *timer)
 {
-	return timer->base->running == timer;
+	return timer->base->cpu_base->running == timer;
 }
 
 /* Forward a hrtimer so it expires after now: */
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -67,6 +67,7 @@
 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 {
 	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
+	.seq = SEQCNT_ZERO(hrtimer_bases.seq),
 	.clock_base =
 	{
 		{
@@ -113,9 +114,15 @@ static inline int hrtimer_clockid_to_bas
 /*
  * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
  * such that hrtimer_callback_running() can unconditionally dereference
- * timer->base.
+ * timer->base->cpu_base
  */
-static struct hrtimer_clock_base migration_base;
+static struct hrtimer_cpu_base migration_cpu_base = {
+	.seq = SEQCNT_ZERO(migration_cpu_base),
+};
+
+static struct hrtimer_clock_base migration_base {
+	.cpu_base = &migration_cpu_base,
+};
 
 /*
  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
@@ -1118,10 +1125,16 @@ static void __run_hrtimer(struct hrtimer
 	enum hrtimer_restart (*fn)(struct hrtimer *);
 	int restart;
 
-	WARN_ON(!irqs_disabled());
+	lockdep_assert_held(&cpu_base->lock);
 
 	debug_deactivate(timer);
-	base->running = timer;
+	cpu_base->running = timer;
+
+	/*
+	 * separate the ->running assignment from the ->state assignment
+	 */
+	write_seqcount_begin(&cpu_base->seq);
+
 	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
 	timer_stats_account_hrtimer(timer);
 	fn = timer->function;
@@ -1150,8 +1163,13 @@ static void __run_hrtimer(struct hrtimer
 	    !(timer->state & HRTIMER_STATE_ENQUEUED))
 		enqueue_hrtimer(timer, base);
 
-	WARN_ON_ONCE(base->running != timer);
-	base->running = NULL;
+	/*
+	 * separate the ->running assignment from the ->state assignment
+	 */
+	write_seqcount_end(&cpu_base->seq);
+
+	WARN_ON_ONCE(cpu_base->running != timer);
+	cpu_base->running = NULL;
 }
 
 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ