Date:   Wed, 27 Sep 2017 18:40:26 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Anna-Maria Gleixner <anna-maria@...utronix.de>
Cc:     LKML <linux-kernel@...r.kernel.org>,
        Ingo Molnar <mingo@...hat.com>,
        Christoph Hellwig <hch@....org>, keescook@...omium.org,
        John Stultz <john.stultz@...aro.org>,
        Thomas Gleixner <tglx@...utronix.de>
Subject: Re: [PATCH 17/25] hrtimer: Implementation of softirq hrtimer handling

On Thu, Aug 31, 2017 at 12:23:42PM -0000, Anna-Maria Gleixner wrote:
> @@ -540,12 +539,23 @@ static ktime_t __hrtimer_get_next_event(
>  
>  	hrtimer_update_next_timer(cpu_base, NULL);
>  
> +	if (!cpu_base->softirq_activated) {
> +		active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
> +		expires_next = __hrtimer_next_event_base(cpu_base, active,
> +							 expires_next);
> +		cpu_base->softirq_expires_next = expires_next;
> +	}
> +

So this bugged the living daylights out of me. That's a very asymmetric
thing to do. The normal rule is that get_next_event() updates
cpu_base::next_timer and hrtimer_reprogram() updates
cpu_base::expires_next. And here you make a giant mess of things.
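
To make that rule concrete, here is a toy standalone sketch of the split
(hypothetical names, plain C, not the kernel code): the scan side only
records *which* timer is next; the reprogram side is the only writer of
*when* the event device fires.

#include <stdio.h>
#include <limits.h>

struct cpu_base {
	int next_timer;			/* set by get_next_event() */
	long long expires_next;		/* set by reprogram() only */
};

/* Scan all timers; side effect: record the soonest one. */
static long long get_next_event(struct cpu_base *cb,
				const long long *expires, int n)
{
	long long expires_next = LLONG_MAX;
	int i;

	for (i = 0; i < n; i++) {
		if (expires[i] < expires_next) {
			expires_next = expires[i];
			cb->next_timer = i;	/* which timer, not when */
		}
	}
	return expires_next;	/* caller decides whether to program this */
}

/* The one and only place that commits the programmed expiry. */
static void reprogram(struct cpu_base *cb, long long expires)
{
	cb->expires_next = expires;
}

int main(void)
{
	const long long t[] = { 300, 100, 200 };
	struct cpu_base cb = { -1, LLONG_MAX };

	reprogram(&cb, get_next_event(&cb, t, 3));
	printf("next timer %d, programmed %lld\n",
	       cb.next_timer, cb.expires_next);
	return 0;
}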

Below is a fairly large diff on top of this patch; it is _completely_
untested, but should hopefully restore sanity.

 - fixes the mixing of bool and bitfields in cpu_base
 - restores the whole get_next_event() / reprogram() symmetry
   by adding cpu_base::softirq_next_timer.
 - adds a comment hopefully explaining the why of that
 - adds a for_each_active_base() helper (see the sketch below this
   list); we had two copies of that, and for a brief moment I had a
   few more, which luckily didn't survive :-)
 - uses irqsave/irqrestore for the cpu_base->lock fiddling around
   __run_hrtimer().
 - folded hrtimer_reprogram() and hrtimer_reprogram_softirq()
 - removed superfluous local_bh_disable(), since local_irq_disable()
   already implies much the same.
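
For reference, the iteration pattern the for_each_active_base() helper
wraps, as a standalone toy (userspace ffs() standing in for the kernel's
0-based __ffs()):

#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	unsigned int active = 0x0b;	/* bases 0, 1 and 3 active */

	/* Peel off the lowest set bit until the mask is empty. */
	while (active) {
		int idx = ffs(active) - 1;	/* ffs() is 1-based */

		active &= ~(1U << idx);	/* clear it, or we loop forever */
		printf("visit clock base %d\n", idx);
	}
	return 0;
}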

Please consider...

---
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -186,21 +186,26 @@ struct hrtimer_cpu_base {
 	unsigned int			cpu;
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
-	bool				migration_enabled;
-	bool				nohz_active;
-	bool				softirq_activated;
-	unsigned int			hres_active	: 1,
-					in_hrtirq	: 1,
-					hang_detected	: 1;
+
+	unsigned int			migration_enabled : 1;
+	unsigned int			nohz_active	  : 1;
+	unsigned int			softirq_activated : 1;
+	unsigned int			hres_active	  : 1;
+	unsigned int			in_hrtirq	  : 1;
+	unsigned int			hang_detected	  : 1;
+
 #ifdef CONFIG_HIGH_RES_TIMERS
 	unsigned int			nr_events;
 	unsigned int			nr_retries;
 	unsigned int			nr_hangs;
 	unsigned int			max_hang_time;
 #endif
+
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
 	ktime_t				softirq_expires_next;
+	struct hrtimer			*softirq_next_timer;
+
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 } ____cacheline_aligned;
 
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -78,6 +78,7 @@
 #define MASK_SHIFT		(HRTIMER_BASE_MONOTONIC_SOFT)
 #define HRTIMER_ACTIVE_HARD	((1U << MASK_SHIFT) - 1)
 #define HRTIMER_ACTIVE_SOFT	(HRTIMER_ACTIVE_HARD << MASK_SHIFT)
+#define HRTIMER_ACTIVE		(HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
 
 /*
  * The timer bases:
@@ -493,33 +494,49 @@ static inline void debug_deactivate(stru
 	trace_hrtimer_cancel(timer);
 }
 
-static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base,
-					     struct hrtimer *timer)
+static inline bool hrtimer_is_softirq(struct hrtimer *timer)
 {
-	cpu_base->next_timer = timer;
+	return timer->base->index >= HRTIMER_BASE_MONOTONIC_SOFT;
+}
+
+
+static struct hrtimer_clock_base *
+__next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
+{
+	struct hrtimer_clock_base *base = NULL;
+
+	if (*active) {
+		int idx = __ffs(*active);
+		*active &= ~(1U << idx);
+		base = &cpu_base->clock_base[idx];
+	}
+
+	return base;
 }
 
+#define for_each_active_base(base, cpu_base, active)	\
+	while ((base = __next_base((cpu_base), &(active))))
+
 static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
 					 unsigned int active,
 					 ktime_t expires_next)
 {
+	struct hrtimer_clock_base *base;
 	ktime_t expires;
 
-	while (active) {
-		unsigned int id = __ffs(active);
-		struct hrtimer_clock_base *base;
+	for_each_active_base(base, cpu_base, active) {
 		struct timerqueue_node *next;
 		struct hrtimer *timer;
 
-		active &= ~(1U << id);
-		base = cpu_base->clock_base + id;
-
 		next = timerqueue_getnext(&base->active);
 		timer = container_of(next, struct hrtimer, node);
 		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 		if (expires < expires_next) {
 			expires_next = expires;
-			hrtimer_update_next_timer(cpu_base, timer);
+			if (hrtimer_is_softirq(timer))
+				cpu_base->softirq_next_timer = timer;
+			else
+				cpu_base->next_timer = timer;
 		}
 	}
 	/*
@@ -529,30 +546,47 @@ static ktime_t __hrtimer_next_event_base
 	 */
 	if (expires_next < 0)
 		expires_next = 0;
+
 	return expires_next;
 }
 
-static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
+/*
+ * Recomputes cpu_base::*next_timer and returns the earliest expires_next, but
+ * does not set cpu_base::*expires_next; that is done by hrtimer_reprogram().
+ *
+ * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases;
+ * those timers will get run whenever the softirq gets handled. At the end of
+ * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
+ *
+ * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
+ * The !softirq values are the minima across HRTIMER_ACTIVE, unless an actual
+ * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
+ *
+ * @active_mask must be one of:
+ *  - HRTIMER_ACTIVE,
+ *  - HRTIMER_ACTIVE_SOFT, or
+ *  - HRTIMER_ACTIVE_HARD.
+ */
+static ktime_t
+__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
 {
 	unsigned int active;
+	struct hrtimer *next_timer = NULL;
 	ktime_t expires_next = KTIME_MAX;
 
-	hrtimer_update_next_timer(cpu_base, NULL);
-
-	if (!cpu_base->softirq_activated) {
+	if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
 		active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
-		expires_next = __hrtimer_next_event_base(cpu_base, active,
-							 expires_next);
-		cpu_base->softirq_expires_next = expires_next;
-	}
+		cpu_base->softirq_next_timer = next_timer;
+		expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
 
-	active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
-	expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+		next_timer = cpu_base->softirq_next_timer;
+	}
 
-	/*
-	 * cpu_base->expires_next is not updated here. It is set only
-	 * in hrtimer_reprogramming path!
-	 */
+	if (active_mask & HRTIMER_ACTIVE_HARD) {
+		active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
+		cpu_base->next_timer = next_timer;
+		expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+	}
 
 	return expires_next;
 }
@@ -621,7 +655,10 @@ hrtimer_force_reprogram(struct hrtimer_c
 	if (!cpu_base->hres_active)
 		return;
 
-	expires_next = __hrtimer_get_next_event(cpu_base);
+	/*
+	 * Find the current next expiration time.
+	 */
+	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE);
 
 	if (skip_equal && expires_next == cpu_base->expires_next)
 		return;
@@ -727,6 +764,20 @@ static void hrtimer_reprogram(struct hrt
 
 	WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
 
+	if (hrtimer_is_softirq(timer)) {
+		if (cpu_base->softirq_activated)
+			return;
+
+		if (!ktime_before(expires, cpu_base->softirq_expires_next))
+			return;
+
+		cpu_base->softirq_next_timer = timer;
+		cpu_base->softirq_expires_next = expires;
+
+		if (!ktime_before(expires, cpu_base->expires_next))
+			return;
+	}
+
 	/*
 	 * If the timer is not on the current cpu, we cannot reprogram
 	 * the other cpus clock event device.
@@ -755,7 +806,7 @@ static void hrtimer_reprogram(struct hrt
 		return;
 
 	/* Update the pointer to the next expiring timer */
-	hrtimer_update_next_timer(cpu_base, timer);
+	cpu_base->next_timer = timer;
 	cpu_base->expires_next = expires;
 
 	/*
@@ -979,47 +1030,24 @@ static inline ktime_t hrtimer_update_low
 	return tim;
 }
 
-static void hrtimer_reprogram_softirq(struct hrtimer *timer)
+static void
+hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
 {
-	struct hrtimer_clock_base *base = timer->base;
-	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
 	ktime_t expires;
 
 	/*
-	 * The softirq timer is not rearmed, when the softirq was raised
-	 * and has not yet run to completion.
+	 * Find the next SOFT expiration.
 	 */
-	if (cpu_base->softirq_activated)
-		return;
-
-	expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
-
-	if (!ktime_before(expires, cpu_base->softirq_expires_next))
-		return;
-
-	cpu_base->softirq_expires_next = expires;
-
-	if (!ktime_before(expires, cpu_base->expires_next))
-		return;
-	hrtimer_reprogram(timer);
-}
-
-static void hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base,
-					 bool reprogram)
-{
-	ktime_t expires;
-
-	expires = __hrtimer_get_next_event(cpu_base);
+	expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
 
 	if (!reprogram || !ktime_before(expires, cpu_base->expires_next))
 		return;
+
 	/*
-	 * next_timer can be used here, because
-	 * hrtimer_get_next_event() updated the next
-	 * timer. expires_next is only set when reprogramming function
-	 * is called.
+	 * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event();
+	 * cpu_base->*expires_next is only set by hrtimer_reprogram().
 	 */
-	hrtimer_reprogram(cpu_base->next_timer);
+	hrtimer_reprogram(cpu_base->softirq_next_timer);
 }
 
 static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
@@ -1060,12 +1088,9 @@ void hrtimer_start_range_ns(struct hrtim
 
 	base = lock_hrtimer_base(timer, &flags);
 
-	if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) {
-		if (timer->base->index < HRTIMER_BASE_MONOTONIC_SOFT)
-			hrtimer_reprogram(timer);
-		else
-			hrtimer_reprogram_softirq(timer);
-	}
+	if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
+		hrtimer_reprogram(timer);
+
 	unlock_hrtimer_base(timer, &flags);
 }
 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
@@ -1163,7 +1188,7 @@ u64 hrtimer_get_next_event(void)
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	if (!__hrtimer_hres_active(cpu_base))
-		expires = __hrtimer_get_next_event(cpu_base);
+		expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
@@ -1263,7 +1288,7 @@ EXPORT_SYMBOL_GPL(hrtimer_active);
 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 			  struct hrtimer_clock_base *base,
 			  struct hrtimer *timer, ktime_t *now,
-			  bool hardirq)
+			  unsigned long flags)
 {
 	enum hrtimer_restart (*fn)(struct hrtimer *);
 	int restart;
@@ -1298,19 +1323,13 @@ static void __run_hrtimer(struct hrtimer
 	 * protected against migration to a different CPU even if the lock
 	 * is dropped.
 	 */
-	if (hardirq)
-		raw_spin_unlock(&cpu_base->lock);
-	else
-		raw_spin_unlock_irq(&cpu_base->lock);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
 	trace_hrtimer_expire_entry(timer, now);
 	restart = fn(timer);
 	trace_hrtimer_expire_exit(timer);
 
-	if (hardirq)
-		raw_spin_lock(&cpu_base->lock);
-	else
-		raw_spin_lock_irq(&cpu_base->lock);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	/*
 	 * Note: We clear the running state after enqueue_hrtimer and
@@ -1339,19 +1358,15 @@ static void __run_hrtimer(struct hrtimer
 }
 
 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
-				 unsigned int active_mask)
+				 unsigned int active_mask, unsigned long flags)
 {
 	unsigned int active = cpu_base->active_bases & active_mask;
+	struct hrtimer_clock_base *base;
 
-	while (active) {
-		unsigned int id = __ffs(active);
-		struct hrtimer_clock_base *base;
+	for_each_active_base(base, cpu_base, active) {
 		struct timerqueue_node *node;
 		ktime_t basenow;
 
-		active &= ~(1U << id);
-		base = cpu_base->clock_base + id;
-
 		basenow = ktime_add(now, base->offset);
 
 		while ((node = timerqueue_getnext(&base->active))) {
@@ -1374,8 +1389,7 @@ static void __hrtimer_run_queues(struct
 			if (basenow < hrtimer_get_softexpires_tv64(timer))
 				break;
 
-			__run_hrtimer(cpu_base, base, timer, &basenow,
-				      active_mask == HRTIMER_ACTIVE_HARD);
+			__run_hrtimer(cpu_base, base, timer, &basenow, flags);
 		}
 	}
 }
@@ -1383,17 +1397,18 @@ static void __hrtimer_run_queues(struct
 static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+	unsigned long flags;
 	ktime_t now;
 
-	raw_spin_lock_irq(&cpu_base->lock);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	now = hrtimer_update_base(cpu_base);
-	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_SOFT);
+	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_SOFT, flags);
 
 	cpu_base->softirq_activated = 0;
 	hrtimer_update_softirq_timer(cpu_base, true);
 
-	raw_spin_unlock_irq(&cpu_base->lock);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1406,13 +1421,14 @@ void hrtimer_interrupt(struct clock_even
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
 	ktime_t expires_next, now, entry_time, delta;
+	unsigned long flags;
 	int retries = 0;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
 	dev->next_event = KTIME_MAX;
 
-	raw_spin_lock(&cpu_base->lock);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 	entry_time = now = hrtimer_update_base(cpu_base);
 retry:
 	cpu_base->in_hrtirq = 1;
@@ -1431,17 +1447,17 @@ void hrtimer_interrupt(struct clock_even
 		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 	}
 
-	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD);
+	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD, flags);
 
-	/* Reevaluate the hard interrupt clock bases for the next expiry */
-	expires_next = __hrtimer_get_next_event(cpu_base);
+	/* Reevaluate the clock bases for the next expiry */
+	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE);
 	/*
 	 * Store the new expiry value so the migration code can verify
 	 * against it.
 	 */
 	cpu_base->expires_next = expires_next;
 	cpu_base->in_hrtirq = 0;
-	raw_spin_unlock(&cpu_base->lock);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
 	/* Reprogramming necessary ? */
 	if (!tick_program_event(expires_next, 0)) {
@@ -1462,7 +1478,7 @@ void hrtimer_interrupt(struct clock_even
 	 * Acquire base lock for updating the offsets and retrieving
 	 * the current time.
 	 */
-	raw_spin_lock(&cpu_base->lock);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 	now = hrtimer_update_base(cpu_base);
 	cpu_base->nr_retries++;
 	if (++retries < 3)
@@ -1475,7 +1491,8 @@ void hrtimer_interrupt(struct clock_even
 	 */
 	cpu_base->nr_hangs++;
 	cpu_base->hang_detected = 1;
-	raw_spin_unlock(&cpu_base->lock);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+
 	delta = ktime_sub(now, entry_time);
 	if ((unsigned int)delta > cpu_base->max_hang_time)
 		cpu_base->max_hang_time = (unsigned int) delta;
@@ -1517,6 +1534,7 @@ static inline void __hrtimer_peek_ahead_
 void hrtimer_run_queues(void)
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+	unsigned long flags;
 	ktime_t now;
 
 	if (__hrtimer_hres_active(cpu_base))
@@ -1534,7 +1552,7 @@ void hrtimer_run_queues(void)
 		return;
 	}
 
-	raw_spin_lock(&cpu_base->lock);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 	now = hrtimer_update_base(cpu_base);
 
 	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
@@ -1543,8 +1561,8 @@ void hrtimer_run_queues(void)
 		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 	}
 
-	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD);
-	raw_spin_unlock(&cpu_base->lock);
+	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD, flags);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 }
 
 /*
@@ -1768,7 +1786,6 @@ int hrtimers_dead_cpu(unsigned int scpu)
 	BUG_ON(cpu_online(scpu));
 	tick_cancel_sched_timer(scpu);
 
-	local_bh_disable();
 	local_irq_disable();
 	old_base = &per_cpu(hrtimer_bases, scpu);
 	new_base = this_cpu_ptr(&hrtimer_bases);
@@ -1796,7 +1813,6 @@ int hrtimers_dead_cpu(unsigned int scpu)
 	/* Check, if we got expired work to do */
 	__hrtimer_peek_ahead_timers();
 	local_irq_enable();
-	local_bh_enable();
 	return 0;
 }
 
