lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 29 Nov 2017 16:30:53 +0100
From:   Anna-Maria Gleixner <anna-maria@...utronix.de>
To:     LKML <linux-kernel@...r.kernel.org>
Cc:     Thomas Gleixner <tglx@...utronix.de>,
        Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>, keescook@...omium.org,
        Christoph Hellwig <hch@....de>,
        John Stultz <john.stultz@...aro.org>,
        Anna-Maria Gleixner <anna-maria@...utronix.de>
Subject: [PATCH v3 28/36] hrtimer: Implement support for softirq based hrtimers

hrtimer callbacks are always invoked in hard interrupt context. Several
users in tree require soft interrupt context for their callbacks and
achieve this by combining a hrtimer with a tasklet. The hrtimer schedules
the tasklet in hard interrupt context and the tasklet callback gets invoked
in softirq context later.

That's suboptimal and aside of that the real-time patch moves most of the
hrtimers into softirq context. So adding native support for hrtimers
expiring in softirq context is a valuable extension for both mainline and
the RT patch set.

Each valid hrtimer clock id has two associated hrtimer clock bases: one for
timers expiring in hardirq context and one for timers expiring in softirq
context.

Implement the functionality to associate a hrtimer with the hard or softirq
related clock bases and update the relevant functions to take them into
account when the next expiry time needs to be evaluated.

Add a check into the hard interrupt context handler functions to check
whether the first expiring softirq based timer has expired. If it's expired
the softirq is raised and the accounting of softirq based timers to
evaluate the next expiry time for programming the timer hardware is skipped
until the softirq processing has finished. At the end of the softirq
processing the regular processing is resumed.

Suggested-by: Thomas Gleixner <tglx@...utronix.de>
Suggested-by: Peter Zijlstra <peterz@...radead.org>
Signed-off-by: Anna-Maria Gleixner <anna-maria@...utronix.de>
---
 include/linux/hrtimer.h |  21 +++--
 kernel/time/hrtimer.c   | 201 ++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 186 insertions(+), 36 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 26ae8a868ea8..c7902ca7c9f4 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -103,6 +103,7 @@ enum hrtimer_restart {
  * @base:	pointer to the timer base (per cpu and per clock)
  * @state:	state information (See bit values above)
  * @is_rel:	Set if the timer was armed relative
+ * @is_soft:	Set if hrtimer will be expired in soft interrupt context.
  *
  * The hrtimer structure must be initialized by hrtimer_init()
  */
@@ -113,6 +114,7 @@ struct hrtimer {
 	struct hrtimer_clock_base	*base;
 	u8				state;
 	u8				is_rel;
+	u8				is_soft;
 };
 
 /**
@@ -178,13 +180,18 @@ enum  hrtimer_base_type {
  * @hres_active:	State of high resolution mode
  * @in_hrtirq:		hrtimer_interrupt() is currently executing
  * @hang_detected:	The last hrtimer interrupt detected a hang
+ * @softirq_activated:	displays, if the softirq is raised - update of softirq
+ *			related settings is not required then.
  * @nr_events:		Total number of hrtimer interrupt events
  * @nr_retries:		Total number of hrtimer interrupt retries
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
  * @expires_next:	absolute time of the next event, is required for remote
- *			hrtimer enqueue
+ *			hrtimer enqueue; it is the total first expiry time (hard
+ *			and soft hrtimer are taken into account)
  * @next_timer:		Pointer to the first expiring timer
+ * @softirq_expires_next: Time to check, if soft queues needs also to be expired
+ * @softirq_next_timer: Pointer to the first expiring softirq based timer
  * @clock_base:		array of clock bases for this cpu
  *
  * Note: next_timer is just an optimization for __remove_hrtimer().
@@ -196,9 +203,10 @@ struct hrtimer_cpu_base {
 	unsigned int			cpu;
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
-	unsigned int			hres_active	: 1,
-					in_hrtirq	: 1,
-					hang_detected	: 1;
+	unsigned int			hres_active		: 1,
+					in_hrtirq		: 1,
+					hang_detected		: 1,
+					softirq_activated       : 1;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	unsigned int			nr_events;
 	unsigned short			nr_retries;
@@ -207,6 +215,8 @@ struct hrtimer_cpu_base {
 #endif
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
+	ktime_t				softirq_expires_next;
+	struct hrtimer			*softirq_next_timer;
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 } ____cacheline_aligned;
 
@@ -379,7 +389,8 @@ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  * @timer:	the timer to be added
  * @tim:	expiry time
  * @mode:	timer mode: absolute (HRTIMER_MODE_ABS) or
- *		relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED)
+ *		relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
+ *		softirq based mode is considered for debug purpose only!
  */
 static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
 				 const enum hrtimer_mode mode)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 65211fd7288d..40b14f025829 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -68,6 +68,9 @@
 #define HRTIMER_ACTIVE_SOFT	(HRTIMER_ACTIVE_HARD << MASK_SHIFT)
 #define HRTIMER_ACTIVE_ALL	(HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
 
+/* Define for debug mode check */
+#define HRTIMER_MODECHECK	true
+
 /*
  * The timer bases:
  *
@@ -411,8 +414,17 @@ static inline void debug_hrtimer_init(struct hrtimer *timer)
 	debug_object_init(timer, &hrtimer_debug_descr);
 }
 
-static inline void debug_hrtimer_activate(struct hrtimer *timer)
+static inline void debug_hrtimer_activate(struct hrtimer *timer,
+					  enum hrtimer_mode mode,
+					  bool modecheck)
 {
+	/*
+	 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
+	 * match, when a timer is started via__hrtimer_start_range_ns().
+	 */
+	if (modecheck)
+		WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+
 	debug_object_activate(timer, &hrtimer_debug_descr);
 }
 
@@ -444,8 +456,11 @@ void destroy_hrtimer_on_stack(struct hrtimer *timer)
 EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);
 
 #else
+
 static inline void debug_hrtimer_init(struct hrtimer *timer) { }
-static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
+static inline void debug_hrtimer_activate(struct hrtimer *timer,
+					  enum hrtimer_mode mode,
+					  bool modecheck) { }
 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
 #endif
 
@@ -458,9 +473,10 @@ debug_init(struct hrtimer *timer, clockid_t clockid,
 }
 
 static inline void debug_activate(struct hrtimer *timer,
-				  enum hrtimer_mode mode)
+				  enum hrtimer_mode mode,
+				  bool modecheck)
 {
-	debug_hrtimer_activate(timer);
+	debug_hrtimer_activate(timer, mode, modecheck);
 	trace_hrtimer_start(timer, mode);
 }
 
@@ -487,7 +503,6 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
 #define for_each_active_base(base, cpu_base, active)	\
 	while ((base = __next_base((cpu_base), &(active))))
 
-#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
 static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
 					 unsigned int active,
 					 ktime_t expires_next)
@@ -504,7 +519,10 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
 		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 		if (expires < expires_next) {
 			expires_next = expires;
-			cpu_base->next_timer = timer;
+			if (timer->is_soft)
+				cpu_base->softirq_next_timer = timer;
+			else
+				cpu_base->next_timer = timer;
 		}
 	}
 	/*
@@ -521,25 +539,42 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
  * Recomputes cpu_base::*next_timer and returns the earliest expires_next but
  * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram.
  *
+ * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
+ * those timers will get run whenever the softirq gets handled, at the end of
+ * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
+ *
+ * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
+ * The !softirq values are the minima across HRTIMER_ACTIVE, unless an actual
+ * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
+ *
  * @active_mask must be one of:
  *  - HRTIMER_ACTIVE,
  *  - HRTIMER_ACTIVE_SOFT, or
  *  - HRTIMER_ACTIVE_HARD.
  */
-static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base,
-					unsigned int active_mask)
+static ktime_t
+__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
 {
 	unsigned int active;
+	struct hrtimer *next_timer = NULL;
 	ktime_t expires_next = KTIME_MAX;
 
-	cpu_base->next_timer = NULL;
+	if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
+		active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
+		cpu_base->softirq_next_timer = NULL;
+		expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX);
 
-	active = cpu_base->active_bases & active_mask;
-	expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+		next_timer = cpu_base->softirq_next_timer;
+	}
+
+	if (active_mask & HRTIMER_ACTIVE_HARD) {
+		active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
+		cpu_base->next_timer = next_timer;
+		expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+	}
 
 	return expires_next;
 }
-#endif
 
 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 {
@@ -547,8 +582,14 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
 	ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
 
-	return ktime_get_update_offsets_now(&base->clock_was_set_seq,
+	ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
 					    offs_real, offs_boot, offs_tai);
+
+	base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
+	base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
+	base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
+
+	return now;
 }
 
 /*
@@ -575,7 +616,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 {
 	ktime_t expires_next;
 
-	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
+	/*
+	 * Find the current next expiration time.
+	 */
+	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
+
+	if (cpu_base->next_timer && cpu_base->next_timer->is_soft) {
+		/*
+		 * When the softirq is activated, hrtimer has to be
+		 * programmed with the first hard hrtimer because soft
+		 * timer interrupt could occur too late.
+		 */
+		if (cpu_base->softirq_activated)
+			expires_next = __hrtimer_get_next_event(cpu_base,
+								HRTIMER_ACTIVE_HARD);
+		else
+			cpu_base->softirq_expires_next = expires_next;
+	}
 
 	if (skip_equal && expires_next == cpu_base->expires_next)
 		return;
@@ -702,7 +759,7 @@ static inline void retrigger_next_event(void *arg) { }
  *
  * Called with interrupts disabled and base->cpu_base.lock held
  */
-static void hrtimer_reprogram(struct hrtimer *timer)
+static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
 	struct hrtimer_clock_base *base = timer->base;
@@ -711,6 +768,28 @@ static void hrtimer_reprogram(struct hrtimer *timer)
 	WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
 
 	/*
+	 * CLOCK_REALTIME timer might be requested with an absolute
+	 * expiry time which is less than base->offset. Set it to 0.
+	 */
+	if (expires < 0)
+		expires = 0;
+
+	if (timer->is_soft) {
+		if (cpu_base->softirq_activated)
+			return;
+
+		if (!ktime_before(expires, cpu_base->softirq_expires_next))
+			return;
+
+		cpu_base->softirq_next_timer = timer;
+		cpu_base->softirq_expires_next = expires;
+
+		if (!ktime_before(expires, cpu_base->expires_next) ||
+		    !reprogram)
+			return;
+	}
+
+	/*
 	 * If the timer is not on the current cpu, we cannot reprogram
 	 * the other cpus clock event device.
 	 */
@@ -727,13 +806,6 @@ static void hrtimer_reprogram(struct hrtimer *timer)
 	if (cpu_base->in_hrtirq)
 		return;
 
-	/*
-	 * CLOCK_REALTIME timer might be requested with an absolute
-	 * expiry time which is less than base->offset. Set it to 0.
-	 */
-	if (expires < 0)
-		expires = 0;
-
 	if (expires >= cpu_base->expires_next)
 		return;
 
@@ -866,9 +938,10 @@ EXPORT_SYMBOL_GPL(hrtimer_forward);
  */
 static int enqueue_hrtimer(struct hrtimer *timer,
 			   struct hrtimer_clock_base *base,
-			   enum hrtimer_mode mode)
+			   enum hrtimer_mode mode,
+			   bool modecheck)
 {
-	debug_activate(timer, mode);
+	debug_activate(timer, mode, modecheck);
 
 	base->cpu_base->active_bases |= 1 << base->index;
 
@@ -959,6 +1032,31 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
 	return tim;
 }
 
+static void
+hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
+{
+	ktime_t expires;
+
+	/*
+	 * Find the next SOFT expiration.
+	 */
+	expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
+
+	/*
+	 * reprogramming needs to be triggered, even if the next soft
+	 * hrtimer expires at the same time than the next hard
+	 * hrtimer. cpu_base->softirq_expires_next needs to be updated!
+	 */
+	if (!reprogram || expires == KTIME_MAX)
+		return;
+
+	/*
+	 * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()
+	 * cpu_base->*expires_next is only set by hrtimer_reprogram()
+	 */
+	hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram);
+}
+
 static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 				    u64 delta_ns, const enum hrtimer_mode mode,
 				    struct hrtimer_clock_base *base)
@@ -978,7 +1076,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	/* Switch the timer base, if necessary: */
 	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
 
-	return enqueue_hrtimer(timer, new_base, mode);
+	return enqueue_hrtimer(timer, new_base, mode, HRTIMER_MODECHECK);
 }
 /**
  * hrtimer_start_range_ns - (re)start an hrtimer
@@ -986,7 +1084,8 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  * @tim:	expiry time
  * @delta_ns:	"slack" range for the timer
  * @mode:	timer mode: absolute (HRTIMER_MODE_ABS) or
- *		relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED)
+ *		relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
+ *		softirq based mode is considered for debug purpose only!
  */
 void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 			    u64 delta_ns, const enum hrtimer_mode mode)
@@ -997,7 +1096,7 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	base = lock_hrtimer_base(timer, &flags);
 
 	if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
-		hrtimer_reprogram(timer);
+		hrtimer_reprogram(timer, true);
 
 	unlock_hrtimer_base(timer, &flags);
 }
@@ -1096,7 +1195,7 @@ u64 hrtimer_get_next_event(void)
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	if (!__hrtimer_hres_active(cpu_base))
-		expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
+		expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
@@ -1254,7 +1353,8 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	 */
 	if (restart != HRTIMER_NORESTART &&
 	    !(timer->state & HRTIMER_STATE_ENQUEUED))
-		enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS);
+		enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS,
+				!HRTIMER_MODECHECK);
 
 	/*
 	 * Separate the ->running assignment from the ->state assignment.
@@ -1306,6 +1406,23 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
 	}
 }
 
+static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
+{
+	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+	unsigned long flags;
+	ktime_t now;
+
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
+
+	now = hrtimer_update_base(cpu_base);
+	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);
+
+	cpu_base->softirq_activated = 0;
+	hrtimer_update_softirq_timer(cpu_base, true);
+
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+}
+
 #ifdef CONFIG_HIGH_RES_TIMERS
 
 /*
@@ -1336,10 +1453,16 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	 */
 	cpu_base->expires_next = KTIME_MAX;
 
+	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
+		cpu_base->softirq_expires_next = KTIME_MAX;
+		cpu_base->softirq_activated = 1;
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+	}
+
 	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
 
 	/* Reevaluate the clock bases for the next expiry */
-	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
+	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
 	/*
 	 * Store the new expiry value so the migration code can verify
 	 * against it.
@@ -1443,6 +1566,13 @@ void hrtimer_run_queues(void)
 
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 	now = hrtimer_update_base(cpu_base);
+
+	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
+		cpu_base->softirq_expires_next = KTIME_MAX;
+		cpu_base->softirq_activated = 1;
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+	}
+
 	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 }
@@ -1624,6 +1754,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	cpu_base->cpu = cpu;
 	cpu_base->hres_active = 0;
 	cpu_base->expires_next = KTIME_MAX;
+	cpu_base->softirq_expires_next = KTIME_MAX;
 	return 0;
 }
 
@@ -1655,7 +1786,8 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		 * sort out already expired timers and reprogram the
 		 * event device.
 		 */
-		enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);
+		enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS,
+				!HRTIMER_MODECHECK);
 	}
 }
 
@@ -1682,6 +1814,12 @@ int hrtimers_dead_cpu(unsigned int scpu)
 				     &new_base->clock_base[i]);
 	}
 
+	/*
+	 * The migration might have changed the first expiring softirq
+	 * timer on this CPU. Update it.
+	 */
+	hrtimer_update_softirq_timer(new_base, false);
+
 	raw_spin_unlock(&old_base->lock);
 	raw_spin_unlock(&new_base->lock);
 
@@ -1696,6 +1834,7 @@ int hrtimers_dead_cpu(unsigned int scpu)
 void __init hrtimers_init(void)
 {
 	hrtimers_prepare_cpu(smp_processor_id());
+	open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
 }
 
 /**
-- 
2.11.0

Powered by blists - more mailing lists