[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260121162508.011240183@infradead.org>
Date: Wed, 21 Jan 2026 17:20:15 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: tglx@...utronix.de
Cc: arnd@...db.de,
anna-maria@...utronix.de,
frederic@...nel.org,
peterz@...radead.org,
luto@...nel.org,
mingo@...hat.com,
juri.lelli@...hat.com,
vincent.guittot@...aro.org,
dietmar.eggemann@....com,
rostedt@...dmis.org,
bsegall@...gle.com,
mgorman@...e.de,
vschneid@...hat.com,
linux-kernel@...r.kernel.org,
oliver.sang@...el.com
Subject: [PATCH v2 5/6] entry,hrtimer: Push reprogramming timers into the interrupt return path
Currently hrtimer_interrupt() runs expired timers, which can re-arm
themselves, after which it computes the next expiration time and
re-programs the hardware.
However, things like HRTICK, a highres timer driving preemption,
cannot re-arm itself at the point of running, since the next task has
not been determined yet. The schedule() in the interrupt return path
will switch to the next task, which then causes a new hrtimer to be
programmed.
This then results in reprogramming the hardware at least twice, once
after running the timers, and once upon selecting the new task.
Notably, *both* events happen in the interrupt.
By pushing the hrtimer reprogram all the way into the interrupt return
path, it runs after schedule() and this double reprogram can be
avoided.
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
include/asm-generic/thread_info_tif.h | 5 ++++-
include/linux/hrtimer.h | 17 +++++++++++++++++
include/linux/irq-entry-common.h | 2 ++
kernel/entry/common.c | 13 +++++++++++++
kernel/sched/core.c | 10 ++++++++++
kernel/time/hrtimer.c | 28 ++++++++++++++++++++++++----
6 files changed, 70 insertions(+), 5 deletions(-)
--- a/include/asm-generic/thread_info_tif.h
+++ b/include/asm-generic/thread_info_tif.h
@@ -41,11 +41,14 @@
#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
#ifdef HAVE_TIF_RESTORE_SIGMASK
-# define TIF_RESTORE_SIGMASK 10 // Restore signal mask in do_signal() */
+# define TIF_RESTORE_SIGMASK 10 // Restore signal mask in do_signal()
# define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK)
#endif
#define TIF_RSEQ 11 // Run RSEQ fast path
#define _TIF_RSEQ BIT(TIF_RSEQ)
+#define TIF_HRTIMER_REARM 12 // Re-arm the hrtimer
+#define _TIF_HRTIMER_REARM BIT(TIF_HRTIMER_REARM)
+
#endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -175,10 +175,27 @@ extern void hrtimer_interrupt(struct clo
extern unsigned int hrtimer_resolution;
+#ifdef TIF_HRTIMER_REARM
+extern void _hrtimer_rearm(void);
+/*
+ * This is to be called on all irqentry_exit() paths that will enable
+ * interrupts, as well as in the context switch path before switch_to().
+ */
+static inline void hrtimer_rearm(void)
+{
+ if (test_thread_flag(TIF_HRTIMER_REARM))
+ _hrtimer_rearm();
+}
+#else
+static inline void hrtimer_rearm(void) { }
+#endif /* TIF_HRTIMER_REARM */
+
#else
#define hrtimer_resolution (unsigned int)LOW_RES_NSEC
+static inline void hrtimer_rearm(void) { }
+
#endif
static inline ktime_t
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -224,6 +224,8 @@ static __always_inline void __exit_to_us
ti_work = read_thread_flags();
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
ti_work = exit_to_user_mode_loop(regs, ti_work);
+ else
+ hrtimer_rearm();
arch_exit_to_user_mode_prepare(regs, ti_work);
}
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -7,6 +7,7 @@
#include <linux/kmsan.h>
#include <linux/livepatch.h>
#include <linux/tick.h>
+#include <linux/hrtimer.h>
/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
@@ -26,6 +27,16 @@ static __always_inline unsigned long __e
*/
while (ti_work & EXIT_TO_USER_MODE_WORK_LOOP) {
+ /*
+ * If hrtimers need re-arming, do so before enabling IRQs,
+ * except when a reschedule is needed; in that case schedule()
+ * will do this.
+ */
+ if ((ti_work & (_TIF_NEED_RESCHED |
+ _TIF_NEED_RESCHED_LAZY |
+ _TIF_HRTIMER_REARM)) == _TIF_HRTIMER_REARM)
+ hrtimer_rearm();
+
local_irq_enable_exit_to_user(ti_work);
if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
@@ -202,6 +213,7 @@ noinstr void irqentry_exit(struct pt_reg
*/
if (state.exit_rcu) {
instrumentation_begin();
+ hrtimer_rearm();
/* Tell the tracer that IRET will enable interrupts */
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare();
@@ -215,6 +227,7 @@ noinstr void irqentry_exit(struct pt_reg
if (IS_ENABLED(CONFIG_PREEMPTION))
irqentry_exit_cond_resched();
+ hrtimer_rearm();
/* Covers both tracing and lockdep */
trace_hardirqs_on();
instrumentation_end();
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6814,6 +6814,16 @@ static void __sched notrace __schedule(i
keep_resched:
rq->last_seen_need_resched_ns = 0;
+ /*
+ * Notably, this must be called after pick_next_task() but before
+ * switch_to(), since the new task need not be on the return from
+ * interrupt path. Additionally, exit_to_user_mode_loop() relies on
+ * any schedule() call to imply this call, so do it unconditionally.
+ *
+ * We've just cleared TIF_NEED_RESCHED, TIF word should be in cache.
+ */
+ hrtimer_rearm();
+
is_switch = prev != next;
if (likely(is_switch)) {
rq->nr_switches++;
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1892,10 +1892,9 @@ static __latent_entropy void hrtimer_run
* Very similar to hrtimer_force_reprogram(), except it deals with
* in_hrirq and hang_detected.
*/
-static void __hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now)
+static void __hrtimer_rearm(struct hrtimer_cpu_base *cpu_base,
+ ktime_t now, ktime_t expires_next)
{
- ktime_t expires_next = hrtimer_update_next_event(cpu_base);
-
cpu_base->expires_next = expires_next;
cpu_base->in_hrtirq = 0;
@@ -1970,9 +1969,30 @@ void hrtimer_interrupt(struct clock_even
cpu_base->hang_detected = 1;
}
- __hrtimer_rearm(cpu_base, now);
+#ifdef TIF_HRTIMER_REARM
+ set_thread_flag(TIF_HRTIMER_REARM);
+#else
+ __hrtimer_rearm(cpu_base, now, expires_next);
+#endif
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
}
+
+#ifdef TIF_HRTIMER_REARM
+void _hrtimer_rearm(void)
+{
+ struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+ ktime_t now, expires_next;
+
+ lockdep_assert_irqs_disabled();
+
+ scoped_guard (raw_spinlock, &cpu_base->lock) {
+ now = hrtimer_update_base(cpu_base);
+ expires_next = hrtimer_update_next_event(cpu_base);
+ __hrtimer_rearm(cpu_base, now, expires_next);
+ clear_thread_flag(TIF_HRTIMER_REARM);
+ }
+}
+#endif /* TIF_HRTIMER_REARM */
#endif /* !CONFIG_HIGH_RES_TIMERS */
/*
Powered by blists - more mailing lists