From: Anna-Maria Gleixner <anna-maria@linutronix.de>

hrtimer callbacks are always invoked in hard interrupt context. Several
users in tree require soft interrupt context for their callbacks and
achieve this by combining a hrtimer with a tasklet. The hrtimer schedules
the tasklet in hard interrupt context and the tasklet callback gets
invoked in softirq context later.

That's suboptimal and, aside from that, the real-time patch moves most of
the hrtimers into softirq context. So adding native support for hrtimers
expiring in softirq context is a valuable extension for both mainline and
the RT patch set.

Each valid hrtimer clock id has two associated hrtimer clock bases: one
for timers expiring in hardirq context and one for timers expiring in
softirq context.

Implement the functionality to associate a hrtimer with the hard or
softirq related clock bases and update the relevant functions to take
them into account when the next expiry time needs to be evaluated.

Add a check into the hard interrupt context handler functions to check
whether the first expiring softirq based timer has expired. If it has
expired, the softirq is raised and the accounting of softirq based timers
to evaluate the next expiry time for programming the timer hardware is
skipped until the softirq processing has finished. At the end of the
softirq processing the regular processing is resumed.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
---
 include/linux/hrtimer.h |  14 +++
 kernel/time/hrtimer.c   | 191 ++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 174 insertions(+), 31 deletions(-)
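For context: the hrtimer plus tasklet combination the changelog refers to
looks roughly like the sketch below (illustrative only, all names are made
up). The hrtimer callback runs in hard interrupt context and merely
schedules the tasklet, which then does the real work in softirq context:

#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/ktime.h>

static struct tasklet_struct mydrv_work;	/* hypothetical user */
static struct hrtimer mydrv_timer;

/* Invoked in softirq context, some time after the timer fired */
static void mydrv_work_fn(unsigned long data)
{
	/* the actual work happens here */
}

/* Invoked in hard interrupt context: only bounce to the tasklet */
static enum hrtimer_restart mydrv_timer_fn(struct hrtimer *timer)
{
	tasklet_schedule(&mydrv_work);
	return HRTIMER_NORESTART;
}

static void mydrv_setup(void)
{
	tasklet_init(&mydrv_work, mydrv_work_fn, 0);
	hrtimer_init(&mydrv_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	mydrv_timer.function = mydrv_timer_fn;
	hrtimer_start(&mydrv_timer, ms_to_ktime(100), HRTIMER_MODE_REL);
}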
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -113,6 +113,7 @@ struct hrtimer {
 	struct hrtimer_clock_base	*base;
 	u8				state;
 	u8				is_rel;
+	u8				is_soft;
 };
 
 /**
@@ -177,6 +178,8 @@ enum hrtimer_base_type {
  * @clock_was_set_seq:	Sequence counter of clock was set events
  * @migration_enabled:	The migration of hrtimers to other cpus is enabled
  * @nohz_active:	The nohz functionality is enabled
+ * @softirq_activated:	Indicates whether the softirq was raised; softirq
+ *			related settings do not need to be updated then
  * @in_hrtirq:		hrtimer_interrupt() is currently executing
  * @hres_active:	State of high resolution mode
  * @hang_detected:	The last hrtimer interrupt detected a hang
@@ -185,8 +188,11 @@ enum hrtimer_base_type {
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
  * @expires_next:	absolute time of the next event, is required for remote
- *			hrtimer enqueue
+ *			hrtimer enqueue; it is the total first expiry time (hard
+ *			and soft hrtimer are taken into account)
  * @next_timer:		Pointer to the first expiring timer
+ * @softirq_expires_next: Time to check whether the soft queues need to be expired
+ * @softirq_next_timer: Pointer to the first expiring softirq based timer
  * @clock_base:		array of clock bases for this cpu
  *
  * Note: next_timer is just an optimization for __remove_hrtimer().
@@ -200,6 +206,7 @@ struct hrtimer_cpu_base {
 	unsigned int			clock_was_set_seq;
 	unsigned int			migration_enabled	: 1,
 					nohz_active		: 1,
+					softirq_activated	: 1,
 					in_hrtirq		: 1,
 					hres_active		: 1,
 					hang_detected		: 1;
@@ -211,6 +218,8 @@ struct hrtimer_cpu_base {
 #endif
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
+	ktime_t				softirq_expires_next;
+	struct hrtimer			*softirq_next_timer;
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 } ____cacheline_aligned;
 
@@ -383,7 +392,8 @@ extern void hrtimer_start_range_ns(struc
  * @timer:	the timer to be added
  * @tim:	expiry time
  * @mode:	timer mode: absolute (HRTIMER_MODE_ABS) or
- *		relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED)
+ *		relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
+ *		softirq based mode is considered for debug purposes only!
  */
 static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
 				 const enum hrtimer_mode mode)
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -68,6 +68,9 @@
 #define HRTIMER_ACTIVE_SOFT	(HRTIMER_ACTIVE_HARD << MASK_SHIFT)
 #define HRTIMER_ACTIVE_ALL	(HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
 
+/* Define for debug mode check */
+#define HRTIMER_MODECHECK	true
+
 /*
  * The timer bases:
  *
@@ -418,8 +421,17 @@ static inline void debug_hrtimer_init(st
 	debug_object_init(timer, &hrtimer_debug_descr);
 }
 
-static inline void debug_hrtimer_activate(struct hrtimer *timer)
+static inline void debug_hrtimer_activate(struct hrtimer *timer,
+					  enum hrtimer_mode mode,
+					  bool modecheck)
 {
+	/*
+	 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
+	 * match, when a timer is started via __hrtimer_start_range_ns().
+	 */
+	if (modecheck)
+		WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+
 	debug_object_activate(timer, &hrtimer_debug_descr);
 }
 
@@ -451,8 +463,11 @@ void destroy_hrtimer_on_stack(struct hrt
 EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);
 
 #else
+
 static inline void debug_hrtimer_init(struct hrtimer *timer) { }
-static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
+static inline void debug_hrtimer_activate(struct hrtimer *timer,
+					  enum hrtimer_mode mode,
+					  bool modecheck) { }
 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
 #endif
 
@@ -465,9 +480,10 @@ debug_init(struct hrtimer *timer, clocki
 }
 
 static inline void debug_activate(struct hrtimer *timer,
-				  enum hrtimer_mode mode)
+				  enum hrtimer_mode mode,
+				  bool modecheck)
 {
-	debug_hrtimer_activate(timer);
+	debug_hrtimer_activate(timer, mode, modecheck);
 	trace_hrtimer_start(timer, mode);
 }
 
@@ -477,7 +493,6 @@ static inline void debug_deactivate(stru
 	trace_hrtimer_cancel(timer);
 }
 
-#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
 static struct hrtimer_clock_base *
 __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
 {
@@ -511,7 +526,10 @@ static ktime_t __hrtimer_next_event_base
 		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 		if (expires < expires_next) {
 			expires_next = expires;
-			cpu_base->next_timer = timer;
+			if (timer->is_soft)
+				cpu_base->softirq_next_timer = timer;
+			else
+				cpu_base->next_timer = timer;
 		}
 	}
 	/*
@@ -528,25 +546,42 @@ static ktime_t __hrtimer_next_event_base
  * Recomputes cpu_base::*next_timer and returns the earliest expires_next but
  * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram.
  *
+ * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
+ * those timers will get run whenever the softirq gets handled, at the end of
+ * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
+ *
+ * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
+ * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual
+ * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
+ *
  * @active_mask must be one of:
  *  - HRTIMER_ACTIVE_ALL,
  *  - HRTIMER_ACTIVE_SOFT, or
  *  - HRTIMER_ACTIVE_HARD.
  */
-static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base,
-					unsigned int active_mask)
+static ktime_t
+__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
 {
 	unsigned int active;
+	struct hrtimer *next_timer = NULL;
 	ktime_t expires_next = KTIME_MAX;
 
-	cpu_base->next_timer = NULL;
+	if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
+		active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
+		cpu_base->softirq_next_timer = next_timer;
+		expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+
+		next_timer = cpu_base->softirq_next_timer;
+	}
 
-	active = cpu_base->active_bases & active_mask;
-	expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+	if (active_mask & HRTIMER_ACTIVE_HARD) {
+		active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
+		cpu_base->next_timer = next_timer;
+		expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+	}
 
 	return expires_next;
 }
-#endif
 
 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 {
@@ -554,8 +589,14 @@ static inline ktime_t hrtimer_update_bas
 	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
 	ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
 
-	return ktime_get_update_offsets_now(&base->clock_was_set_seq,
+	ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
 					    offs_real, offs_boot, offs_tai);
+
+	base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
+	base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
+	base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
+
+	return now;
 }
 
 /*
@@ -582,7 +623,17 @@ hrtimer_force_reprogram(struct hrtimer_c
 {
 	ktime_t expires_next;
 
-	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
+	/*
+	 * Find the current next expiration time.
+	 */
+	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
+
+	if (cpu_base->next_timer && cpu_base->next_timer->is_soft) {
+		if (cpu_base->softirq_activated)
+			return;
+
+		cpu_base->softirq_expires_next = expires_next;
+	}
 
 	if (skip_equal && expires_next == cpu_base->expires_next)
 		return;
@@ -718,6 +769,27 @@ static void hrtimer_reprogram(struct hrt
 	WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
 
 	/*
+	 * CLOCK_REALTIME timer might be requested with an absolute
+	 * expiry time which is less than base->offset. Set it to 0.
+	 */
+	if (expires < 0)
+		expires = 0;
+
+	if (timer->is_soft) {
+		if (cpu_base->softirq_activated)
+			return;
+
+		if (!ktime_before(expires, cpu_base->softirq_expires_next))
+			return;
+
+		cpu_base->softirq_next_timer = timer;
+		cpu_base->softirq_expires_next = expires;
+
+		if (!ktime_before(expires, cpu_base->expires_next))
+			return;
+	}
+
+	/*
 	 * If the timer is not on the current cpu, we cannot reprogram
 	 * the other cpus clock event device.
 	 */
@@ -734,13 +806,6 @@ static void hrtimer_reprogram(struct hrt
 	if (cpu_base->in_hrtirq)
 		return;
 
-	/*
-	 * CLOCK_REALTIME timer might be requested with an absolute
-	 * expiry time which is less than base->offset. Set it to 0.
-	 */
-	if (expires < 0)
-		expires = 0;
-
 	if (expires >= cpu_base->expires_next)
 		return;
 
@@ -875,9 +940,10 @@ EXPORT_SYMBOL_GPL(hrtimer_forward);
  */
 static int enqueue_hrtimer(struct hrtimer *timer,
 			   struct hrtimer_clock_base *base,
-			   enum hrtimer_mode mode)
+			   enum hrtimer_mode mode,
+			   bool modecheck)
 {
-	debug_activate(timer, mode);
+	debug_activate(timer, mode, modecheck);
 
 	base->cpu_base->active_bases |= 1 << base->index;
 
@@ -968,6 +1034,32 @@ static inline ktime_t hrtimer_update_low
 	return tim;
 }
 
+static void
+hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
+{
+	ktime_t expires;
+
+	/*
+	 * Find the next SOFT expiration.
+	 */
+	expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
+
+	/*
+	 * reprogramming needs to be triggered, even if the next soft
+	 * hrtimer expires at the same time as the next hard
+	 * hrtimer. cpu_base->softirq_expires_next needs to be updated!
+	 */
+	if (!reprogram || expires == KTIME_MAX)
+		return;
+
+	/*
+	 * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()
+	 * cpu_base->*expires_next is only set by hrtimer_reprogram()
+	 */
+	hrtimer_reprogram(cpu_base->softirq_next_timer);
+}
+
 static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 				    u64 delta_ns, const enum hrtimer_mode mode,
 				    struct hrtimer_clock_base *base)
@@ -987,7 +1079,7 @@ static int __hrtimer_start_range_ns(stru
 	/* Switch the timer base, if necessary: */
 	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
 
-	return enqueue_hrtimer(timer, new_base, mode);
+	return enqueue_hrtimer(timer, new_base, mode, HRTIMER_MODECHECK);
 }
 
 /**
@@ -996,7 +1088,8 @@ static int __hrtimer_start_range_ns(stru
  * @tim:	expiry time
  * @delta_ns:	"slack" range for the timer
  * @mode:	timer mode: absolute (HRTIMER_MODE_ABS) or
- *		relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED)
+ *		relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
+ *		softirq based mode is considered for debug purposes only!
  */
 void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 			    u64 delta_ns, const enum hrtimer_mode mode)
@@ -1106,7 +1199,7 @@ u64 hrtimer_get_next_event(void)
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	if (!__hrtimer_hres_active(cpu_base))
-		expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
+		expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
@@ -1264,7 +1357,8 @@ static void __run_hrtimer(struct hrtimer
 	 */
 	if (restart != HRTIMER_NORESTART &&
 	    !(timer->state & HRTIMER_STATE_ENQUEUED))
-		enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS);
+		enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS,
+				!HRTIMER_MODECHECK);
 
 	/*
 	 * Separate the ->running assignment from the ->state assignment.
@@ -1316,6 +1410,23 @@ static void __hrtimer_run_queues(struct
 	}
 }
 
+static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
+{
+	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+	unsigned long flags;
+	ktime_t now;
+
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
+
+	now = hrtimer_update_base(cpu_base);
+	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);
+
+	cpu_base->softirq_activated = 0;
+	hrtimer_update_softirq_timer(cpu_base, true);
+
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+}
+
 #ifdef CONFIG_HIGH_RES_TIMERS
 
 /*
@@ -1346,10 +1457,16 @@ void hrtimer_interrupt(struct clock_even
 	 */
 	cpu_base->expires_next = KTIME_MAX;
 
+	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
+		cpu_base->softirq_expires_next = KTIME_MAX;
+		cpu_base->softirq_activated = 1;
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+	}
+
 	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
 
 	/* Reevaluate the clock bases for the next expiry */
-	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
+	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
 	/*
 	 * Store the new expiry value so the migration code can verify
 	 * against it.
@@ -1453,6 +1570,13 @@ void hrtimer_run_queues(void)
 
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 	now = hrtimer_update_base(cpu_base);
+
+	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
+		cpu_base->softirq_expires_next = KTIME_MAX;
+		cpu_base->softirq_activated = 1;
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+	}
+
 	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 }
@@ -1634,6 +1758,7 @@ int hrtimers_prepare_cpu(unsigned int cp
 	cpu_base->cpu = cpu;
 	cpu_base->hres_active = 0;
 	cpu_base->expires_next = KTIME_MAX;
+	cpu_base->softirq_expires_next = KTIME_MAX;
 	return 0;
 }
 
@@ -1665,7 +1790,8 @@ static void migrate_hrtimer_list(struct
 		 * sort out already expired timers and reprogram the
 		 * event device.
 		 */
-		enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);
+		enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS,
+				!HRTIMER_MODECHECK);
 	}
 }
 
@@ -1692,6 +1818,12 @@ int hrtimers_dead_cpu(unsigned int scpu)
 				     &new_base->clock_base[i]);
 	}
 
+	/*
+	 * The migration might have changed the first expiring softirq
+	 * timer on this CPU. Update it.
+	 */
+	hrtimer_update_softirq_timer(new_base, false);
+
 	raw_spin_unlock(&old_base->lock);
 	raw_spin_unlock(&new_base->lock);
 
@@ -1706,6 +1838,7 @@ int hrtimers_dead_cpu(unsigned int scpu)
 void __init hrtimers_init(void)
 {
 	hrtimers_prepare_cpu(smp_processor_id());
+	open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
 }
 
 /**
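With this patch applied, the tasklet detour sketched above goes away: the
timer is bound to a softirq clock base at init time and its callback is
invoked in softirq context via the new HRTIMER_SOFTIRQ. A minimal sketch,
assuming the HRTIMER_MODE_REL_SOFT mode (HRTIMER_MODE_REL |
HRTIMER_MODE_SOFT) added earlier in this series; the names are again made
up:

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer mydrv_timer;	/* hypothetical user */

/* Invoked in softirq context via HRTIMER_SOFTIRQ */
static enum hrtimer_restart mydrv_timer_fn(struct hrtimer *timer)
{
	/* do the work directly, then rearm the timer */
	hrtimer_forward_now(timer, ms_to_ktime(100));
	return HRTIMER_RESTART;
}

static void mydrv_setup(void)
{
	/*
	 * The SOFT bit selects a softirq clock base at init time. The
	 * mode used when starting the timer has to carry the same SOFT
	 * bit; the debug_activate() mode check warns on a mismatch.
	 */
	hrtimer_init(&mydrv_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
	mydrv_timer.function = mydrv_timer_fn;
	hrtimer_start(&mydrv_timer, ms_to_ktime(100), HRTIMER_MODE_REL_SOFT);
}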