[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240516190437.3545310-4-costa.shul@redhat.com>
Date: Thu, 16 May 2024 22:04:33 +0300
From: Costa Shulyupin <costa.shul@...hat.com>
To: longman@...hat.com,
pauld@...hat.com,
juri.lelli@...hat.com,
prarit@...hat.com,
vschneid@...hat.com,
Anna-Maria Behnsen <anna-maria@...utronix.de>,
Frederic Weisbecker <frederic@...nel.org>,
Thomas Gleixner <tglx@...utronix.de>,
Zefan Li <lizefan.x@...edance.com>,
Tejun Heo <tj@...nel.org>,
Johannes Weiner <hannes@...xchg.org>,
Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>,
Daniel Bristot de Oliveira <bristot@...hat.com>,
Petr Mladek <pmladek@...e.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Masahiro Yamada <masahiroy@...nel.org>,
Randy Dunlap <rdunlap@...radead.org>,
Yoann Congal <yoann.congal@...le.fr>,
"Gustavo A. R. Silva" <gustavoars@...nel.org>,
Nhat Pham <nphamcs@...il.com>,
Costa Shulyupin <costa.shul@...hat.com>,
linux-kernel@...r.kernel.org,
cgroups@...r.kernel.org
Subject: [PATCH v1 3/7] sched/isolation: Adjust affinity of hrtimers according to change of housekeeping cpumask
Adjust the affinity of watchdog_cpumask and hrtimers according to
changes of housekeeping.cpumasks[HK_TYPE_TIMER].
Function migrate_hrtimer_list_except() is prototyped from
migrate_hrtimer_list() and is more generic.
Potentially it can be used instead of migrate_hrtimer_list().
Function hrtimers_resettle_from_cpu() is blindly prototyped
from hrtimers_cpu_dying(). local_irq_disable() is used because
cpuhp_thread_fun() uses it before cpuhp_invoke_callback().
Core test snippets without infrastructure:
1. Create hrtimer on specific cpu with:
set_cpus_allowed_ptr(current, cpumask_of(test_cpu));
hrtimer_init(&test_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
test_hrtimer.function = test_hrtimer_cb;
hrtimer_start(&test_hrtimer, -1, HRTIMER_MODE_REL);
2. Call housekeeping_update()
3. Ensure that only tick_nohz_handler is present on the specified cpu
in /proc/timer_list, either manually or with the script:
grep -E 'cpu| #[0-9]' /proc/timer_list | \
awk "/cpu:/{y=0};/cpu: $test_cpu\$/{y=1};y"
Another alternative solution to migrate hrtimers:
1. Use cpuhp to set sched_timer offline
2. Resettle all hrtimers likewise migrate_hrtimer_list
3. Use cpuhp to set sched_timer online
Signed-off-by: Costa Shulyupin <costa.shul@...hat.com>
---
include/linux/hrtimer.h | 2 +
kernel/sched/isolation.c | 2 +
kernel/time/hrtimer.c | 81 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 85 insertions(+)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index aa1e65ccb6158..004632fc7d643 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -381,8 +381,10 @@ extern void sysrq_timer_list_show(void);
int hrtimers_prepare_cpu(unsigned int cpu);
#ifdef CONFIG_HOTPLUG_CPU
int hrtimers_cpu_dying(unsigned int cpu);
+void hrtimers_resettle_from_cpu(unsigned int cpu);
#else
#define hrtimers_cpu_dying NULL
+static inline void hrtimers_resettle_from_cpu(unsigned int cpu) { }
#endif
#endif
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 3b63f0212887e..85a17d39d8bb0 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -126,10 +126,12 @@ static void resettle_all_timers(cpumask_var_t enable_mask, cpumask_var_t disable
for_each_cpu(cpu, enable_mask) {
timers_prepare_cpu(cpu);
+ hrtimers_prepare_cpu(cpu);
}
for_each_cpu(cpu, disable_mask) {
timers_resettle_from_cpu(cpu);
+ hrtimers_resettle_from_cpu(cpu);
}
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 492c14aac642b..7e71ebbb72348 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -2201,6 +2201,87 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
}
}
+/*
+ * migrate_hrtimer_list_except - migrates hrtimers from one base to another,
+ * except specified one.
+ */
+static void migrate_hrtimer_list_except(struct hrtimer_clock_base *old_base,
+ struct hrtimer_clock_base *new_base, struct hrtimer *except)
+{
+ struct hrtimer *timer;
+ struct timerqueue_node *node;
+
+ node = timerqueue_getnext(&old_base->active);
+ while (node) {
+ timer = container_of(node, struct hrtimer, node);
+ node = timerqueue_iterate_next(node);
+ if (timer == except)
+ continue;
+
+ BUG_ON(hrtimer_callback_running(timer));
+ debug_deactivate(timer);
+
+ /*
+ * Mark it as ENQUEUED not INACTIVE otherwise the
+ * timer could be seen as !active and just vanish away
+ * under us on another CPU
+ */
+ __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
+ timer->base = new_base;
+ /*
+ * Enqueue the timers on the new cpu. This does not
+ * reprogram the event device in case the timer
+ * expires before the earliest on this CPU, but we run
+ * hrtimer_interrupt after we migrated everything to
+ * sort out already expired timers and reprogram the
+ * event device.
+ */
+ enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);
+ }
+}
+
+/**
+ * hrtimers_resettle_from_cpu - resettles hrtimers from
+ * specified cpu to housekeeping cpus.
+ */
+void hrtimers_resettle_from_cpu(unsigned int isol_cpu)
+{
+ int ncpu, i;
+ struct tick_sched *ts = tick_get_tick_sched(isol_cpu);
+ struct hrtimer_cpu_base *old_base, *new_base;
+
+ local_irq_disable();
+ ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER));
+
+ old_base = &per_cpu(hrtimer_bases, isol_cpu);
+ new_base = &per_cpu(hrtimer_bases, ncpu);
+
+ /*
+ * The caller is globally serialized and nobody else
+ * takes two locks at once, deadlock is not possible.
+ */
+ raw_spin_lock(&old_base->lock);
+ raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
+ for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
+ migrate_hrtimer_list_except(&old_base->clock_base[i],
+ &new_base->clock_base[i],
+ &ts->sched_timer);
+ }
+
+ /*
+ * The migration might have changed the first expiring softirq
+ * timer on this CPU. Update it.
+ */
+ __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
+
+ raw_spin_unlock(&new_base->lock);
+ raw_spin_unlock(&old_base->lock);
+ local_irq_enable();
+
+ /* Tell the other CPU to retrigger the next event */
+ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
+}
+
int hrtimers_cpu_dying(unsigned int dying_cpu)
{
int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER));
--
2.45.0
Powered by blists - more mailing lists