lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Thu, 16 May 2024 22:04:33 +0300
From: Costa Shulyupin <costa.shul@...hat.com>
To: longman@...hat.com,
	pauld@...hat.com,
	juri.lelli@...hat.com,
	prarit@...hat.com,
	vschneid@...hat.com,
	Anna-Maria Behnsen <anna-maria@...utronix.de>,
	Frederic Weisbecker <frederic@...nel.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Zefan Li <lizefan.x@...edance.com>,
	Tejun Heo <tj@...nel.org>,
	Johannes Weiner <hannes@...xchg.org>,
	Ingo Molnar <mingo@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Vincent Guittot <vincent.guittot@...aro.org>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Ben Segall <bsegall@...gle.com>,
	Mel Gorman <mgorman@...e.de>,
	Daniel Bristot de Oliveira <bristot@...hat.com>,
	Petr Mladek <pmladek@...e.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Masahiro Yamada <masahiroy@...nel.org>,
	Randy Dunlap <rdunlap@...radead.org>,
	Yoann Congal <yoann.congal@...le.fr>,
	"Gustavo A. R. Silva" <gustavoars@...nel.org>,
	Nhat Pham <nphamcs@...il.com>,
	Costa Shulyupin <costa.shul@...hat.com>,
	linux-kernel@...r.kernel.org,
	cgroups@...r.kernel.org
Subject: [PATCH v1 3/7] sched/isolation: Adjust affinity of hrtimers according to change of housekeeping cpumask

Adjust the affinity of watchdog_cpumask and hrtimers according to
changes of housekeeping.cpumasks[HK_TYPE_TIMER].

Function migrate_hrtimer_list_except() is derived from
migrate_hrtimer_list() and is more generic: it can skip one
specified timer.

Potentially it can be used instead of migrate_hrtimer_list().

Function hrtimers_resettle_from_cpu() is closely modeled on
hrtimers_cpu_dying(). local_irq_disable() is used because
cpuhp_thread_fun() disables interrupts before calling
cpuhp_invoke_callback().

Core test snippets without infrastructure:

1. Create hrtimer on specific cpu with:

        set_cpus_allowed_ptr(current, cpumask_of(test_cpu));
        hrtimer_init(&test_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        test_hrtimer.function = test_hrtimer_cb;
        hrtimer_start(&test_hrtimer, -1,  HRTIMER_MODE_REL);

2. Call housekeeping_update()

3. Ensure that only tick_nohz_handler remains on the specified cpu
by inspecting /proc/timer_list manually or with this script:

grep -E 'cpu| #[0-9]' /proc/timer_list | \
	awk "/cpu:/{y=0};/cpu: $test_cpu\$/{y=1};y"

Another alternative solution to migrate hrtimers:
1. Use cpuhp to set sched_timer offline
2. Resettle all hrtimers likewise migrate_hrtimer_list
3. Use cpuhp to set sched_timer online

Signed-off-by: Costa Shulyupin <costa.shul@...hat.com>
---
 include/linux/hrtimer.h  |  2 +
 kernel/sched/isolation.c |  2 +
 kernel/time/hrtimer.c    | 81 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 85 insertions(+)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index aa1e65ccb6158..004632fc7d643 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -381,8 +381,10 @@ extern void sysrq_timer_list_show(void);
 int hrtimers_prepare_cpu(unsigned int cpu);
 #ifdef CONFIG_HOTPLUG_CPU
 int hrtimers_cpu_dying(unsigned int cpu);
+void hrtimers_resettle_from_cpu(unsigned int cpu);
 #else
 #define hrtimers_cpu_dying	NULL
+static inline void hrtimers_resettle_from_cpu(unsigned int cpu) { }
 #endif
 
 #endif
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 3b63f0212887e..85a17d39d8bb0 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -126,10 +126,12 @@ static void resettle_all_timers(cpumask_var_t enable_mask, cpumask_var_t disable
 
 	for_each_cpu(cpu, enable_mask)	{
 		timers_prepare_cpu(cpu);
+		hrtimers_prepare_cpu(cpu);
 	}
 
 	for_each_cpu(cpu, disable_mask) {
 		timers_resettle_from_cpu(cpu);
+		hrtimers_resettle_from_cpu(cpu);
 	}
 }
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 492c14aac642b..7e71ebbb72348 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -2201,6 +2201,87 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 	}
 }
 
+/**
+ * migrate_hrtimer_list_except - migrate hrtimers from one clock base to
+ * another, skipping one specified timer
+ * @old_base:	clock base to drain; its cpu_base lock must be held by the caller
+ * @new_base:	clock base receiving the timers; its cpu_base lock must be held
+ * @except:	timer to leave enqueued on @old_base (NULL moves everything)
+ *
+ * Generalized variant of migrate_hrtimer_list(): the @except parameter lets
+ * a per-cpu timer (e.g. the tick's sched_timer) stay on the old base.
+ */
+static void migrate_hrtimer_list_except(struct hrtimer_clock_base *old_base,
+				struct hrtimer_clock_base *new_base, struct hrtimer *except)
+{
+	struct hrtimer *timer;
+	struct timerqueue_node *node;
+
+	node = timerqueue_getnext(&old_base->active);
+	while (node) {
+		timer = container_of(node, struct hrtimer, node);
+		/* Advance the iterator before __remove_hrtimer() unlinks the node */
+		node = timerqueue_iterate_next(node);
+		if (timer == except)
+			continue;
+
+		BUG_ON(hrtimer_callback_running(timer));
+		debug_deactivate(timer);
+
+		/*
+		 * Mark it as ENQUEUED not INACTIVE otherwise the
+		 * timer could be seen as !active and just vanish away
+		 * under us on another CPU
+		 */
+		__remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
+		timer->base = new_base;
+		/*
+		 * Enqueue the timers on the new cpu. This does not
+		 * reprogram the event device in case the timer
+		 * expires before the earliest on this CPU, but we run
+		 * hrtimer_interrupt after we migrated everything to
+		 * sort out already expired timers and reprogram the
+		 * event device.
+		 */
+		enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);
+	}
+}
+
+/**
+ * hrtimers_resettle_from_cpu - migrate hrtimers off a newly isolated CPU
+ * @isol_cpu:	CPU whose hrtimers (except the tick's sched_timer) are moved
+ *		to an active housekeeping CPU
+ *
+ * Mirrors hrtimers_cpu_dying(), but runs while @isol_cpu stays online:
+ * interrupts are disabled locally here because the caller does not arrive
+ * via the hotplug machinery, which would have disabled them already.
+ */
+void hrtimers_resettle_from_cpu(unsigned int isol_cpu)
+{
+	int ncpu, i;
+	struct tick_sched *ts = tick_get_tick_sched(isol_cpu);
+	struct hrtimer_cpu_base *old_base, *new_base;
+
+	local_irq_disable();
+	/*
+	 * NOTE(review): assumes at least one active housekeeping CPU exists;
+	 * cpumask_any_and() returns >= nr_cpu_ids otherwise, which would make
+	 * the per_cpu() access below out of bounds — confirm callers guarantee
+	 * a non-empty intersection.
+	 */
+	ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER));
+
+	old_base = &per_cpu(hrtimer_bases, isol_cpu);
+	new_base = &per_cpu(hrtimer_bases, ncpu);
+
+	/*
+	 * The caller is globally serialized and nobody else
+	 * takes two locks at once, deadlock is not possible.
+	 */
+	raw_spin_lock(&old_base->lock);
+	raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
+	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
+		/* Keep the tick's sched_timer on @isol_cpu; move everything else */
+		migrate_hrtimer_list_except(&old_base->clock_base[i],
+				     &new_base->clock_base[i],
+				     &ts->sched_timer);
+	}
+
+	/*
+	 * The migration might have changed the first expiring softirq
+	 * timer on this CPU. Update it.
+	 */
+	__hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
+
+	raw_spin_unlock(&new_base->lock);
+	raw_spin_unlock(&old_base->lock);
+	local_irq_enable();
+
+	/* Tell the other CPU to retrigger the next event */
+	smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
+}
+
 int hrtimers_cpu_dying(unsigned int dying_cpu)
 {
 	int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER));
-- 
2.45.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ