lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250714084209.918-1-wladislav.wiebe@nokia.com>
Date: Mon, 14 Jul 2025 10:41:45 +0200
From: Wladislav Wiebe <wladislav.wiebe@...ia.com>
To: tglx@...utronix.de,
	corbet@....net
Cc: akpm@...ux-foundation.org,
	paulmck@...nel.org,
	rostedt@...dmis.org,
	Neeraj.Upadhyay@....com,
	david@...hat.com,
	bp@...en8.de,
	arnd@...db.de,
	fvdl@...gle.com,
	linux-doc@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	wladislav.wiebe@...ia.com,
	peterz@...radead.org
Subject: [PATCH v2] genirq: add support for warning on long-running IRQ handlers

Add a mechanism to detect and warn about long-running IRQ handlers whose
execution time exceeds a user-defined threshold in microseconds.

The feature is enabled via the kernel boot parameter:
"irqhandler.duration_warn_us=<threshold_in_us>"

For example, passing irqhandler.duration_warn_us=1000 will warn if an
IRQ handler takes more than 1000 microseconds.

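The implementation uses local_clock() to measure the execution duration of
each IRQ handler. The elapsed time is converted to microseconds with a right
shift by 10 (i.e. a division by 1024 rather than 1000), so the reported value
is a close approximation rather than an exact figure. When the threshold is
exceeded, a ratelimited warning is printed: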

"[CPU14] long duration on IRQ[159:bad_irq_handler [long_irq]], took: 1330 us"

Signed-off-by: Wladislav Wiebe <wladislav.wiebe@...ia.com>
---
V1 -> V2: refactor to use local_clock() instead of jiffies and replace
	  Kconfig knobs by a new command-line parameter.
V1 link:  https://lore.kernel.org/lkml/20250630124721.18232-1-wladislav.wiebe@nokia.com/
---
 .../admin-guide/kernel-parameters.txt         |  5 ++
 kernel/irq/handle.c                           | 48 ++++++++++++++++++-
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1f2c0874da9..fa89f21ea1e6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2543,6 +2543,11 @@
 			for it. Intended to get systems with badly broken
 			firmware running.
 
+	irqhandler.duration_warn_us= [KNL,EARLY]
+			Warn if an IRQ handler exceeds the specified duration
+			threshold in microseconds. Useful for identifying
+			long-running IRQs in the system.
+
 	irqpoll		[HW]
 			When an interrupt is not handled search all handlers
 			for it. Also check all handlers each timer
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 9489f93b3db3..eab8fdfab8d8 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
 	wake_up_process(action->thread);
 }
 
+static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled);
+static u64 irqhandler_duration_threshold_us __ro_after_init;
+
+static int __init irqhandler_duration_check_setup(char *arg)
+{
+	unsigned long val;
+	int ret;
+
+	if (!arg)
+		return 0;
+
+	ret = kstrtoul(arg, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val > 0) {
+		irqhandler_duration_threshold_us = val;
+		static_branch_enable(&irqhandler_duration_check_enabled);
+	} else {
+		pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", val);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup);
+
+static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq,
+					      struct irqaction *action)
+{
+	u64 delta_us = (local_clock() - ts_start) >> 10;
+
+	if (unlikely(delta_us > irqhandler_duration_threshold_us)) {
+		pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n",
+			smp_processor_id(), irq, action->handler, delta_us);
+	}
+}
+
 irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
 {
 	irqreturn_t retval = IRQ_NONE;
@@ -146,6 +184,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
 
 	for_each_action_of_desc(desc, action) {
 		irqreturn_t res;
+		u64 ts_start;
 
 		/*
 		 * If this IRQ would be threaded under force_irqthreads, mark it so.
@@ -155,7 +194,14 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
 			lockdep_hardirq_threaded();
 
 		trace_irq_handler_entry(irq, action);
-		res = action->handler(irq, action->dev_id);
+
+		if (static_branch_unlikely(&irqhandler_duration_check_enabled)) {
+			ts_start = local_clock();
+			res = action->handler(irq, action->dev_id);
+			irqhandler_duration_check(ts_start, irq, action);
+		} else
+			res = action->handler(irq, action->dev_id);
+
 		trace_irq_handler_exit(irq, action, res);
 
 		if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n",
-- 
2.39.3.dirty


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ