Message-ID: <175365567943.581055.6806236199247802443.tglx@xen13>
Date: Mon, 28 Jul 2025 00:37:50 +0200 (CEST)
From: Thomas Gleixner <tglx@...utronix.de>
To: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: linux-kernel@...r.kernel.org, x86@...nel.org
Subject: [GIT pull] irq/core for v6.17-rc1
Linus,
please pull the latest irq/core branch from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq-core-2025-07-27
up to: 8d39d6ec4db5: genirq: Prevent migration live lock in handle_edge_irq()
Updates for the interrupt core subsystem:
- Prevent an interrupt migration related live lock in handle_edge_irq()
If the interrupt affinity is moved to a new target CPU while the
interrupt is still being handled on the previous target CPU, the handler
of an edge type interrupt might get stuck on the previous target for a
long time, which causes both involved CPUs to waste cycles and
eventually run into a soft-lockup situation.
Solve this by checking whether the interrupt has been redirected to a
new target CPU. If the interrupt arrives on the new target CPU while the
old target is still handling it, busy wait for completion instead of
masking the interrupt and setting the pending bit, which would cause the
old CPU to re-run the handler and in the worst case repeat this exercise
for a long time. A condensed sketch of the resulting check follows the
summary below.
This only works on architectures which use single CPU interrupt
targets, but those are so far the only ones where this behaviour has
been observed.
- Add a kunit test for interrupt disable depth counts
The nested interrupt disable depth has been a source of problems in the
past, especially in combination with free_irq(), interrupt shutdown,
CPU hotplug and their interactions. The test exercises the combinations
of these scenarios and checks for correctness. An example configuration
for running the new test is sketched after the summary below.
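As a rough illustration of the first item, here is a condensed sketch of
the new check (the helper name is made up for illustration only; the
authoritative code is the kernel/irq/chip.c hunk further down, which also
handles the PM wakeup and spurious poll cases):

  /*
   * Condensed sketch: only the new target CPU of a single target,
   * edge type interrupt is allowed to busy wait for the old target
   * CPU to clear INPROGRESS.
   */
  static bool can_handle_on_new_target(struct irq_desc *desc)
  {
          struct irq_data *irqd = &desc->irq_data;
          const struct cpumask *aff;

          if (!irqd_is_single_target(irqd) ||
              desc->handle_irq != handle_edge_irq)
                  return false;

          /* Only the new target CPU busy waits, everybody else backs off */
          aff = irq_data_get_effective_affinity_mask(irqd);
          if (cpumask_first(aff) != smp_processor_id())
                  return false;

          /* Drops and reacquires desc->lock while spinning on INPROGRESS */
          return irq_wait_on_inprogress(desc);
  }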
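For the second item, one possible way to build and run the new test with
kunit_tool (a sketch: the config fragment assumes the new
CONFIG_IRQ_KUNIT_TEST symbol from the Kconfig hunk below; an SMP capable
architecture such as x86_64 is needed for the shutdown and hotplug cases):

  # .kunitconfig fragment
  CONFIG_KUNIT=y
  CONFIG_SMP=y
  CONFIG_IRQ_KUNIT_TEST=y

  # run only the new suite (suite name per irq_test.c below)
  ./tools/testing/kunit/kunit.py run --arch=x86_64 --kunitconfig=.kunitconfig 'irq_test_cases'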
Thanks,
tglx
------------------>
Brian Norris (1):
genirq: Add kunit tests for depth counts
Thomas Gleixner (4):
genirq: Remove pointless local variable
genirq: Move irq_wait_for_poll() to call site
genirq: Split up irq_pm_check_wakeup()
genirq: Prevent migration live lock in handle_edge_irq()
kernel/irq/Kconfig | 11 +++
kernel/irq/Makefile | 1 +
kernel/irq/chip.c | 72 +++++++++++++---
kernel/irq/internals.h | 6 +-
kernel/irq/irq_test.c | 229 +++++++++++++++++++++++++++++++++++++++++++++++++
kernel/irq/pm.c | 16 ++--
kernel/irq/spurious.c | 37 +-------
7 files changed, 313 insertions(+), 59 deletions(-)
create mode 100644 kernel/irq/irq_test.c
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 3f02a0e45254..1da5e9d9da71 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -144,6 +144,17 @@ config GENERIC_IRQ_DEBUGFS
config GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD
bool
+config IRQ_KUNIT_TEST
+ bool "KUnit tests for IRQ management APIs" if !KUNIT_ALL_TESTS
+ depends on KUNIT=y
+ default KUNIT_ALL_TESTS
+ imply SMP
+ help
+ This option enables KUnit tests for the IRQ subsystem API. These are
+ only for development and testing, not for regular kernel use cases.
+
+ If unsure, say N.
+
endmenu
config GENERIC_IRQ_MULTI_HANDLER
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index c0f44c06d69d..6ab3a4055667 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -19,3 +19,4 @@ obj-$(CONFIG_GENERIC_IRQ_IPI_MUX) += ipi-mux.o
obj-$(CONFIG_SMP) += affinity.o
obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o
obj-$(CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR) += matrix.o
+obj-$(CONFIG_IRQ_KUNIT_TEST) += irq_test.o
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 2b274007e8ba..624106e886ad 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -457,22 +457,33 @@ void unmask_threaded_irq(struct irq_desc *desc)
unmask_irq(desc);
}
-static bool irq_check_poll(struct irq_desc *desc)
-{
- if (!(desc->istate & IRQS_POLL_INPROGRESS))
- return false;
- return irq_wait_for_poll(desc);
+/* Busy wait until INPROGRESS is cleared */
+static bool irq_wait_on_inprogress(struct irq_desc *desc)
+{
+ if (IS_ENABLED(CONFIG_SMP)) {
+ do {
+ raw_spin_unlock(&desc->lock);
+ while (irqd_irq_inprogress(&desc->irq_data))
+ cpu_relax();
+ raw_spin_lock(&desc->lock);
+ } while (irqd_irq_inprogress(&desc->irq_data));
+
+ /* Might have been disabled in meantime */
+ return !irqd_irq_disabled(&desc->irq_data) && desc->action;
+ }
+ return false;
}
static bool irq_can_handle_pm(struct irq_desc *desc)
{
- unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED;
+ struct irq_data *irqd = &desc->irq_data;
+ const struct cpumask *aff;
/*
* If the interrupt is not in progress and is not an armed
* wakeup interrupt, proceed.
*/
- if (!irqd_has_set(&desc->irq_data, mask))
+ if (!irqd_has_set(irqd, IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED))
return true;
/*
@@ -480,13 +491,54 @@ static bool irq_can_handle_pm(struct irq_desc *desc)
* and suspended, disable it and notify the pm core about the
* event.
*/
- if (irq_pm_check_wakeup(desc))
+ if (unlikely(irqd_has_set(irqd, IRQD_WAKEUP_ARMED))) {
+ irq_pm_handle_wakeup(desc);
+ return false;
+ }
+
+ /* Check whether the interrupt is polled on another CPU */
+ if (unlikely(desc->istate & IRQS_POLL_INPROGRESS)) {
+ if (WARN_ONCE(irq_poll_cpu == smp_processor_id(),
+ "irq poll in progress on cpu %d for irq %d\n",
+ smp_processor_id(), desc->irq_data.irq))
+ return false;
+ return irq_wait_on_inprogress(desc);
+ }
+
+ /* The below works only for single target interrupts */
+ if (!IS_ENABLED(CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK) ||
+ !irqd_is_single_target(irqd) || desc->handle_irq != handle_edge_irq)
return false;
/*
- * Handle a potential concurrent poll on a different core.
+ * If the interrupt affinity was moved to this CPU and the
+ * interrupt is currently handled on the previous target CPU, then
+ * busy wait for INPROGRESS to be cleared. Otherwise for edge type
+ * interrupts the handler might get stuck on the previous target:
+ *
+ * CPU 0 CPU 1 (new target)
+ * handle_edge_irq()
+ * repeat:
+ * handle_event() handle_edge_irq()
+ * if (INPROGRESS) {
+ * set(PENDING);
+ * mask();
+ * return;
+ * }
+ * if (PENDING) {
+ * clear(PENDING);
+ * unmask();
+ * goto repeat;
+ * }
+ *
+ * This happens when the device raises interrupts with a high rate
+ * and always before handle_event() completes and the CPU0 handler
+ * can clear INPROGRESS. This has been observed in virtual machines.
*/
- return irq_check_poll(desc);
+ aff = irq_data_get_effective_affinity_mask(irqd);
+ if (cpumask_first(aff) != smp_processor_id())
+ return false;
+ return irq_wait_on_inprogress(desc);
}
static inline bool irq_can_handle_actions(struct irq_desc *desc)
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index aebfe225c9a6..0164ca48da59 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -20,6 +20,7 @@
#define istate core_internal_state__do_not_mess_with_it
extern bool noirqdebug;
+extern int irq_poll_cpu;
extern struct irqaction chained_action;
@@ -112,7 +113,6 @@ irqreturn_t handle_irq_event(struct irq_desc *desc);
int check_irq_resend(struct irq_desc *desc, bool inject);
void clear_irq_resend(struct irq_desc *desc);
void irq_resend_init(struct irq_desc *desc);
-bool irq_wait_for_poll(struct irq_desc *desc);
void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action);
void wake_threads_waitq(struct irq_desc *desc);
@@ -277,11 +277,11 @@ static inline bool irq_is_nmi(struct irq_desc *desc)
}
#ifdef CONFIG_PM_SLEEP
-bool irq_pm_check_wakeup(struct irq_desc *desc);
+void irq_pm_handle_wakeup(struct irq_desc *desc);
void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action);
void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action);
#else
-static inline bool irq_pm_check_wakeup(struct irq_desc *desc) { return false; }
+static inline void irq_pm_handle_wakeup(struct irq_desc *desc) { }
static inline void
irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { }
static inline void
diff --git a/kernel/irq/irq_test.c b/kernel/irq/irq_test.c
new file mode 100644
index 000000000000..5161b56a12f9
--- /dev/null
+++ b/kernel/irq/irq_test.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: LGPL-2.1+
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/irqdomain.h>
+#include <linux/nodemask.h>
+#include <kunit/test.h>
+
+#include "internals.h"
+
+static irqreturn_t noop_handler(int irq, void *data)
+{
+ return IRQ_HANDLED;
+}
+
+static void noop(struct irq_data *data) { }
+static unsigned int noop_ret(struct irq_data *data) { return 0; }
+
+static int noop_affinity(struct irq_data *data, const struct cpumask *dest,
+ bool force)
+{
+ irq_data_update_effective_affinity(data, dest);
+
+ return 0;
+}
+
+static struct irq_chip fake_irq_chip = {
+ .name = "fake",
+ .irq_startup = noop_ret,
+ .irq_shutdown = noop,
+ .irq_enable = noop,
+ .irq_disable = noop,
+ .irq_ack = noop,
+ .irq_mask = noop,
+ .irq_unmask = noop,
+ .irq_set_affinity = noop_affinity,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+static void irq_disable_depth_test(struct kunit *test)
+{
+ struct irq_desc *desc;
+ int virq, ret;
+
+ virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, NULL);
+ KUNIT_ASSERT_GE(test, virq, 0);
+
+ irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
+
+ desc = irq_to_desc(virq);
+ KUNIT_ASSERT_PTR_NE(test, desc, NULL);
+
+ ret = request_irq(virq, noop_handler, 0, "test_irq", NULL);
+ KUNIT_EXPECT_EQ(test, ret, 0);
+
+ KUNIT_EXPECT_EQ(test, desc->depth, 0);
+
+ disable_irq(virq);
+ KUNIT_EXPECT_EQ(test, desc->depth, 1);
+
+ enable_irq(virq);
+ KUNIT_EXPECT_EQ(test, desc->depth, 0);
+
+ free_irq(virq, NULL);
+}
+
+static void irq_free_disabled_test(struct kunit *test)
+{
+ struct irq_desc *desc;
+ int virq, ret;
+
+ virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, NULL);
+ KUNIT_ASSERT_GE(test, virq, 0);
+
+ irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
+
+ desc = irq_to_desc(virq);
+ KUNIT_ASSERT_PTR_NE(test, desc, NULL);
+
+ ret = request_irq(virq, noop_handler, 0, "test_irq", NULL);
+ KUNIT_EXPECT_EQ(test, ret, 0);
+
+ KUNIT_EXPECT_EQ(test, desc->depth, 0);
+
+ disable_irq(virq);
+ KUNIT_EXPECT_EQ(test, desc->depth, 1);
+
+ free_irq(virq, NULL);
+ KUNIT_EXPECT_GE(test, desc->depth, 1);
+
+ ret = request_irq(virq, noop_handler, 0, "test_irq", NULL);
+ KUNIT_EXPECT_EQ(test, ret, 0);
+ KUNIT_EXPECT_EQ(test, desc->depth, 0);
+
+ free_irq(virq, NULL);
+}
+
+static void irq_shutdown_depth_test(struct kunit *test)
+{
+ struct irq_desc *desc;
+ struct irq_data *data;
+ int virq, ret;
+ struct irq_affinity_desc affinity = {
+ .is_managed = 1,
+ .mask = CPU_MASK_ALL,
+ };
+
+ if (!IS_ENABLED(CONFIG_SMP))
+ kunit_skip(test, "requires CONFIG_SMP for managed shutdown");
+
+ virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, &affinity);
+ KUNIT_ASSERT_GE(test, virq, 0);
+
+ irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
+
+ desc = irq_to_desc(virq);
+ KUNIT_ASSERT_PTR_NE(test, desc, NULL);
+
+ data = irq_desc_get_irq_data(desc);
+ KUNIT_ASSERT_PTR_NE(test, data, NULL);
+
+ ret = request_irq(virq, noop_handler, 0, "test_irq", NULL);
+ KUNIT_EXPECT_EQ(test, ret, 0);
+
+ KUNIT_EXPECT_TRUE(test, irqd_is_activated(data));
+ KUNIT_EXPECT_TRUE(test, irqd_is_started(data));
+ KUNIT_EXPECT_TRUE(test, irqd_affinity_is_managed(data));
+
+ KUNIT_EXPECT_EQ(test, desc->depth, 0);
+
+ disable_irq(virq);
+ KUNIT_EXPECT_EQ(test, desc->depth, 1);
+
+ irq_shutdown_and_deactivate(desc);
+
+ KUNIT_EXPECT_FALSE(test, irqd_is_activated(data));
+ KUNIT_EXPECT_FALSE(test, irqd_is_started(data));
+
+ KUNIT_EXPECT_EQ(test, irq_activate(desc), 0);
+#ifdef CONFIG_SMP
+ irq_startup_managed(desc);
+#endif
+
+ KUNIT_EXPECT_EQ(test, desc->depth, 1);
+
+ enable_irq(virq);
+ KUNIT_EXPECT_EQ(test, desc->depth, 0);
+
+ free_irq(virq, NULL);
+}
+
+static void irq_cpuhotplug_test(struct kunit *test)
+{
+ struct irq_desc *desc;
+ struct irq_data *data;
+ int virq, ret;
+ struct irq_affinity_desc affinity = {
+ .is_managed = 1,
+ };
+
+ if (!IS_ENABLED(CONFIG_SMP))
+ kunit_skip(test, "requires CONFIG_SMP for CPU hotplug");
+ if (!get_cpu_device(1))
+ kunit_skip(test, "requires more than 1 CPU for CPU hotplug");
+ if (!cpu_is_hotpluggable(1))
+ kunit_skip(test, "CPU 1 must be hotpluggable");
+
+ cpumask_copy(&affinity.mask, cpumask_of(1));
+
+ virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, &affinity);
+ KUNIT_ASSERT_GE(test, virq, 0);
+
+ irq_set_chip_and_handler(virq, &fake_irq_chip, handle_simple_irq);
+
+ desc = irq_to_desc(virq);
+ KUNIT_ASSERT_PTR_NE(test, desc, NULL);
+
+ data = irq_desc_get_irq_data(desc);
+ KUNIT_ASSERT_PTR_NE(test, data, NULL);
+
+ ret = request_irq(virq, noop_handler, 0, "test_irq", NULL);
+ KUNIT_EXPECT_EQ(test, ret, 0);
+
+ KUNIT_EXPECT_TRUE(test, irqd_is_activated(data));
+ KUNIT_EXPECT_TRUE(test, irqd_is_started(data));
+ KUNIT_EXPECT_TRUE(test, irqd_affinity_is_managed(data));
+
+ KUNIT_EXPECT_EQ(test, desc->depth, 0);
+
+ disable_irq(virq);
+ KUNIT_EXPECT_EQ(test, desc->depth, 1);
+
+ KUNIT_EXPECT_EQ(test, remove_cpu(1), 0);
+ KUNIT_EXPECT_FALSE(test, irqd_is_activated(data));
+ KUNIT_EXPECT_FALSE(test, irqd_is_started(data));
+ KUNIT_EXPECT_GE(test, desc->depth, 1);
+ KUNIT_EXPECT_EQ(test, add_cpu(1), 0);
+
+ KUNIT_EXPECT_FALSE(test, irqd_is_activated(data));
+ KUNIT_EXPECT_FALSE(test, irqd_is_started(data));
+ KUNIT_EXPECT_EQ(test, desc->depth, 1);
+
+ enable_irq(virq);
+ KUNIT_EXPECT_TRUE(test, irqd_is_activated(data));
+ KUNIT_EXPECT_TRUE(test, irqd_is_started(data));
+ KUNIT_EXPECT_EQ(test, desc->depth, 0);
+
+ free_irq(virq, NULL);
+}
+
+static struct kunit_case irq_test_cases[] = {
+ KUNIT_CASE(irq_disable_depth_test),
+ KUNIT_CASE(irq_free_disabled_test),
+ KUNIT_CASE(irq_shutdown_depth_test),
+ KUNIT_CASE(irq_cpuhotplug_test),
+ {}
+};
+
+static struct kunit_suite irq_test_suite = {
+ .name = "irq_test_cases",
+ .test_cases = irq_test_cases,
+};
+
+kunit_test_suite(irq_test_suite);
+MODULE_DESCRIPTION("IRQ unit test suite");
+MODULE_LICENSE("GPL");
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 445912d51033..f7394729cedc 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -13,17 +13,13 @@
#include "internals.h"
-bool irq_pm_check_wakeup(struct irq_desc *desc)
+void irq_pm_handle_wakeup(struct irq_desc *desc)
{
- if (irqd_is_wakeup_armed(&desc->irq_data)) {
- irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED);
- desc->istate |= IRQS_SUSPENDED | IRQS_PENDING;
- desc->depth++;
- irq_disable(desc);
- pm_system_irq_wakeup(irq_desc_get_irq(desc));
- return true;
- }
- return false;
+ irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED);
+ desc->istate |= IRQS_SUSPENDED | IRQS_PENDING;
+ desc->depth++;
+ irq_disable(desc);
+ pm_system_irq_wakeup(irq_desc_get_irq(desc));
}
/*
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 8f26982e7300..73280ccb74b0 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -19,44 +19,9 @@ static int irqfixup __read_mostly;
#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
static void poll_spurious_irqs(struct timer_list *unused);
static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs);
-static int irq_poll_cpu;
+int irq_poll_cpu;
static atomic_t irq_poll_active;
-/*
- * We wait here for a poller to finish.
- *
- * If the poll runs on this CPU, then we yell loudly and return
- * false. That will leave the interrupt line disabled in the worst
- * case, but it should never happen.
- *
- * We wait until the poller is done and then recheck disabled and
- * action (about to be disabled). Only if it's still active, we return
- * true and let the handler run.
- */
-bool irq_wait_for_poll(struct irq_desc *desc)
-{
- lockdep_assert_held(&desc->lock);
-
- if (WARN_ONCE(irq_poll_cpu == smp_processor_id(),
- "irq poll in progress on cpu %d for irq %d\n",
- smp_processor_id(), desc->irq_data.irq))
- return false;
-
-#ifdef CONFIG_SMP
- do {
- raw_spin_unlock(&desc->lock);
- while (irqd_irq_inprogress(&desc->irq_data))
- cpu_relax();
- raw_spin_lock(&desc->lock);
- } while (irqd_irq_inprogress(&desc->irq_data));
- /* Might have been disabled in meantime */
- return !irqd_irq_disabled(&desc->irq_data) && desc->action;
-#else
- return false;
-#endif
-}
-
-
/*
* Recovery handler for misrouted interrupts.
*/