Message-ID: <20251003160421.951448-2-rrendec@redhat.com>
Date: Fri, 3 Oct 2025 12:04:19 -0400
From: Radu Rendec <rrendec@...hat.com>
To: Thomas Gleixner <tglx@...utronix.de>,
Manivannan Sadhasivam <mani@...nel.org>
Cc: Bjorn Helgaas <bhelgaas@...gle.com>,
Rob Herring <robh@...nel.org>,
Krzysztof Wilczyński <kwilczynski@...nel.org>,
Lorenzo Pieralisi <lpieralisi@...nel.org>,
Jingoo Han <jingoohan1@...il.com>,
Brian Masney <bmasney@...hat.com>,
Eric Chanudet <echanude@...hat.com>,
Alessandro Carminati <acarmina@...hat.com>,
Jared Kangas <jkangas@...hat.com>,
linux-pci@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH 1/3] genirq: Add interrupt redirection infrastructure
From: Thomas Gleixner <tglx@...utronix.de>
Add infrastructure to redirect interrupt handler execution to a
different CPU when the current CPU is not part of the interrupt's CPU
affinity mask.
This is primarily aimed at (de)multiplexed interrupts, where the child
interrupt handler runs in the context of the parent interrupt handler,
and therefore CPU affinity control for the child interrupt is typically
not available.
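For illustration, a demultiplexing parent handler typically looks like
the sketch below (hypothetical driver; the foo_* names and register
layout are made up). The child handlers run synchronously inside the
demux loop, i.e. on whatever CPU the parent interrupt happened to be
delivered to, so the children have no affinity control of their own:

    struct foo_priv {
            void __iomem *base;
            struct irq_domain *domain;
    };

    static void foo_demux_handler(struct irq_desc *desc)
    {
            struct foo_priv *priv = irq_desc_get_handler_data(desc);
            struct irq_chip *chip = irq_desc_get_chip(desc);
            unsigned long status;
            int hwirq;

            chained_irq_enter(chip, desc);
            status = readl(priv->base + FOO_ISR);
            for_each_set_bit(hwirq, &status, FOO_NR_CHILD_IRQS)
                    generic_handle_domain_irq(priv->domain, hwirq);
            chained_irq_exit(chip, desc);
    }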
With the new infrastructure, the child interrupt is allowed to freely
change its affinity setting, independently of the parent. If the
interrupt handler happens to be triggered on an "incompatible" CPU (a
CPU that's not part of the child interrupt's affinity mask), the handler
is redirected and runs in IRQ work context on a "compatible" CPU.
No functional change is made to any existing irqchip driver: drivers
must be explicitly modified to use the newly added infrastructure in
order to support interrupt redirection.
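With this series, such a driver can opt in by calling
generic_handle_demux_domain_irq() from the demux loop and by pointing
the child chip's .irq_set_affinity callback at the new helper. A
minimal, hypothetical sketch (not taken from an existing driver):

    static struct irq_chip foo_child_chip = {
            .name             = "foo-child",
            .irq_mask         = foo_child_mask,
            .irq_unmask       = foo_child_unmask,
            /*
             * Only record the first CPU of the requested mask as the
             * redirection fallback; no hardware is touched, so each
             * child can change its affinity independently of the
             * parent.
             */
            .irq_set_affinity = irq_chip_redirect_set_affinity,
    };

and in the parent handler, instead of generic_handle_domain_irq():

    for_each_set_bit(hwirq, &status, FOO_NR_CHILD_IRQS)
            generic_handle_demux_domain_irq(priv->domain, hwirq);

generic_handle_demux_domain_irq() runs the child handler inline when
the local CPU is in the child's effective affinity mask, and otherwise
queues it via irq_work on the recorded fallback CPU.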
This is a direct follow-up to the patches that Thomas Gleixner proposed
in https://lore.kernel.org/linux-pci/878qpg4o4t.ffs@tglx/
Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
Link: https://lore.kernel.org/linux-pci/878qpg4o4t.ffs@tglx/
Co-developed-by: Radu Rendec <rrendec@...hat.com>
Signed-off-by: Radu Rendec <rrendec@...hat.com>
---
include/linux/irq.h | 6 +++++
include/linux/irqdesc.h | 11 ++++++++-
kernel/irq/chip.c | 20 ++++++++++++++++
kernel/irq/irqdesc.c | 51 +++++++++++++++++++++++++++++++++++++++--
kernel/irq/manage.c | 16 +++++++++++--
5 files changed, 99 insertions(+), 5 deletions(-)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index c67e76fbcc077..484d4aed08084 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -459,6 +459,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
* checks against the supplied affinity mask are not
* required. This is used for CPU hotplug where the
* target CPU is not yet set in the cpu_online_mask.
+ * @irq_pre_redirect: Optional function to be invoked before redirecting
+ * an interrupt via irq_work.
* @irq_retrigger: resend an IRQ to the CPU
* @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ
* @irq_set_wake: enable/disable power-management wake-on of an IRQ
@@ -503,6 +505,7 @@ struct irq_chip {
void (*irq_eoi)(struct irq_data *data);
int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force);
+ void (*irq_pre_redirect)(struct irq_data *data);
int (*irq_retrigger)(struct irq_data *data);
int (*irq_set_type)(struct irq_data *data, unsigned int flow_type);
int (*irq_set_wake)(struct irq_data *data, unsigned int on);
@@ -690,6 +693,9 @@ extern int irq_chip_request_resources_parent(struct irq_data *data);
extern void irq_chip_release_resources_parent(struct irq_data *data);
#endif
+int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force);
+void irq_chip_pre_redirect_parent(struct irq_data *data);
+
/* Disable or mask interrupts during a kernel kexec */
extern void machine_kexec_mask_interrupts(void);
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index fd091c35d5721..aeead91884668 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -2,9 +2,10 @@
#ifndef _LINUX_IRQDESC_H
#define _LINUX_IRQDESC_H
-#include <linux/rcupdate.h>
+#include <linux/irq_work.h>
#include <linux/kobject.h>
#include <linux/mutex.h>
+#include <linux/rcupdate.h>
/*
* Core internal functions to deal with irq descriptors
@@ -29,6 +30,11 @@ struct irqstat {
#endif
};
+struct irq_redirect {
+ struct irq_work work;
+ unsigned int fallback_cpu;
+};
+
/**
* struct irq_desc - interrupt descriptor
* @irq_common_data: per irq and chip data passed down to chip functions
@@ -46,6 +52,7 @@ struct irqstat {
* @threads_handled: stats field for deferred spurious detection of threaded handlers
* @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
* @lock: locking for SMP
+ * @redirect: Facility for redirecting interrupts via irq_work
* @affinity_hint: hint to user space for preferred irq affinity
* @affinity_notify: context for notification of affinity changes
* @pending_mask: pending rebalanced interrupts
@@ -84,6 +91,7 @@ struct irq_desc {
struct cpumask *percpu_enabled;
const struct cpumask *percpu_affinity;
#ifdef CONFIG_SMP
+ struct irq_redirect redirect;
const struct cpumask *affinity_hint;
struct irq_affinity_notify *affinity_notify;
#ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -186,6 +194,7 @@ int generic_handle_irq_safe(unsigned int irq);
int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq);
int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq);
int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq);
+bool generic_handle_demux_domain_irq(struct irq_domain *domain, unsigned int hwirq);
#endif
/* Test to see if a driver has successfully requested an irq */
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 3ffa0d80ddd19..8e74c6fc63f86 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1215,6 +1215,26 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq);
#endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */
+#ifdef CONFIG_SMP
+
+int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force)
+{
+ struct irq_redirect *redir = &irq_data_to_desc(data)->redirect;
+
+ WRITE_ONCE(redir->fallback_cpu, cpumask_first(dest));
+ return IRQ_SET_MASK_OK;
+}
+EXPORT_SYMBOL_GPL(irq_chip_redirect_set_affinity);
+
+void irq_chip_pre_redirect_parent(struct irq_data *data)
+{
+ data = data->parent_data;
+ data->chip->irq_pre_redirect(data);
+}
+EXPORT_SYMBOL_GPL(irq_chip_pre_redirect_parent);
+
+#endif
+
/**
* irq_chip_set_parent_state - set the state of a parent interrupt.
*
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index db714d3014b5f..d704025751315 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -78,8 +78,12 @@ static int alloc_masks(struct irq_desc *desc, int node)
return 0;
}
-static void desc_smp_init(struct irq_desc *desc, int node,
- const struct cpumask *affinity)
+static void irq_redirect_work(struct irq_work *work)
+{
+ handle_irq_desc(container_of(work, struct irq_desc, redirect.work));
+}
+
+static void desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity)
{
if (!affinity)
affinity = irq_default_affinity;
@@ -91,6 +95,7 @@ static void desc_smp_init(struct irq_desc *desc, int node,
#ifdef CONFIG_NUMA
desc->irq_common_data.node = node;
#endif
+ desc->redirect.work = IRQ_WORK_INIT_HARD(irq_redirect_work);
}
static void free_masks(struct irq_desc *desc)
@@ -766,6 +771,48 @@ int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq)
WARN_ON_ONCE(!in_nmi());
return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
}
+
+static bool demux_redirect_remote(struct irq_desc *desc)
+{
+#ifdef CONFIG_SMP
+ const struct cpumask *m = irq_data_get_effective_affinity_mask(&desc->irq_data);
+ unsigned int target_cpu = READ_ONCE(desc->redirect.fallback_cpu);
+
+ if (!cpumask_test_cpu(smp_processor_id(), m)) {
+ /* Protect against shutdown */
+ if (desc->action)
+ irq_work_queue_on(&desc->redirect.work, target_cpu);
+ return true;
+ }
+#endif
+ return false;
+}
+
+/**
+ * generic_handle_demux_domain_irq - Invoke the handler for a hardware interrupt
+ * of a demultiplexing domain.
+ * @domain: The domain where to perform the lookup
+ * @hwirq: The hardware interrupt number to convert to a logical one
+ *
+ * Returns: True on success, or false if the lookup failed
+ */
+bool generic_handle_demux_domain_irq(struct irq_domain *domain, unsigned int hwirq)
+{
+ struct irq_desc *desc = irq_resolve_mapping(domain, hwirq);
+
+ if (unlikely(!desc))
+ return false;
+
+ scoped_guard(raw_spinlock, &desc->lock) {
+ if (desc->irq_data.chip->irq_pre_redirect)
+ desc->irq_data.chip->irq_pre_redirect(&desc->irq_data);
+ if (demux_redirect_remote(desc))
+ return true;
+ }
+ return !handle_irq_desc(desc);
+}
+EXPORT_SYMBOL_GPL(generic_handle_demux_domain_irq);
+
#endif
/* Dynamic interrupt handling */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c94837382037e..ed8f8b2667b0b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -35,6 +35,16 @@ static int __init setup_forced_irqthreads(char *arg)
early_param("threadirqs", setup_forced_irqthreads);
#endif
+#ifdef CONFIG_SMP
+static inline void synchronize_irqwork(struct irq_desc *desc)
+{
+ /* Synchronize pending or on the fly redirect work */
+ irq_work_sync(&desc->redirect.work);
+}
+#else
+static inline void synchronize_irqwork(struct irq_desc *desc) { }
+#endif
+
static int __irq_get_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool *state);
static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
@@ -43,6 +53,8 @@ static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
bool inprogress;
do {
+ synchronize_irqwork(desc);
+
/*
* Wait until we're out of the critical section. This might
* give the wrong answer due to the lack of memory barriers.
@@ -108,6 +120,7 @@ EXPORT_SYMBOL(synchronize_hardirq);
static void __synchronize_irq(struct irq_desc *desc)
{
__synchronize_hardirq(desc, true);
+
/*
* We made sure that no hardirq handler is running. Now verify that no
* threaded handlers are active.
@@ -217,8 +230,7 @@ static inline void irq_validate_effective_affinity(struct irq_data *data) { }
static DEFINE_PER_CPU(struct cpumask, __tmp_mask);
-int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
+int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
struct cpumask *tmp_mask = this_cpu_ptr(&__tmp_mask);
struct irq_desc *desc = irq_data_to_desc(data);
--
2.51.0