[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANn89iKQyGhx+jbP5pNSqTU4dsweH-1r9LEkdG6CHDUaicS0nA@mail.gmail.com>
Date: Mon, 12 Jan 2026 21:18:45 +0100
From: Eric Dumazet <edumazet@...gle.com>
To: Luigi Rizzo <lrizzo@...gle.com>
Cc: tglx@...utronix.de, maz@...nel.org, rizzo.unipi@...il.com,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] genirq: move clear of kstat_irqs to free_desc()
On Mon, Jan 12, 2026 at 9:32 AM Luigi Rizzo <lrizzo@...gle.com> wrote:
>
> desc_set_defaults() has a loop to clear the per-cpu counters kstat_irqs.
>
> This is only needed in free_desc(), which is used with non-sparse IRQs
> so that the irq_desc can be recycled. For newly allocated irq_desc,
> the memory comes from alloc_percpu() and is already zeroed out.
>
> Move the loop to free_desc() to avoid wasting time unnecessarily.
>
> This is especially important on large servers with 100+ CPUs, because
> each write results in a cache miss, and the write buffer can only have
> so many outstanding transactions.
>
> Below is an example of cost on a host with 480 CPUs, taken with
> local_irq_save()/restore() around the code to avoid interference.
> Measurements taken with kstats
> https://github.com/luigirizzo/lr-cstats/tree/main/kstats
>
> BUCKET SAMPLES AVG TIME(ns) PERCENTILE
>
> 40 3 2432 0.000366
> 41 3 3000 0.000732
> 42 24 3241 0.003662
> 43 33 3971 0.007690
> 44 963 4742 0.125244
> 45 1071 5545 0.255981
> 46 494 6644 0.316284
> 47 352 7661 0.359252
> 48 816 9447 0.458862
> 49 2214 11493 0.729125
> 50 1440 13027 0.904907
> 51 428 15219 0.957153
> 52 275 18211 0.990722
> 53 69 21396 0.999145
> 54 4 26125 0.999633
> 55 1 28996 0.999755
> 56 2 37253 1.000000
>
> Signed-off-by: Luigi Rizzo <lrizzo@...gle.com>
> ---
> kernel/irq/irqdesc.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
> index f8e4e13dbe339..ec963174e7e27 100644
> --- a/kernel/irq/irqdesc.c
> +++ b/kernel/irq/irqdesc.c
> @@ -115,8 +115,6 @@ static inline void free_masks(struct irq_desc *desc) { }
> static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
> const struct cpumask *affinity, struct module *owner)
> {
> - int cpu;
> -
> desc->irq_common_data.handler_data = NULL;
> desc->irq_common_data.msi_desc = NULL;
>
> @@ -134,8 +132,6 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
> desc->tot_count = 0;
> desc->name = NULL;
> desc->owner = owner;
> - for_each_possible_cpu(cpu)
> - *per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
> desc_smp_init(desc, node, affinity);
> }
>
> @@ -621,9 +617,13 @@ EXPORT_SYMBOL(irq_to_desc);
> static void free_desc(unsigned int irq)
> {
> struct irq_desc *desc = irq_to_desc(irq);
> + int cpu;
>
> - scoped_guard(raw_spinlock_irqsave, &desc->lock)
> + scoped_guard(raw_spinlock_irqsave, &desc->lock) {
> desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);
> + for_each_possible_cpu(cpu)
> + *per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
> + }
It seems that for_each_possible_cpu(cpu) could be done outside of the
desc->lock protection.
This would shorten hard-irq blocking by N cache line misses.
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index f8e4e13dbe33965b8ede1872515596eb64dfdb74..577fb0ff4a328d44cef93922f41f8d200d12bbb1
100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -621,9 +621,14 @@ EXPORT_SYMBOL(irq_to_desc);
static void free_desc(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
+ int cpu;
scoped_guard(raw_spinlock_irqsave, &desc->lock)
desc_set_defaults(irq, desc, irq_desc_get_node(desc),
NULL, NULL);
+
+ for_each_possible_cpu(cpu)
+ *per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
+
delete_irq_desc(irq);
}
Powered by blists - more mailing lists