Message-ID: <CANn89iKQyGhx+jbP5pNSqTU4dsweH-1r9LEkdG6CHDUaicS0nA@mail.gmail.com>
Date: Mon, 12 Jan 2026 21:18:45 +0100
From: Eric Dumazet <edumazet@...gle.com>
To: Luigi Rizzo <lrizzo@...gle.com>
Cc: tglx@...utronix.de, maz@...nel.org, rizzo.unipi@...il.com, 
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] genirq: move clear of kstat_irqs to free_desc()

On Mon, Jan 12, 2026 at 9:32 AM Luigi Rizzo <lrizzo@...gle.com> wrote:
>
> desc_set_defaults() has a loop to clear the per-cpu counters kstat_irqs.
>
> This is only needed in free_desc(), which is used with non-sparse IRQs
> so that the irq_desc can be recycled. For a newly allocated irq_desc,
> the per-cpu memory comes from alloc_percpu() and is already zeroed.
>
> Move the loop to free_desc() to avoid the unnecessary work.
>
> This is especially important on large servers with 100+ CPUs, because
> each write results in a cache miss, and the write buffer can only have
> so many outstanding transactions.
>
> Below is an example of the cost on a host with 480 CPUs, measured with
> local_irq_save()/restore() around the code to avoid interference.
> Measurements were taken with kstats:
> https://github.com/luigirizzo/lr-cstats/tree/main/kstats
>
> BUCKET  SAMPLES  AVG TIME(ns)  PERCENTILE
>
>  40           3         2432   0.000366
>  41           3         3000   0.000732
>  42          24         3241   0.003662
>  43          33         3971   0.007690
>  44         963         4742   0.125244
>  45        1071         5545   0.255981
>  46         494         6644   0.316284
>  47         352         7661   0.359252
>  48         816         9447   0.458862
>  49        2214        11493   0.729125
>  50        1440        13027   0.904907
>  51         428        15219   0.957153
>  52         275        18211   0.990722
>  53          69        21396   0.999145
>  54           4        26125   0.999633
>  55           1        28996   0.999755
>  56           2        37253   1.000000
>
> Signed-off-by: Luigi Rizzo <lrizzo@...gle.com>
> ---
>  kernel/irq/irqdesc.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
> index f8e4e13dbe339..ec963174e7e27 100644
> --- a/kernel/irq/irqdesc.c
> +++ b/kernel/irq/irqdesc.c
> @@ -115,8 +115,6 @@ static inline void free_masks(struct irq_desc *desc) { }
>  static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
>                               const struct cpumask *affinity, struct module *owner)
>  {
> -       int cpu;
> -
>         desc->irq_common_data.handler_data = NULL;
>         desc->irq_common_data.msi_desc = NULL;
>
> @@ -134,8 +132,6 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
>         desc->tot_count = 0;
>         desc->name = NULL;
>         desc->owner = owner;
> -       for_each_possible_cpu(cpu)
> -               *per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
>         desc_smp_init(desc, node, affinity);
>  }
>
> @@ -621,9 +617,13 @@ EXPORT_SYMBOL(irq_to_desc);
>  static void free_desc(unsigned int irq)
>  {
>         struct irq_desc *desc = irq_to_desc(irq);
> +       int cpu;
>
> -       scoped_guard(raw_spinlock_irqsave, &desc->lock)
> +       scoped_guard(raw_spinlock_irqsave, &desc->lock) {
>                 desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);
> +               for_each_possible_cpu(cpu)
> +                       *per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
> +       }


It seems that the for_each_possible_cpu(cpu) loop could be done outside
of the desc->lock protection.

This would shorten hard-irq blocking by N cache-line misses (one per
possible CPU).
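
Back-of-the-envelope, using the numbers above: the median for the whole
loop on the 480-CPU host is about 11.5 us (bucket 49), i.e. roughly
11500 / 480 ~= 24 ns per cleared cache line, all of it currently spent
with interrupts disabled. Something like this (untested) would move
that cost out of the critical section: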

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index f8e4e13dbe33965b8ede1872515596eb64dfdb74..577fb0ff4a328d44cef93922f41f8d200d12bbb1 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -621,9 +621,14 @@ EXPORT_SYMBOL(irq_to_desc);
 static void free_desc(unsigned int irq)
 {
        struct irq_desc *desc = irq_to_desc(irq);
+       int cpu;

        scoped_guard(raw_spinlock_irqsave, &desc->lock)
        desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);
+
+       for_each_possible_cpu(cpu)
+               *per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
+
        delete_irq_desc(irq);
 }
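
For reference, free_desc() would then end up looking roughly like this
(sketch, untested, for the non-sparse case the patch targets):

static void free_desc(unsigned int irq)
{
        struct irq_desc *desc = irq_to_desc(irq);
        int cpu;

        scoped_guard(raw_spinlock_irqsave, &desc->lock)
                desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);

        /*
         * Presumably no other users of desc->kstat_irqs at this point,
         * so the O(nr_possible_cpus) clearing can run with the lock
         * dropped and interrupts enabled.
         */
        for_each_possible_cpu(cpu)
                *per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };

        delete_irq_desc(irq);
}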
