Date:   Wed, 5 Sep 2018 13:46:16 +0800
From:   Dou Liyang <dou_liyang@....com>
To:     Kashyap Desai <kashyap.desai@...adcom.com>,
        Thomas Gleixner <tglx@...utronix.de>
Cc:     Ming Lei <tom.leiming@...il.com>,
        Sumit Saxena <sumit.saxena@...adcom.com>,
        Ming Lei <ming.lei@...hat.com>, Christoph Hellwig <hch@....de>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
        Shivasharan Srikanteshwara 
        <shivasharan.srikanteshwara@...adcom.com>,
        linux-block <linux-block@...r.kernel.org>,
        Dou Liyang <douly.fnst@...fujitsu.com>
Subject: Re: Affinity managed interrupts vs non-managed interrupts

Hi Thomas, Kashyap,

On 09/04/2018 06:29 PM, Kashyap Desai wrote:
>>> I am using " for-4.19/block " and this particular patch "a0c9259
>>> irq/matrix: Spread interrupts on allocation" is included.
>>

IMO, that patch only applies to non-managed interrupts.
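
For reference, the spreading which that commit introduced happens in
irq_matrix_alloc(), i.e. the non-managed path. From memory, the
best-CPU helper it added looks roughly like this (a sketch, not quoted
verbatim from the tree):

static unsigned int matrix_find_best_cpu(struct irq_matrix *m,
                                         const struct cpumask *msk)
{
        unsigned int cpu, best_cpu, maxavl = 0;
        struct cpumap *cm;

        best_cpu = UINT_MAX;
        for_each_cpu(cpu, msk) {
                cm = per_cpu_ptr(m->maps, cpu);

                /* Prefer the online CPU with the most free vectors */
                if (!cm->online || cm->available <= maxavl)
                        continue;

                best_cpu = cpu;
                maxavl = cm->available;
        }
        return best_cpu;
}

irq_matrix_alloc_managed() does no such search, so managed interrupts
never get spread this way.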

>> So if all 16 have their effective affinity set to CPU0 then that's
>> strange

But all these 16 are managed interrupts, and they get their vectors
assigned by assign_managed_vector():
{
     cpumask_and(vector_searchmask, vector_searchmask, affmsk);
     cpu = cpumask_first(vector_searchmask);

     ...
     vector = irq_matrix_alloc_managed(vector_matrix, cpu);
     ...
}

Here we always use the *first* CPU in vector_searchmask (0-71), not the
most suitable one. So I guess that is how this situation happened.
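
To illustrate with the numbers from this report: all 16 managed vectors
share the same 0-71 affinity mask, so every allocation walks the same
path and lands on CPU0 (illustration only, values assumed):

        /* affmsk covers CPUs 0-71 for each of the 16 vectors */
        cpumask_and(vector_searchmask, vector_searchmask, affmsk);
        cpu = cpumask_first(vector_searchmask);  /* always returns 0 */
        vector = irq_matrix_alloc_managed(vector_matrix, cpu);
        /* => all 16 vectors come out of CPU0's map, CPUs 1-71 get none */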

Shall we also spread the managed interrupts on allocation?

Thanks,
     dou
-----------------8<----------------------------------------

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 9f148e3d45b4..57dc05691f44 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -314,13 +314,12 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)
         int vector, cpu;

         cpumask_and(vector_searchmask, vector_searchmask, affmsk);
-       cpu = cpumask_first(vector_searchmask);
-       if (cpu >= nr_cpu_ids)
-               return -EINVAL;
+
         /* set_affinity might call here for nothing */
         if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask))
                 return 0;
-       vector = irq_matrix_alloc_managed(vector_matrix, cpu);
+
+       vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask, &cpu);
         trace_vector_alloc_managed(irqd->irq, vector, vector);
         if (vector < 0)
                 return vector;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 201de12a9957..36fdeff5043a 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1151,7 +1151,8 @@ void irq_matrix_offline(struct irq_matrix *m);
  void irq_matrix_assign_system(struct irq_matrix *m, unsigned int bit, bool replace);
  int irq_matrix_reserve_managed(struct irq_matrix *m, const struct cpumask *msk);
  void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk);
-int irq_matrix_alloc_managed(struct irq_matrix *m, unsigned int cpu);
+int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk,
+                                       unsigned int *mapped_cpu);
  void irq_matrix_reserve(struct irq_matrix *m);
  void irq_matrix_remove_reserved(struct irq_matrix *m);
  int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk,
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 5092494bf261..d9e4e0a385fa 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -239,21 +239,40 @@ void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk)
   * @m:         Matrix pointer
   * @cpu:       On which CPU the interrupt should be allocated
   */
-int irq_matrix_alloc_managed(struct irq_matrix *m, unsigned int cpu)
+int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk,
+                                       unsigned int *mapped_cpu)
  {
-       struct cpumap *cm = per_cpu_ptr(m->maps, cpu);
-       unsigned int bit, end = m->alloc_end;
-
-       /* Get managed bit which are not allocated */
-       bitmap_andnot(m->scratch_map, cm->managed_map, cm->alloc_map, end);
-       bit = find_first_bit(m->scratch_map, end);
-       if (bit >= end)
-               return -ENOSPC;
-       set_bit(bit, cm->alloc_map);
-       cm->allocated++;
-       m->total_allocated++;
-       trace_irq_matrix_alloc_managed(bit, cpu, m, cm);
-       return bit;
+       unsigned int cpu, best_cpu, maxavl = 0;
+       unsigned int bit, end;
+       struct cpumap *cm;
+
+       best_cpu = UINT_MAX;
+       for_each_cpu(cpu, msk) {
+               cm = per_cpu_ptr(m->maps, cpu);
+
+               if (!cm->online || cm->available <= maxavl)
+                       continue;
+
+               best_cpu = cpu;
+               maxavl = cm->available;
+       }
+
+       if (maxavl) {
+               cm = per_cpu_ptr(m->maps, best_cpu);
+               end = m->alloc_end;
+               /* Get a managed bit which is not allocated */
+               bitmap_andnot(m->scratch_map, cm->managed_map, cm->alloc_map, end);
+               bit = find_first_bit(m->scratch_map, end);
+               if (bit >= end)
+                       return -ENOSPC;
+               set_bit(bit, cm->alloc_map);
+               cm->allocated++;
+               m->total_allocated++;
+               *mapped_cpu = best_cpu;
+               trace_irq_matrix_alloc_managed(bit, best_cpu, m, cm);
+               return bit;
+       }
+       return -ENOSPC;
  }

  /**
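
With the change above, the call site in assign_managed_vector() would
end up looking roughly like this (sketch only; the tail of the function
is unchanged):

        vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask,
                                          &cpu);
        trace_vector_alloc_managed(irqd->irq, vector, vector);
        if (vector < 0)
                return vector;
        /*
         * cpu now holds the least loaded CPU chosen inside the matrix
         * allocator; the existing code below programs that vector/cpu
         * pair into the APIC configuration as before.
         */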
