[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d64145d6-966c-2e9d-beca-b8f896c8d2f0@nvidia.com>
Date: Tue, 10 Jan 2023 08:22:26 -0600
From: Shanker Donthineni <sdonthineni@...dia.com>
To: Marc Zyngier <maz@...nel.org>
Cc: Catalin Marinas <catalin.marinas@....com>,
Will Deacon <will@...nel.org>,
James Morse <james.morse@....com>,
linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] arm64: gic: increase the number of IRQ descriptors
Hi Marc,
On 1/10/23 02:20, Marc Zyngier wrote:
> External email: Use caution opening links or attachments
>
>
> On Mon, 09 Jan 2023 17:13:25 +0000,
> Shanker Donthineni <sdonthineni@...dia.com> wrote:
>>
>>>>> I'm happy to help with it, but I'm certainly not willing to accept any
>>>>> sort of new compile-time limit.
>>>>
>>>> Thanks for helping with a scalable solution instead of static
>>>> allocation. Please include me whenever patches posted to LKML. I'm
>>>> happy to verify on NVIDIA server platforms and provide test
>>>> feedback.
>>>>
>>>
>>> I offered to help you. I didn't offer to do the work for you! ;-)
>>>
>>
>> I've looked at the IDR/IDA API. There is no suitable function for
>> allocating contiguous IDs to replace bitmap API.
>>
>> __irq_alloc_descs():
>>
>> mutex_lock(&sparse_irq_lock);
>>
>> start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
>> from, cnt, 0);
>> ret = -EEXIST;
>>
>> Is there any existing API that I can use for allocating contiguous IDs?
>
> I think you should address the problem the other way around, as there
> are lower hanging fruits:
>
> - turn the irq_desc_tree radix tree into a XArray
>
> - use the XArray mark feature to reimplement the irqs_resend bitmap
>
> Once you have done that, you have already halved the memory usage.
> To implement the allocated_irqs bitmap functionality, you have a
> bunch of options:
>
> - make the XArray an allocating XArray, and iterate over XA_FREE_MARK
> to find the free range (see how the infiniband subsystem is doing
> exactly that)
>
> - use another Xarray mark to annotate the allocated IRQs, find the
> distance between two allocations, and use this range if the request
> fits (a poor man's variation of the above)
>
> - use a sideband data structure such as the GICv3 LPI allocator, which
> is already dealing with range allocation (I'd rather avoid that)
>
> - something else?
>
Thanks for providing the guidance. The irq_resend change will be simple,
IDR will fit perfectly. Could you comment on the below 2 patches which are
using IDR API?
One IDR variable is used for both the IRQ ID allocation & descriptoirs.
I'll test and post patches for comments if you're okay with the approach.
Patch 1/2:
genirq: Prepare code for IDR based allocation
Introduce helper functions for managing Linux IRQ IDs and
define for both SPARSE_IRQ and non-SPARSE_IRQ seperately.
There is no change in functional behavior.
Changes:
-Helper function irq_alloc_descs_ids() for allocatind IRQ IDs
-Helper function irq_free_descs_ids() to free IRQ IDs
-Helper function irq_get_next_id() to get next IRQ ID
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index fd0996274401..a40ac0c58550 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -131,7 +131,6 @@ int nr_irqs = NR_IRQS;
EXPORT_SYMBOL_GPL(nr_irqs);
static DEFINE_MUTEX(sparse_irq_lock);
-static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
#ifdef CONFIG_SPARSE_IRQ
@@ -344,6 +343,7 @@ static void irq_sysfs_del(struct irq_desc *desc) {}
#endif /* CONFIG_SYSFS */
+static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
@@ -469,6 +469,22 @@ static void free_desc(unsigned int irq)
call_rcu(&desc->rcu, delayed_free_desc);
}
+static void irq_free_descs_ids(unsigned int from, unsigned int cnt)
+{
+ bitmap_clear(allocated_irqs, from, cnt);
+}
+
+static int irq_alloc_descs_ids(unsigned int from, unsigned int cnt)
+{
+ return bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
+ from, cnt, 0);
+}
+
+static unsigned int irq_get_next_id(unsigned int offset)
+{
+ return find_next_bit(allocated_irqs, nr_irqs, offset);
+}
+
static int alloc_descs(unsigned int start, unsigned int cnt, int node,
const struct irq_affinity_desc *affinity,
struct module *owner)
@@ -553,6 +569,8 @@ int __init early_irq_init(void)
#else /* !CONFIG_SPARSE_IRQ */
+static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
+
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS-1] = {
.handle_irq = handle_bad_irq,
@@ -591,6 +609,22 @@ struct irq_desc *irq_to_desc(unsigned int irq)
}
EXPORT_SYMBOL(irq_to_desc);
+static void irq_free_descs_ids(unsigned int from, unsigned int cnt)
+{
+ bitmap_clear(allocated_irqs, from, cnt);
+}
+
+static int irq_alloc_descs_ids(unsigned int from, unsigned int cnt)
+{
+ return bitmap_find_next_zero_area(allocated_irqs, NR_IRQS,
+ from, cnt, 0);
+}
+
+static unsigned int irq_get_next_id(unsigned int offset)
+{
+ return find_next_bit(allocated_irqs, nr_irqs, offset);
+}
+
static void free_desc(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -768,7 +802,7 @@ void irq_free_descs(unsigned int from, unsigned int cnt)
for (i = 0; i < cnt; i++)
free_desc(from + i);
- bitmap_clear(allocated_irqs, from, cnt);
+ irq_free_descs_ids(from, cnt);
mutex_unlock(&sparse_irq_lock);
}
EXPORT_SYMBOL_GPL(irq_free_descs);
@@ -810,8 +844,7 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
mutex_lock(&sparse_irq_lock);
- start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
- from, cnt, 0);
+ start = irq_alloc_descs_ids(from, cnt);
ret = -EEXIST;
if (irq >=0 && start != irq)
goto unlock;
@@ -836,7 +869,7 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs);
*/
unsigned int irq_get_next_irq(unsigned int offset)
{
- return find_next_bit(allocated_irqs, nr_irqs, offset);
+ return irq_get_next_id(offset);
}
PATCH 2/2:
genirq: Use IDR API for Linux-IRQ IDs allocation
The build time config paramter IRQ_BITMAP_BITS (NR_IRQS+8196)
may not be sufficient for some architectures. The interrupt ID
sparse is huge for ARM-GIC architecture ~32 bits. Static bitmap
memory for managing IDs is not optimal when NR_IRQS is set to
a high value.
It uses the IDR API for the IRQ ID allocation/deallocation and
its descriptors management insertion/deletion/search. No other
references to macro IRQ_BITMAP_BITS hence remove it.
And also covert static allocation of the 'irqs_resend' bitmap
to dynamic allocation using IDR.
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 5fdc0b557579..501f90962644 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -11,12 +11,6 @@
#include <linux/pm_runtime.h>
#include <linux/sched/clock.h>
-#ifdef CONFIG_SPARSE_IRQ
-# define IRQ_BITMAP_BITS (NR_IRQS + 8196)
-#else
-# define IRQ_BITMAP_BITS NR_IRQS
-#endif
-
#define istate core_internal_state__do_not_mess_with_it
extern bool noirqdebug;
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a40ac0c58550..bb1febd3a420 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -343,25 +343,25 @@ static void irq_sysfs_del(struct irq_desc *desc) {}
#endif /* CONFIG_SYSFS */
-static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
-static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
+static DEFINE_IDR(idr_irq_descs);
static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
{
- radix_tree_insert(&irq_desc_tree, irq, desc);
+ idr_replace(&idr_irq_descs, desc, irq);
}
struct irq_desc *irq_to_desc(unsigned int irq)
{
- return radix_tree_lookup(&irq_desc_tree, irq);
+ return idr_find(&idr_irq_descs, irq);
}
+
#ifdef CONFIG_KVM_BOOK3S_64_HV_MODULE
EXPORT_SYMBOL_GPL(irq_to_desc);
#endif
static void delete_irq_desc(unsigned int irq)
{
- radix_tree_delete(&irq_desc_tree, irq);
+ idr_replace(&idr_irq_descs, NULL, irq);
}
#ifdef CONFIG_SMP
@@ -471,18 +471,48 @@ static void free_desc(unsigned int irq)
static void irq_free_descs_ids(unsigned int from, unsigned int cnt)
{
- bitmap_clear(allocated_irqs, from, cnt);
+ int i;
+
+ for (i = 0; i < cnt; i++)
+ idr_remove(&idr_irq_descs, from + i);
}
static int irq_alloc_descs_ids(unsigned int from, unsigned int cnt)
{
- return bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
- from, cnt, 0);
+ int start, id, i;
+
+ do {
+ /* Allocate starting ID */
+ start = idr_alloc(&idr_irq_descs, NULL, from, 0, GFP_ATOMIC);
+ if (start < 0)
+ return start;
+ idr_set_cursor(&idr_irq_descs, start);
+
+ /* Allocate contiguous IDs */
+ for (i = 1; i < cnt; i++) {
+ id = idr_alloc_cyclic(&idr_irq_descs, NULL, start + i,
+ start + i, GFP_ATOMIC);
+ if (id < 0) {
+ irq_free_descs_ids(from, i);
+ break;
+ }
+ }
+
+ /* Allocated 'cnt' IDs */
+ if (i == cnt)
+ return start;
+ from = idr_get_cursor(&idr_irq_descs);
+ } while (from < INT_MAX);
+
+ irq_free_descs_ids(start, i);
+ return -ENOSPC;
}
static unsigned int irq_get_next_id(unsigned int offset)
{
- return find_next_bit(allocated_irqs, nr_irqs, offset);
+ int id;
+
+ return idr_get_next(&idr_irqs, &id) ? id : -EINVAL;
}
static int alloc_descs(unsigned int start, unsigned int cnt, int node,
@@ -521,7 +551,6 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
irq_sysfs_add(start + i, desc);
irq_add_debugfs_entry(start + i, desc);
}
- bitmap_set(allocated_irqs, start, cnt);
return start;
err:
@@ -532,8 +561,6 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
static int irq_expand_nr_irqs(unsigned int nr)
{
- if (nr > IRQ_BITMAP_BITS)
- return -ENOMEM;
nr_irqs = nr;
return 0;
}
@@ -542,6 +569,7 @@ int __init early_irq_init(void)
{
int i, initcnt, node = first_online_node;
struct irq_desc *desc;
+ int irq;
init_irq_default_affinity();
@@ -550,19 +578,10 @@ int __init early_irq_init(void)
printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n",
NR_IRQS, nr_irqs, initcnt);
- if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS))
- nr_irqs = IRQ_BITMAP_BITS;
-
- if (WARN_ON(initcnt > IRQ_BITMAP_BITS))
- initcnt = IRQ_BITMAP_BITS;
-
- if (initcnt > nr_irqs)
- nr_irqs = initcnt;
-
for (i = 0; i < initcnt; i++) {
- desc = alloc_desc(i, node, 0, NULL, NULL);
- set_bit(i, allocated_irqs);
- irq_insert_desc(i, desc);
+ irq = irq_alloc_descs_ids(0, 1);
+ desc = alloc_desc(irq, node, 0, NULL, NULL);
+ irq_insert_desc(irq, desc);
}
return arch_early_irq_init();
}
@@ -855,6 +874,8 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
goto unlock;
}
ret = alloc_descs(start, cnt, node, affinity, owner);
+ if (ret != start)
+ irq_free_descs_ids(start, cnt);
unlock:
mutex_unlock(&sparse_irq_lock);
return ret;
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 0c46e9fe3a89..1c9db8e03fba 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -21,8 +21,8 @@
#ifdef CONFIG_HARDIRQS_SW_RESEND
-/* Bitmap to handle software resend of interrupts: */
-static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
+/* IDR map to handle software resend of interrupts: */
+static DEFINE_IDR(irqs_resend);
/*
* Run software resends of IRQ's
@@ -30,14 +30,11 @@ static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
static void resend_irqs(struct tasklet_struct *unused)
{
struct irq_desc *desc;
- int irq;
-
- while (!bitmap_empty(irqs_resend, nr_irqs)) {
- irq = find_first_bit(irqs_resend, nr_irqs);
- clear_bit(irq, irqs_resend);
- desc = irq_to_desc(irq);
- if (!desc)
- continue;
+ int id;
+
+ idr_for_each_entry(&irqs_resend, desc, id) {
+ idr_replace(&irqs_resend, NULL, id);
+ idr_remove(&irqs_resend, id);
local_irq_disable();
desc->handle_irq(desc);
local_irq_enable();
@@ -49,7 +46,7 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs);
static int irq_sw_resend(struct irq_desc *desc)
{
- unsigned int irq = irq_desc_get_irq(desc);
+ int id;
/*
* Validate whether this interrupt can be safely injected from
@@ -70,11 +67,13 @@ static int irq_sw_resend(struct irq_desc *desc)
*/
if (!desc->parent_irq)
return -EINVAL;
- irq = desc->parent_irq;
}
/* Set it pending and activate the softirq: */
- set_bit(irq, irqs_resend);
+ id = idr_alloc(&irqs_resend, desc, 0, 0, GFP_ATOMIC);
+ if (id < 0)
+ return id;
+
tasklet_schedule(&resend_tasklet);
return 0;
}
Powered by blists - more mailing lists