lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <54f50b81-7361-4140-8b88-acd765fd8f49@intel.com>
Date: Tue, 4 Mar 2025 15:37:08 -0700
From: Ahmed Zaki <ahmed.zaki@...el.com>
To: "Arinzon, David" <darinzon@...zon.com>, "netdev@...r.kernel.org"
	<netdev@...r.kernel.org>
CC: "intel-wired-lan@...ts.osuosl.org" <intel-wired-lan@...ts.osuosl.org>,
	"andrew+netdev@...n.ch" <andrew+netdev@...n.ch>, "edumazet@...gle.com"
	<edumazet@...gle.com>, "kuba@...nel.org" <kuba@...nel.org>,
	"horms@...nel.org" <horms@...nel.org>, "pabeni@...hat.com"
	<pabeni@...hat.com>, "davem@...emloft.net" <davem@...emloft.net>,
	"michael.chan@...adcom.com" <michael.chan@...adcom.com>, "tariqt@...dia.com"
	<tariqt@...dia.com>, "anthony.l.nguyen@...el.com"
	<anthony.l.nguyen@...el.com>, "przemyslaw.kitszel@...el.com"
	<przemyslaw.kitszel@...el.com>, "jdamato@...tly.com" <jdamato@...tly.com>,
	"shayd@...dia.com" <shayd@...dia.com>, "akpm@...ux-foundation.org"
	<akpm@...ux-foundation.org>, "Allen, Neil" <shayagr@...zon.com>
Subject: Re: [Intel-wired-lan] [PATCH net-next v9 2/6] net: ena: use napi's
 aRFS rmap notifers

[RE-SEND] I just realized I sent this only to iwl, sorry for spamming.


On 2025-03-03 10:11 a.m., Arinzon, David wrote:
>> Use the core's rmap notifiers and delete our own.
>>
>> Acked-by: David Arinzon <darinzon@...zon.com>
>> Signed-off-by: Ahmed Zaki <ahmed.zaki@...el.com>
>> ---
>>   drivers/net/ethernet/amazon/ena/ena_netdev.c | 43 +-------------------
>>   1 file changed, 1 insertion(+), 42 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c
>> b/drivers/net/ethernet/amazon/ena/ena_netdev.c
>> index c1295dfad0d0..6aab85a7c60a 100644
>> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
>> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
>> @@ -5,9 +5,6 @@
>>
>>   #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>>
>> -#ifdef CONFIG_RFS_ACCEL
>> -#include <linux/cpu_rmap.h>
>> -#endif /* CONFIG_RFS_ACCEL */
>>   #include <linux/ethtool.h>
>>   #include <linux/kernel.h>
>>   #include <linux/module.h>
>> @@ -162,30 +159,6 @@ int ena_xmit_common(struct ena_adapter
>> *adapter,
>>          return 0;
>>   }
>>
>> -static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) -{ -#ifdef
>> CONFIG_RFS_ACCEL
>> -       u32 i;
>> -       int rc;
>> -
>> -       adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter-
>>> num_io_queues);
>> -       if (!adapter->netdev->rx_cpu_rmap)
>> -               return -ENOMEM;
>> -       for (i = 0; i < adapter->num_io_queues; i++) {
>> -               int irq_idx = ENA_IO_IRQ_IDX(i);
>> -
>> -               rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
>> -                                     pci_irq_vector(adapter->pdev, irq_idx));
>> -               if (rc) {
>> -                       free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
>> -                       adapter->netdev->rx_cpu_rmap = NULL;
>> -                       return rc;
>> -               }
>> -       }
>> -#endif /* CONFIG_RFS_ACCEL */
>> -       return 0;
>> -}
>> -
>>   static void ena_init_io_rings_common(struct ena_adapter *adapter,
>>                                       struct ena_ring *ring, u16 qid)  { @@ -1596,7 +1569,7 @@
>> static int ena_enable_msix(struct ena_adapter *adapter)
>>                  adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
>>          }
>>
>> -       if (ena_init_rx_cpu_rmap(adapter))
>> +       if (netif_enable_cpu_rmap(adapter->netdev,
>> + adapter->num_io_queues))
>>                  netif_warn(adapter, probe, adapter->netdev,
>>                             "Failed to map IRQs to CPUs\n");
>>
>> @@ -1742,13 +1715,6 @@ static void ena_free_io_irq(struct ena_adapter
>> *adapter)
>>          struct ena_irq *irq;
>>          int i;
>>
>> -#ifdef CONFIG_RFS_ACCEL
>> -       if (adapter->msix_vecs >= 1) {
>> -               free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
>> -               adapter->netdev->rx_cpu_rmap = NULL;
>> -       }
>> -#endif /* CONFIG_RFS_ACCEL */
>> -
>>          for (i = ENA_IO_IRQ_FIRST_IDX; i <
>> ENA_MAX_MSIX_VEC(io_queue_count); i++) {
>>                  irq = &adapter->irq_tbl[i];
>>                  irq_set_affinity_hint(irq->vector, NULL); @@ -4131,13 +4097,6 @@
>> static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
>>          ena_dev = adapter->ena_dev;
>>          netdev = adapter->netdev;
>>
>> -#ifdef CONFIG_RFS_ACCEL
>> -       if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
>> -               free_irq_cpu_rmap(netdev->rx_cpu_rmap);
>> -               netdev->rx_cpu_rmap = NULL;
>> -       }
>> -
>> -#endif /* CONFIG_RFS_ACCEL */
>>          /* Make sure timer and reset routine won't be called after
>>           * freeing device resources.
>>           */
>> --
>> 2.43.0
> 
> Hi Ahmed,
> 
> After the merging of this patch, I see the below stack trace when the IRQs are freed.
> It can be reproduced by unloading and loading the driver using `modprobe -r ena; modprobe ena` (happens during unload)
> 
> Based on the patchset and the changes to other drivers, I think there's a missing call to the function
> that releases the affinity notifier (The warn is in https://web.git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/tree/kernel/irq/manage.c#n2031)
> 
> I saw in the intel code in the patchset that ` netif_napi_set_irq(<napi>, -1);` is added?
> 
> After adding the code snippet I don't see this anymore, but I want to understand whether it's the right call by design.

Yes, in ena_down() the IRQs are freed before napis are deleted (where 
IRQ notifiers are released). The code below is fine (and is better IMO) 
but you can also delete napis then free IRQs.


> 
> @@ -1716,8 +1716,11 @@ static void ena_free_io_irq(struct ena_adapter *adapter)
>          int i;
> 
>          for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
> +               struct ena_napi *ena_napi;
> +
>                  irq = &adapter->irq_tbl[i];
>                  irq_set_affinity_hint(irq->vector, NULL);
> +               ena_napi = (struct ena_napi *)irq->data;
> +               netif_napi_set_irq(&ena_napi->napi, -1);
>                  free_irq(irq->vector, irq->data);
>          }
>   }
> 
> [  484.544586]  ? __warn+0x84/0x130
> [  484.544843]  ? free_irq+0x5c/0x70
> [  484.545105]  ? report_bug+0x18a/0x1a0
> [  484.545390]  ? handle_bug+0x53/0x90
> [  484.545664]  ? exc_invalid_op+0x14/0x70
> [  484.545959]  ? asm_exc_invalid_op+0x16/0x20
> [  484.546279]  ? free_irq+0x5c/0x70
> [  484.546545]  ? free_irq+0x10/0x70
> [  484.546807]  ena_free_io_irq+0x5f/0x70 [ena]
> [  484.547138]  ena_down+0x250/0x3e0 [ena]
> [  484.547435]  ena_destroy_device+0x118/0x150 [ena]
> [  484.547796]  __ena_shutoff+0x5a/0xe0 [ena]
> [  484.548110]  pci_device_remove+0x3b/0xb0
> [  484.548412]  device_release_driver_internal+0x193/0x200
> [  484.548804]  driver_detach+0x44/0x90
> [  484.549084]  bus_remove_driver+0x69/0xf0
> [  484.549386]  pci_unregister_driver+0x2a/0xb0
> [  484.549717]  ena_cleanup+0xc/0x130 [ena]
> [  484.550021]  __do_sys_delete_module.constprop.0+0x176/0x310
> [  484.550438]  ? syscall_trace_enter+0xfb/0x1c0
> [  484.550782]  do_syscall_64+0x5b/0x170
> [  484.551067]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
> 
> Thanks,
> David


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ