[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4517d3801ecb465785f7c883554c9d7f@amazon.com>
Date: Tue, 14 Jan 2025 09:33:21 +0000
From: "Arinzon, David" <darinzon@...zon.com>
To: Ahmed Zaki <ahmed.zaki@...el.com>, "netdev@...r.kernel.org"
<netdev@...r.kernel.org>
CC: "intel-wired-lan@...ts.osuosl.org" <intel-wired-lan@...ts.osuosl.org>,
"andrew+netdev@...n.ch" <andrew+netdev@...n.ch>, "edumazet@...gle.com"
<edumazet@...gle.com>, "kuba@...nel.org" <kuba@...nel.org>,
"horms@...nel.org" <horms@...nel.org>, "pabeni@...hat.com"
<pabeni@...hat.com>, "davem@...emloft.net" <davem@...emloft.net>,
"michael.chan@...adcom.com" <michael.chan@...adcom.com>, "tariqt@...dia.com"
<tariqt@...dia.com>, "anthony.l.nguyen@...el.com"
<anthony.l.nguyen@...el.com>, "przemyslaw.kitszel@...el.com"
<przemyslaw.kitszel@...el.com>, "jdamato@...tly.com" <jdamato@...tly.com>,
"shayd@...dia.com" <shayd@...dia.com>, "akpm@...ux-foundation.org"
<akpm@...ux-foundation.org>, "Agroskin, Shay" <shayagr@...zon.com>,
"kalesh-anakkur.purayil@...adcom.com" <kalesh-anakkur.purayil@...adcom.com>,
"pavan.chebbi@...adcom.com" <pavan.chebbi@...adcom.com>,
"yury.norov@...il.com" <yury.norov@...il.com>
Subject: RE: [PATCH net-next v5 1/6] net: move ARFS rmap management to core
> Add a new netdev flag "rx_cpu_rmap_auto". Drivers supporting ARFS should
> set the flag via netif_enable_cpu_rmap() and core will allocate and manage
> the ARFS rmap. Freeing the rmap is also done by core when the netdev is
> freed.
>
> Signed-off-by: Ahmed Zaki <ahmed.zaki@...el.com>
> ---
> drivers/net/ethernet/amazon/ena/ena_netdev.c | 38 ++---------------
> drivers/net/ethernet/broadcom/bnxt/bnxt.c | 27 ++----------
> drivers/net/ethernet/intel/ice/ice_arfs.c | 17 +-------
> include/linux/netdevice.h | 15 +++++--
> net/core/dev.c | 44 ++++++++++++++++++++
> 5 files changed, 63 insertions(+), 78 deletions(-)
>
> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> index c1295dfad0d0..a3fceaa83cd5 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> @@ -5,9 +5,6 @@
>
> #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>
> -#ifdef CONFIG_RFS_ACCEL
> -#include <linux/cpu_rmap.h>
> -#endif /* CONFIG_RFS_ACCEL */
> #include <linux/ethtool.h>
> #include <linux/kernel.h>
> #include <linux/module.h>
> @@ -165,25 +162,10 @@ int ena_xmit_common(struct ena_adapter
> *adapter, static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) {
> #ifdef CONFIG_RFS_ACCEL
> - u32 i;
> - int rc;
> -
> - adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter-
> >num_io_queues);
> - if (!adapter->netdev->rx_cpu_rmap)
> - return -ENOMEM;
> - for (i = 0; i < adapter->num_io_queues; i++) {
> - int irq_idx = ENA_IO_IRQ_IDX(i);
> -
> - rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
> - pci_irq_vector(adapter->pdev, irq_idx));
> - if (rc) {
> - free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
> - adapter->netdev->rx_cpu_rmap = NULL;
> - return rc;
> - }
> - }
> -#endif /* CONFIG_RFS_ACCEL */
> + return netif_enable_cpu_rmap(adapter->netdev,
> +adapter->num_io_queues); #else
> return 0;
> +#endif /* CONFIG_RFS_ACCEL */
> }
>
> static void ena_init_io_rings_common(struct ena_adapter *adapter, @@ -
> 1742,13 +1724,6 @@ static void ena_free_io_irq(struct ena_adapter
> *adapter)
> struct ena_irq *irq;
> int i;
>
> -#ifdef CONFIG_RFS_ACCEL
> - if (adapter->msix_vecs >= 1) {
> - free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
> - adapter->netdev->rx_cpu_rmap = NULL;
> - }
> -#endif /* CONFIG_RFS_ACCEL */
> -
> for (i = ENA_IO_IRQ_FIRST_IDX; i <
> ENA_MAX_MSIX_VEC(io_queue_count); i++) {
> irq = &adapter->irq_tbl[i];
> irq_set_affinity_hint(irq->vector, NULL); @@ -4131,13 +4106,6 @@
> static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
> ena_dev = adapter->ena_dev;
> netdev = adapter->netdev;
>
> -#ifdef CONFIG_RFS_ACCEL
> - if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
> - free_irq_cpu_rmap(netdev->rx_cpu_rmap);
> - netdev->rx_cpu_rmap = NULL;
> - }
> -
> -#endif /* CONFIG_RFS_ACCEL */
> /* Make sure timer and reset routine won't be called after
> * freeing device resources.
> */
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> index 884d42db5554..1f50bc715038 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -49,7 +49,6 @@
> #include <linux/cache.h>
> #include <linux/log2.h>
> #include <linux/bitmap.h>
> -#include <linux/cpu_rmap.h>
> #include <linux/cpumask.h>
> #include <net/pkt_cls.h>
> #include <net/page_pool/helpers.h>
> @@ -10861,7 +10860,7 @@ static int bnxt_set_real_num_queues(struct bnxt
> *bp)
>
> #ifdef CONFIG_RFS_ACCEL
> if (bp->flags & BNXT_FLAG_RFS)
> - dev->rx_cpu_rmap = alloc_irq_cpu_rmap(bp->rx_nr_rings);
> + return netif_enable_cpu_rmap(dev, bp->rx_nr_rings);
> #endif
>
> return rc;
> @@ -11215,10 +11214,6 @@ static void bnxt_free_irq(struct bnxt *bp)
> struct bnxt_irq *irq;
> int i;
>
> -#ifdef CONFIG_RFS_ACCEL
> - free_irq_cpu_rmap(bp->dev->rx_cpu_rmap);
> - bp->dev->rx_cpu_rmap = NULL;
> -#endif
> if (!bp->irq_tbl || !bp->bnapi)
> return;
>
> @@ -11241,11 +11236,8 @@ static void bnxt_free_irq(struct bnxt *bp)
>
> static int bnxt_request_irq(struct bnxt *bp) {
> - int i, j, rc = 0;
> + int i, rc = 0;
> unsigned long flags = 0;
> -#ifdef CONFIG_RFS_ACCEL
> - struct cpu_rmap *rmap;
> -#endif
>
> rc = bnxt_setup_int_mode(bp);
> if (rc) {
> @@ -11253,22 +11245,11 @@ static int bnxt_request_irq(struct bnxt *bp)
> rc);
> return rc;
> }
> -#ifdef CONFIG_RFS_ACCEL
> - rmap = bp->dev->rx_cpu_rmap;
> -#endif
> - for (i = 0, j = 0; i < bp->cp_nr_rings; i++) {
> +
> + for (i = 0; i < bp->cp_nr_rings; i++) {
> int map_idx = bnxt_cp_num_to_irq_num(bp, i);
> struct bnxt_irq *irq = &bp->irq_tbl[map_idx];
>
> -#ifdef CONFIG_RFS_ACCEL
> - if (rmap && bp->bnapi[i]->rx_ring) {
> - rc = irq_cpu_rmap_add(rmap, irq->vector);
> - if (rc)
> - netdev_warn(bp->dev, "failed adding irq rmap for ring
> %d\n",
> - j);
> - j++;
> - }
> -#endif
> rc = request_irq(irq->vector, irq->handler, flags, irq->name,
> bp->bnapi[i]);
> if (rc)
> diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c
> b/drivers/net/ethernet/intel/ice/ice_arfs.c
> index 7cee365cc7d1..3b1b892e6958 100644
> --- a/drivers/net/ethernet/intel/ice/ice_arfs.c
> +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
> @@ -584,9 +584,6 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
> netdev = vsi->netdev;
> if (!netdev || !netdev->rx_cpu_rmap)
> return;
> -
> - free_irq_cpu_rmap(netdev->rx_cpu_rmap);
> - netdev->rx_cpu_rmap = NULL;
> }
>
> /**
> @@ -597,7 +594,6 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) {
> struct net_device *netdev;
> struct ice_pf *pf;
> - int i;
>
> if (!vsi || vsi->type != ICE_VSI_PF)
> return 0;
> @@ -610,18 +606,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
> netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s,
> q_vectors %d\n",
> vsi->type, netdev->name, vsi->num_q_vectors);
>
> - netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors);
> - if (unlikely(!netdev->rx_cpu_rmap))
> - return -EINVAL;
> -
> - ice_for_each_q_vector(vsi, i)
> - if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
> - vsi->q_vectors[i]->irq.virq)) {
> - ice_free_cpu_rx_rmap(vsi);
> - return -EINVAL;
> - }
> -
> - return 0;
> + return netif_enable_cpu_rmap(netdev, vsi->num_q_vectors);
> }
>
> /**
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index
> aeb4a6cff171..7e95e9ee36dd 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1991,6 +1991,9 @@ enum netdev_reg_state {
> *
> * @threaded: napi threaded mode is enabled
> *
> + * @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap.
> + * Set by calling netif_enable_cpu_rmap().
> + *
> * @see_all_hwtstamp_requests: device wants to see calls to
> * ndo_hwtstamp_set() for all timestamp requests
> * regardless of source, even if those aren't
> @@ -2398,6 +2401,9 @@ struct net_device {
> struct lock_class_key *qdisc_tx_busylock;
> bool proto_down;
> bool threaded;
> +#ifdef CONFIG_RFS_ACCEL
> + bool rx_cpu_rmap_auto;
> +#endif
>
> /* priv_flags_slow, ungrouped to save space */
> unsigned long see_all_hwtstamp_requests:1;
> @@ -2671,10 +2677,7 @@ void netif_queue_set_napi(struct net_device
> *dev, unsigned int queue_index,
> enum netdev_queue_type type,
> struct napi_struct *napi);
>
> -static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) -{
> - napi->irq = irq;
> -}
> +void netif_napi_set_irq(struct napi_struct *napi, int irq);
>
> /* Default NAPI poll() weight
> * Device drivers are strongly advised to not use bigger value @@ -2765,6
> +2768,10 @@ static inline void netif_napi_del(struct napi_struct *napi)
> synchronize_net();
> }
>
> +#ifdef CONFIG_RFS_ACCEL
> +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int
> +num_irqs);
> +
> +#endif
> struct packet_type {
> __be16 type; /* This is really htons(ether_type). */
> bool ignore_outgoing;
> diff --git a/net/core/dev.c b/net/core/dev.c index
> 1a90ed8cc6cc..3ee7a514dca8 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6745,6 +6745,46 @@ void netif_queue_set_napi(struct net_device
> *dev, unsigned int queue_index, }
> EXPORT_SYMBOL(netif_queue_set_napi);
>
> +#ifdef CONFIG_RFS_ACCEL
> +static void netif_disable_cpu_rmap(struct net_device *dev) {
> + free_irq_cpu_rmap(dev->rx_cpu_rmap);
> + dev->rx_cpu_rmap = NULL;
> + dev->rx_cpu_rmap_auto = false;
> +}
> +
> +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int
> +num_irqs) {
> + dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs);
> + if (!dev->rx_cpu_rmap)
> + return -ENOMEM;
> +
> + dev->rx_cpu_rmap_auto = true;
> + return 0;
> +}
> +EXPORT_SYMBOL(netif_enable_cpu_rmap);
> +#endif
> +
> +void netif_napi_set_irq(struct napi_struct *napi, int irq) { #ifdef
> +CONFIG_RFS_ACCEL
> + int rc;
> +#endif
> + napi->irq = irq;
> +
> +#ifdef CONFIG_RFS_ACCEL
> + if (napi->dev->rx_cpu_rmap && napi->dev->rx_cpu_rmap_auto) {
> + rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq);
> + if (rc) {
> + netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
> + rc);
> + netif_disable_cpu_rmap(napi->dev);
> + }
> + }
> +#endif
> +}
> +EXPORT_SYMBOL(netif_napi_set_irq);
> +
> static void napi_restore_config(struct napi_struct *n) {
> n->defer_hard_irqs = n->config->defer_hard_irqs; @@ -11421,6
> +11461,10 @@ void free_netdev(struct net_device *dev)
> /* Flush device addresses */
> dev_addr_flush(dev);
>
> +#ifdef CONFIG_RFS_ACCEL
> + if (dev->rx_cpu_rmap && dev->rx_cpu_rmap_auto)
> + netif_disable_cpu_rmap(dev); #endif
> list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
> netif_napi_del(p);
>
> --
> 2.43.0
Thanks for making the change in the ENA driver.
Acked-by: David Arinzon <darinzon@...zon.com>
Powered by blists - more mailing lists