lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4517d3801ecb465785f7c883554c9d7f@amazon.com>
Date: Tue, 14 Jan 2025 09:33:21 +0000
From: "Arinzon, David" <darinzon@...zon.com>
To: Ahmed Zaki <ahmed.zaki@...el.com>, "netdev@...r.kernel.org"
	<netdev@...r.kernel.org>
CC: "intel-wired-lan@...ts.osuosl.org" <intel-wired-lan@...ts.osuosl.org>,
	"andrew+netdev@...n.ch" <andrew+netdev@...n.ch>, "edumazet@...gle.com"
	<edumazet@...gle.com>, "kuba@...nel.org" <kuba@...nel.org>,
	"horms@...nel.org" <horms@...nel.org>, "pabeni@...hat.com"
	<pabeni@...hat.com>, "davem@...emloft.net" <davem@...emloft.net>,
	"michael.chan@...adcom.com" <michael.chan@...adcom.com>, "tariqt@...dia.com"
	<tariqt@...dia.com>, "anthony.l.nguyen@...el.com"
	<anthony.l.nguyen@...el.com>, "przemyslaw.kitszel@...el.com"
	<przemyslaw.kitszel@...el.com>, "jdamato@...tly.com" <jdamato@...tly.com>,
	"shayd@...dia.com" <shayd@...dia.com>, "akpm@...ux-foundation.org"
	<akpm@...ux-foundation.org>, "Agroskin, Shay" <shayagr@...zon.com>,
	"kalesh-anakkur.purayil@...adcom.com" <kalesh-anakkur.purayil@...adcom.com>,
	"pavan.chebbi@...adcom.com" <pavan.chebbi@...adcom.com>,
	"yury.norov@...il.com" <yury.norov@...il.com>
Subject: RE: [PATCH net-next v5 1/6] net: move ARFS rmap management to core

> Add a new netdev flag "rx_cpu_rmap_auto". Drivers supporting ARFS should
> set the flag via netif_enable_cpu_rmap() and core will allocate and manage
> the ARFS rmap. Freeing the rmap is also done by core when the netdev is
> freed.
> 
> Signed-off-by: Ahmed Zaki <ahmed.zaki@...el.com>
> ---
>  drivers/net/ethernet/amazon/ena/ena_netdev.c | 38 ++---------------
>  drivers/net/ethernet/broadcom/bnxt/bnxt.c    | 27 ++----------
>  drivers/net/ethernet/intel/ice/ice_arfs.c    | 17 +-------
>  include/linux/netdevice.h                    | 15 +++++--
>  net/core/dev.c                               | 44 ++++++++++++++++++++
>  5 files changed, 63 insertions(+), 78 deletions(-)
> 
> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> index c1295dfad0d0..a3fceaa83cd5 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> @@ -5,9 +5,6 @@
> 
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> 
> -#ifdef CONFIG_RFS_ACCEL
> -#include <linux/cpu_rmap.h>
> -#endif /* CONFIG_RFS_ACCEL */
>  #include <linux/ethtool.h>
>  #include <linux/kernel.h>
>  #include <linux/module.h>
> @@ -165,25 +162,10 @@ int ena_xmit_common(struct ena_adapter
> *adapter,  static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)  {
> #ifdef CONFIG_RFS_ACCEL
> -       u32 i;
> -       int rc;
> -
> -       adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter-
> >num_io_queues);
> -       if (!adapter->netdev->rx_cpu_rmap)
> -               return -ENOMEM;
> -       for (i = 0; i < adapter->num_io_queues; i++) {
> -               int irq_idx = ENA_IO_IRQ_IDX(i);
> -
> -               rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
> -                                     pci_irq_vector(adapter->pdev, irq_idx));
> -               if (rc) {
> -                       free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
> -                       adapter->netdev->rx_cpu_rmap = NULL;
> -                       return rc;
> -               }
> -       }
> -#endif /* CONFIG_RFS_ACCEL */
> +       return netif_enable_cpu_rmap(adapter->netdev,
> +adapter->num_io_queues); #else
>         return 0;
> +#endif /* CONFIG_RFS_ACCEL */
>  }
> 
>  static void ena_init_io_rings_common(struct ena_adapter *adapter, @@ -
> 1742,13 +1724,6 @@ static void ena_free_io_irq(struct ena_adapter
> *adapter)
>         struct ena_irq *irq;
>         int i;
> 
> -#ifdef CONFIG_RFS_ACCEL
> -       if (adapter->msix_vecs >= 1) {
> -               free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
> -               adapter->netdev->rx_cpu_rmap = NULL;
> -       }
> -#endif /* CONFIG_RFS_ACCEL */
> -
>         for (i = ENA_IO_IRQ_FIRST_IDX; i <
> ENA_MAX_MSIX_VEC(io_queue_count); i++) {
>                 irq = &adapter->irq_tbl[i];
>                 irq_set_affinity_hint(irq->vector, NULL); @@ -4131,13 +4106,6 @@
> static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
>         ena_dev = adapter->ena_dev;
>         netdev = adapter->netdev;
> 
> -#ifdef CONFIG_RFS_ACCEL
> -       if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
> -               free_irq_cpu_rmap(netdev->rx_cpu_rmap);
> -               netdev->rx_cpu_rmap = NULL;
> -       }
> -
> -#endif /* CONFIG_RFS_ACCEL */
>         /* Make sure timer and reset routine won't be called after
>          * freeing device resources.
>          */
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> index 884d42db5554..1f50bc715038 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -49,7 +49,6 @@
>  #include <linux/cache.h>
>  #include <linux/log2.h>
>  #include <linux/bitmap.h>
> -#include <linux/cpu_rmap.h>
>  #include <linux/cpumask.h>
>  #include <net/pkt_cls.h>
>  #include <net/page_pool/helpers.h>
> @@ -10861,7 +10860,7 @@ static int bnxt_set_real_num_queues(struct bnxt
> *bp)
> 
>  #ifdef CONFIG_RFS_ACCEL
>         if (bp->flags & BNXT_FLAG_RFS)
> -               dev->rx_cpu_rmap = alloc_irq_cpu_rmap(bp->rx_nr_rings);
> +               return netif_enable_cpu_rmap(dev, bp->rx_nr_rings);
>  #endif
> 
>         return rc;
> @@ -11215,10 +11214,6 @@ static void bnxt_free_irq(struct bnxt *bp)
>         struct bnxt_irq *irq;
>         int i;
> 
> -#ifdef CONFIG_RFS_ACCEL
> -       free_irq_cpu_rmap(bp->dev->rx_cpu_rmap);
> -       bp->dev->rx_cpu_rmap = NULL;
> -#endif
>         if (!bp->irq_tbl || !bp->bnapi)
>                 return;
> 
> @@ -11241,11 +11236,8 @@ static void bnxt_free_irq(struct bnxt *bp)
> 
>  static int bnxt_request_irq(struct bnxt *bp)  {
> -       int i, j, rc = 0;
> +       int i, rc = 0;
>         unsigned long flags = 0;
> -#ifdef CONFIG_RFS_ACCEL
> -       struct cpu_rmap *rmap;
> -#endif
> 
>         rc = bnxt_setup_int_mode(bp);
>         if (rc) {
> @@ -11253,22 +11245,11 @@ static int bnxt_request_irq(struct bnxt *bp)
>                            rc);
>                 return rc;
>         }
> -#ifdef CONFIG_RFS_ACCEL
> -       rmap = bp->dev->rx_cpu_rmap;
> -#endif
> -       for (i = 0, j = 0; i < bp->cp_nr_rings; i++) {
> +
> +       for (i = 0; i < bp->cp_nr_rings; i++) {
>                 int map_idx = bnxt_cp_num_to_irq_num(bp, i);
>                 struct bnxt_irq *irq = &bp->irq_tbl[map_idx];
> 
> -#ifdef CONFIG_RFS_ACCEL
> -               if (rmap && bp->bnapi[i]->rx_ring) {
> -                       rc = irq_cpu_rmap_add(rmap, irq->vector);
> -                       if (rc)
> -                               netdev_warn(bp->dev, "failed adding irq rmap for ring
> %d\n",
> -                                           j);
> -                       j++;
> -               }
> -#endif
>                 rc = request_irq(irq->vector, irq->handler, flags, irq->name,
>                                  bp->bnapi[i]);
>                 if (rc)
> diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c
> b/drivers/net/ethernet/intel/ice/ice_arfs.c
> index 7cee365cc7d1..3b1b892e6958 100644
> --- a/drivers/net/ethernet/intel/ice/ice_arfs.c
> +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
> @@ -584,9 +584,6 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
>         netdev = vsi->netdev;
>         if (!netdev || !netdev->rx_cpu_rmap)
>                 return;
> -
> -       free_irq_cpu_rmap(netdev->rx_cpu_rmap);
> -       netdev->rx_cpu_rmap = NULL;
>  }
> 
>  /**
> @@ -597,7 +594,6 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)  {
>         struct net_device *netdev;
>         struct ice_pf *pf;
> -       int i;
> 
>         if (!vsi || vsi->type != ICE_VSI_PF)
>                 return 0;
> @@ -610,18 +606,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
>         netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s,
> q_vectors %d\n",
>                    vsi->type, netdev->name, vsi->num_q_vectors);
> 
> -       netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors);
> -       if (unlikely(!netdev->rx_cpu_rmap))
> -               return -EINVAL;
> -
> -       ice_for_each_q_vector(vsi, i)
> -               if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
> -                                    vsi->q_vectors[i]->irq.virq)) {
> -                       ice_free_cpu_rx_rmap(vsi);
> -                       return -EINVAL;
> -               }
> -
> -       return 0;
> +       return netif_enable_cpu_rmap(netdev, vsi->num_q_vectors);
>  }
> 
>  /**
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index
> aeb4a6cff171..7e95e9ee36dd 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1991,6 +1991,9 @@ enum netdev_reg_state {
>   *
>   *     @threaded:      napi threaded mode is enabled
>   *
> + *     @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap.
> + *                        Set by calling netif_enable_cpu_rmap().
> + *
>   *     @see_all_hwtstamp_requests: device wants to see calls to
>   *                     ndo_hwtstamp_set() for all timestamp requests
>   *                     regardless of source, even if those aren't
> @@ -2398,6 +2401,9 @@ struct net_device {
>         struct lock_class_key   *qdisc_tx_busylock;
>         bool                    proto_down;
>         bool                    threaded;
> +#ifdef CONFIG_RFS_ACCEL
> +       bool                    rx_cpu_rmap_auto;
> +#endif
> 
>         /* priv_flags_slow, ungrouped to save space */
>         unsigned long           see_all_hwtstamp_requests:1;
> @@ -2671,10 +2677,7 @@ void netif_queue_set_napi(struct net_device
> *dev, unsigned int queue_index,
>                           enum netdev_queue_type type,
>                           struct napi_struct *napi);
> 
> -static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) -{
> -       napi->irq = irq;
> -}
> +void netif_napi_set_irq(struct napi_struct *napi, int irq);
> 
>  /* Default NAPI poll() weight
>   * Device drivers are strongly advised to not use bigger value @@ -2765,6
> +2768,10 @@ static inline void netif_napi_del(struct napi_struct *napi)
>         synchronize_net();
>  }
> 
> +#ifdef CONFIG_RFS_ACCEL
> +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int
> +num_irqs);
> +
> +#endif
>  struct packet_type {
>         __be16                  type;   /* This is really htons(ether_type). */
>         bool                    ignore_outgoing;
> diff --git a/net/core/dev.c b/net/core/dev.c index
> 1a90ed8cc6cc..3ee7a514dca8 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6745,6 +6745,46 @@ void netif_queue_set_napi(struct net_device
> *dev, unsigned int queue_index,  }
> EXPORT_SYMBOL(netif_queue_set_napi);
> 
> +#ifdef CONFIG_RFS_ACCEL
> +static void netif_disable_cpu_rmap(struct net_device *dev) {
> +       free_irq_cpu_rmap(dev->rx_cpu_rmap);
> +       dev->rx_cpu_rmap = NULL;
> +       dev->rx_cpu_rmap_auto = false;
> +}
> +
> +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int
> +num_irqs) {
> +       dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs);
> +       if (!dev->rx_cpu_rmap)
> +               return -ENOMEM;
> +
> +       dev->rx_cpu_rmap_auto = true;
> +       return 0;
> +}
> +EXPORT_SYMBOL(netif_enable_cpu_rmap);
> +#endif
> +
> +void netif_napi_set_irq(struct napi_struct *napi, int irq) { #ifdef
> +CONFIG_RFS_ACCEL
> +       int rc;
> +#endif
> +       napi->irq = irq;
> +
> +#ifdef CONFIG_RFS_ACCEL
> +       if (napi->dev->rx_cpu_rmap && napi->dev->rx_cpu_rmap_auto) {
> +               rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq);
> +               if (rc) {
> +                       netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
> +                                   rc);
> +                       netif_disable_cpu_rmap(napi->dev);
> +               }
> +       }
> +#endif
> +}
> +EXPORT_SYMBOL(netif_napi_set_irq);
> +
>  static void napi_restore_config(struct napi_struct *n)  {
>         n->defer_hard_irqs = n->config->defer_hard_irqs; @@ -11421,6
> +11461,10 @@ void free_netdev(struct net_device *dev)
>         /* Flush device addresses */
>         dev_addr_flush(dev);
> 
> +#ifdef CONFIG_RFS_ACCEL
> +       if (dev->rx_cpu_rmap && dev->rx_cpu_rmap_auto)
> +               netif_disable_cpu_rmap(dev); #endif
>         list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
>                 netif_napi_del(p);
> 
> --
> 2.43.0

Thanks for making the change in the ENA driver.

Acked-by: David Arinzon <darinzon@...zon.com>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ