[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250211210657.428439-4-ahmed.zaki@intel.com>
Date: Tue, 11 Feb 2025 14:06:54 -0700
From: Ahmed Zaki <ahmed.zaki@...el.com>
To: netdev@...r.kernel.org
Cc: intel-wired-lan@...ts.osuosl.org,
andrew+netdev@...n.ch,
edumazet@...gle.com,
kuba@...nel.org,
horms@...nel.org,
pabeni@...hat.com,
davem@...emloft.net,
michael.chan@...adcom.com,
tariqt@...dia.com,
anthony.l.nguyen@...el.com,
przemyslaw.kitszel@...el.com,
jdamato@...tly.com,
shayd@...dia.com,
akpm@...ux-foundation.org,
shayagr@...zon.com,
kalesh-anakkur.purayil@...adcom.com,
pavan.chebbi@...adcom.com,
Ahmed Zaki <ahmed.zaki@...el.com>
Subject: [PATCH net-next v8 3/6] net: napi: add CPU affinity to napi_config
A common task for most drivers is to remember the user-set CPU affinity
to its IRQs. On each netdev reset, the driver should re-assign the
user's settings to the IRQs.
Add CPU affinity mask to napi_config. To delegate the CPU affinity
management to the core, drivers must:
1 - set the new netdev flag "irq_affinity_auto":
netif_set_affinity_auto(netdev)
2 - create the napi with persistent config:
netif_napi_add_config()
3 - bind an IRQ to the napi instance: netif_napi_set_irq()
The core will then take care of re-assigning the affinity to the napi's
IRQ.
The default IRQ affinity mask is one CPU per queue, spread starting from
the device's closest NUMA node.
Signed-off-by: Ahmed Zaki <ahmed.zaki@...el.com>
---
include/linux/netdevice.h | 15 ++++++--
net/core/dev.c | 73 +++++++++++++++++++++++++++++++--------
2 files changed, 72 insertions(+), 16 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9344d9b632d4..63fb392558b3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -352,6 +352,7 @@ struct napi_config {
u64 gro_flush_timeout;
u64 irq_suspend_timeout;
u32 defer_hard_irqs;
+ cpumask_t affinity_mask;
unsigned int napi_id;
};
@@ -394,10 +395,8 @@ struct napi_struct {
struct list_head dev_list;
struct hlist_node napi_hash_node;
int irq;
-#ifdef CONFIG_RFS_ACCEL
struct irq_affinity_notify notify;
int napi_rmap_idx;
-#endif
int index;
struct napi_config *config;
};
@@ -1995,6 +1994,12 @@ enum netdev_reg_state {
*
* @threaded: napi threaded mode is enabled
*
+ * @irq_affinity_auto: driver wants the core to store and re-assign the IRQ
+ * affinity. Set by netif_set_affinity_auto(), then
+ * the driver must create a persistent napi by
+ * netif_napi_add_config() and finally bind the napi to
+ * IRQ (via netif_napi_set_irq()).
+ *
* @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap.
* Set by calling netif_enable_cpu_rmap().
*
@@ -2405,6 +2410,7 @@ struct net_device {
struct lock_class_key *qdisc_tx_busylock;
bool proto_down;
bool threaded;
+ bool irq_affinity_auto;
bool rx_cpu_rmap_auto;
/* priv_flags_slow, ungrouped to save space */
@@ -2665,6 +2671,11 @@ static inline void netdev_set_ml_priv(struct net_device *dev,
dev->ml_priv_type = type;
}
+static inline void netif_set_affinity_auto(struct net_device *dev)
+{
+ dev->irq_affinity_auto = true;
+}
+
/*
* Net namespace inlines
*/
diff --git a/net/core/dev.c b/net/core/dev.c
index 209296cef3cd..d2c942bbd5e6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6871,28 +6871,39 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
}
EXPORT_SYMBOL(netif_queue_set_napi);
-#ifdef CONFIG_RFS_ACCEL
static void
-netif_irq_cpu_rmap_notify(struct irq_affinity_notify *notify,
- const cpumask_t *mask)
+netif_napi_irq_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
{
struct napi_struct *napi =
container_of(notify, struct napi_struct, notify);
+#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *rmap = napi->dev->rx_cpu_rmap;
int err;
+#endif
- err = cpu_rmap_update(rmap, napi->napi_rmap_idx, mask);
- if (err)
- netdev_warn(napi->dev, "RMAP update failed (%d)\n",
- err);
+ if (napi->config && napi->dev->irq_affinity_auto)
+ cpumask_copy(&napi->config->affinity_mask, mask);
+
+#ifdef CONFIG_RFS_ACCEL
+ if (napi->dev->rx_cpu_rmap_auto) {
+ err = cpu_rmap_update(rmap, napi->napi_rmap_idx, mask);
+ if (err)
+ netdev_warn(napi->dev, "RMAP update failed (%d)\n",
+ err);
+ }
+#endif
}
+#ifdef CONFIG_RFS_ACCEL
static void netif_napi_affinity_release(struct kref *ref)
{
struct napi_struct *napi =
container_of(ref, struct napi_struct, notify.kref);
struct cpu_rmap *rmap = napi->dev->rx_cpu_rmap;
+ if (!napi->dev->rx_cpu_rmap_auto)
+ return;
rmap->obj[napi->napi_rmap_idx] = NULL;
napi->napi_rmap_idx = -1;
cpu_rmap_put(rmap);
@@ -6903,7 +6914,7 @@ static int napi_irq_cpu_rmap_add(struct napi_struct *napi, int irq)
struct cpu_rmap *rmap = napi->dev->rx_cpu_rmap;
int rc;
- napi->notify.notify = netif_irq_cpu_rmap_notify;
+ napi->notify.notify = netif_napi_irq_notify;
napi->notify.release = netif_napi_affinity_release;
cpu_rmap_get(rmap);
rc = cpu_rmap_add(rmap, napi);
@@ -6915,7 +6926,6 @@ static int napi_irq_cpu_rmap_add(struct napi_struct *napi, int irq)
if (rc)
goto err_set;
- set_bit(NAPI_STATE_HAS_NOTIFIER, &napi->state);
return 0;
err_set:
@@ -6954,6 +6964,10 @@ static void netif_del_cpu_rmap(struct net_device *dev)
}
#else
+static void netif_napi_affinity_release(struct kref *ref)
+{
+}
+
static int napi_irq_cpu_rmap_add(struct napi_struct *napi, int irq)
{
return 0;
@@ -6977,7 +6991,7 @@ void netif_napi_set_irq_locked(struct napi_struct *napi, int irq)
if (napi->irq == irq)
return;
- /* Remove existing rmap entries */
+ /* Remove existing resources */
if (test_and_clear_bit(NAPI_STATE_HAS_NOTIFIER, &napi->state))
irq_set_affinity_notifier(napi->irq, NULL);
@@ -6985,9 +6999,30 @@ void netif_napi_set_irq_locked(struct napi_struct *napi, int irq)
if (irq < 0)
return;
- rc = napi_irq_cpu_rmap_add(napi, irq);
- if (rc)
- netdev_warn(napi->dev, "Unable to update aRFS map (%d)\n", rc);
+ if (napi->dev->rx_cpu_rmap_auto) {
+ rc = napi_irq_cpu_rmap_add(napi, irq);
+ if (rc) {
+ netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
+ rc);
+ return;
+ }
+ set_bit(NAPI_STATE_HAS_NOTIFIER, &napi->state);
+
+ } else if (napi->dev->irq_affinity_auto) {
+ if (WARN_ON_ONCE(!napi->config))
+ return;
+
+ napi->notify.notify = netif_napi_irq_notify;
+ napi->notify.release = netif_napi_affinity_release;
+
+ rc = irq_set_affinity_notifier(irq, &napi->notify);
+ if (rc) {
+ netdev_warn(napi->dev, "Unable to set IRQ notifier (%d)\n",
+ rc);
+ return;
+ }
+ set_bit(NAPI_STATE_HAS_NOTIFIER, &napi->state);
+ }
}
EXPORT_SYMBOL(netif_napi_set_irq_locked);
@@ -6996,6 +7031,11 @@ static void napi_restore_config(struct napi_struct *n)
n->defer_hard_irqs = n->config->defer_hard_irqs;
n->gro_flush_timeout = n->config->gro_flush_timeout;
n->irq_suspend_timeout = n->config->irq_suspend_timeout;
+
+ if (n->dev->irq_affinity_auto &&
+ test_bit(NAPI_STATE_HAS_NOTIFIER, &n->state))
+ irq_set_affinity(n->irq, &n->config->affinity_mask);
+
/* a NAPI ID might be stored in the config, if so use it. if not, use
* napi_hash_add to generate one for us.
*/
@@ -11575,9 +11615,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
void (*setup)(struct net_device *),
unsigned int txqs, unsigned int rxqs)
{
+ unsigned int maxqs, i, numa;
struct net_device *dev;
size_t napi_config_sz;
- unsigned int maxqs;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -11679,6 +11719,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (!dev->napi_config)
goto free_all;
+ numa = dev_to_node(&dev->dev);
+ for (i = 0; i < maxqs; i++)
+ cpumask_set_cpu(cpumask_local_spread(i, numa),
+ &dev->napi_config[i].affinity_mask);
+
strscpy(dev->name, name);
dev->name_assign_type = name_assign_type;
dev->group = INIT_NETDEV_GROUP;
--
2.43.0
Powered by blists - more mailing lists