lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240908160702.56618-2-jdamato@fastly.com>
Date: Sun,  8 Sep 2024 16:06:35 +0000
From: Joe Damato <jdamato@...tly.com>
To: netdev@...r.kernel.org
Cc: mkarsten@...terloo.ca,
	kuba@...nel.org,
	skhawaja@...gle.com,
	sdf@...ichev.me,
	bjorn@...osinc.com,
	amritha.nambiar@...el.com,
	sridhar.samudrala@...el.com,
	Joe Damato <jdamato@...tly.com>,
	"David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Paolo Abeni <pabeni@...hat.com>,
	Jonathan Corbet <corbet@....net>,
	Jiri Pirko <jiri@...nulli.us>,
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
	Lorenzo Bianconi <lorenzo@...nel.org>,
	linux-doc@...r.kernel.org (open list:DOCUMENTATION),
	linux-kernel@...r.kernel.org (open list)
Subject: [RFC net-next v2 1/9] net: napi: Add napi_storage

Add a persistent NAPI storage area for NAPI configuration to the core.
Drivers opt in to setting the storage for a NAPI by passing an index
when calling netif_napi_add_storage.

The napi_storage array is allocated in alloc_netdev_mqs and freed in
free_netdev (after the NAPIs are deleted). A NAPI's storage entry is
zeroed each time napi_enable is called on that NAPI.

Signed-off-by: Joe Damato <jdamato@...tly.com>
---
 .../networking/net_cachelines/net_device.rst  |  1 +
 include/linux/netdevice.h                     | 34 +++++++++++++++++++
 net/core/dev.c                                | 18 +++++++++-
 3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst
index 22b07c814f4a..a82751c88d18 100644
--- a/Documentation/networking/net_cachelines/net_device.rst
+++ b/Documentation/networking/net_cachelines/net_device.rst
@@ -106,6 +106,7 @@ rx_handler_func_t*                  rx_handler              read_mostly
 void*                               rx_handler_data         read_mostly         -                   
 struct_netdev_queue*                ingress_queue           read_mostly         -                   
 struct_bpf_mprog_entry              tcx_ingress             -                   read_mostly         sch_handle_ingress
+struct_napi_storage*                napi_storage            -                   read_mostly         napi_complete_done
 struct_nf_hook_entries*             nf_hooks_ingress                                                
 unsigned_char                       broadcast[32]                                                   
 struct_cpu_rmap*                    rx_cpu_rmap                                                     
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b47c00657bd0..54da1c800e65 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -342,6 +342,14 @@ struct gro_list {
  */
 #define GRO_HASH_BUCKETS	8
 
+/*
+ * Per-NAPI settings; one entry of the net_device napi_storage array
+ */
+struct napi_storage {
+	u64 gro_flush_timeout;
+	u32 defer_hard_irqs;
+};
+
 /*
  * Structure for NAPI scheduling similar to tasklet but with weighting
  */
@@ -377,6 +385,8 @@ struct napi_struct {
 	struct list_head	dev_list;
 	struct hlist_node	napi_hash_node;
 	int			irq;
+	int			index;
+	struct napi_storage	*napi_storage;
 };
 
 enum {
@@ -2009,6 +2019,9 @@ enum netdev_reg_state {
  *	@dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem,
  *		   where the clock is recovered.
  *
+ *	@napi_storage: An array of napi_storage structures containing per-NAPI
+ *		       settings.
+ *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
  */
@@ -2087,6 +2100,7 @@ struct net_device {
 #ifdef CONFIG_NET_XGRESS
 	struct bpf_mprog_entry __rcu *tcx_ingress;
 #endif
+	struct napi_storage	*napi_storage;
 	__cacheline_group_end(net_device_read_rx);
 
 	char			name[IFNAMSIZ];
@@ -2648,6 +2662,24 @@ netif_napi_add_tx_weight(struct net_device *dev,
 	netif_napi_add_weight(dev, napi, poll, weight);
 }
 
+/**
+ * netif_napi_add_storage() - initialize a NAPI context and set storage area
+ * @dev: network device
+ * @napi: NAPI context
+ * @poll: polling function
+ * @weight: the poll weight of this NAPI
+ * @index: index of this NAPI's entry in @dev->napi_storage (not range-checked)
+ */
+static inline void
+netif_napi_add_storage(struct net_device *dev, struct napi_struct *napi,
+		       int (*poll)(struct napi_struct *, int), int weight,
+		       int index)
+{
+	napi->index = index;
+	napi->napi_storage = &dev->napi_storage[index];
+	netif_napi_add_weight(dev, napi, poll, weight);
+}
+
 /**
  * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only
  * @dev:  network device
@@ -2683,6 +2715,8 @@ void __netif_napi_del(struct napi_struct *napi);
  */
 static inline void netif_napi_del(struct napi_struct *napi)
 {
+	napi->napi_storage = NULL;
+	napi->index = -1;
 	__netif_napi_del(napi);
 	synchronize_net();
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 22c3f14d9287..ca90e8cab121 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6719,6 +6719,9 @@ void napi_enable(struct napi_struct *n)
 		if (n->dev->threaded && n->thread)
 			new |= NAPIF_STATE_THREADED;
 	} while (!try_cmpxchg(&n->state, &val, new));
+
+	if (n->napi_storage)
+		memset(n->napi_storage, 0, sizeof(*n->napi_storage));
 }
 EXPORT_SYMBOL(napi_enable);
 
@@ -11054,6 +11057,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 		unsigned int txqs, unsigned int rxqs)
 {
 	struct net_device *dev;
+	size_t napi_storage_sz;
+	unsigned int maxqs;
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
@@ -11067,6 +11072,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 		return NULL;
 	}
 
+	WARN_ON_ONCE(txqs != rxqs);
+	maxqs = max(txqs, rxqs);
+
 	dev = kvzalloc(struct_size(dev, priv, sizeof_priv),
 		       GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
 	if (!dev)
@@ -11141,6 +11149,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	if (!dev->ethtool)
 		goto free_all;
 
+	napi_storage_sz = array_size(maxqs, sizeof(*dev->napi_storage));
+	dev->napi_storage = kvzalloc(napi_storage_sz, GFP_KERNEL_ACCOUNT);
+	if (!dev->napi_storage)
+		goto free_all;
+
 	strscpy(dev->name, name);
 	dev->name_assign_type = name_assign_type;
 	dev->group = INIT_NETDEV_GROUP;
@@ -11202,6 +11215,8 @@ void free_netdev(struct net_device *dev)
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
+	kvfree(dev->napi_storage);
+
 	ref_tracker_dir_exit(&dev->refcnt_tracker);
 #ifdef CONFIG_PCPU_DEV_REFCNT
 	free_percpu(dev->pcpu_refcnt);
@@ -11979,7 +11994,8 @@ static void __init net_dev_struct_check(void)
 #ifdef CONFIG_NET_XGRESS
 	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
 #endif
-	CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_storage);
+	CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 112);
 }
 
 /*
-- 
2.25.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ