lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240806151618.1373008-1-kuba@kernel.org>
Date: Tue,  6 Aug 2024 08:16:18 -0700
From: Jakub Kicinski <kuba@...nel.org>
To: netdev@...r.kernel.org
Cc: davem@...emloft.net,
	edumazet@...gle.com,
	pabeni@...hat.com,
	ilias.apalodimas@...aro.org,
	Jakub Kicinski <kuba@...nel.org>,
	Jesper Dangaard Brouer <hawk@...nel.org>,
	Alexander Duyck <alexander.duyck@...il.com>,
	Yonglong Liu <liuyonglong@...wei.com>,
	Yunsheng Lin <linyunsheng@...wei.com>
Subject: [RFC net] net: make page pool stall netdev unregistration to avoid IOMMU crashes

There appears to be no clean way to hold onto the IOMMU, so page pool
cannot outlast the driver which created it. We have no way to stall
the driver unregister, but we can use netdev unregistration as a proxy.

Note that page pool pages may last forever, we have seen it happen
e.g. when application leaks a socket and page is stuck in its rcv queue.
Hopefully this is fine in this particular case, as we will only stall
unregistering of devices which want the page pool to manage the DMA
mapping for them, i.e. HW backed netdevs. And obviously keeping
the netdev around is preferable to a crash.

More work is needed for weird drivers which share one pool among
multiple netdevs, as they are not allowed to set the pp->netdev
pointer. We probably need to add a bit that says "don't expose
to uAPI for them".

Signed-off-by: Jakub Kicinski <kuba@...nel.org>
---
Untested, but I think it would work.. if it's not too controversial.

CC: Jesper Dangaard Brouer <hawk@...nel.org>
CC: Alexander Duyck <alexander.duyck@...il.com>
CC: Yonglong Liu <liuyonglong@...wei.com>
CC: Yunsheng Lin <linyunsheng@...wei.com>
---
 include/linux/netdevice.h |  4 ++++
 net/core/page_pool_user.c | 44 +++++++++++++++++++++++++++++++--------
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0ef3eaa23f4b..c817bde7bacc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2342,6 +2342,8 @@ struct net_device {
 	struct lock_class_key	*qdisc_tx_busylock;
 	bool			proto_down;
 	bool			threaded;
+	/** @pp_unreg_pending: page pool code is stalling unregister */
+	bool			pp_unreg_pending;
 
 	struct list_head	net_notifier_list;
 
@@ -2371,6 +2373,8 @@ struct net_device {
 #if IS_ENABLED(CONFIG_PAGE_POOL)
 	/** @page_pools: page pools created for this netdevice */
 	struct hlist_head	page_pools;
+	/** @pp_dev_tracker: ref tracker for page pool code stalling unreg */
+	netdevice_tracker	pp_dev_tracker;
 #endif
 
 	/** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index 3a3277ba167b..1a4135f01130 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -349,22 +349,36 @@ static void page_pool_unreg_netdev_wipe(struct net_device *netdev)
 	struct page_pool *pool;
 	struct hlist_node *n;
 
-	mutex_lock(&page_pools_lock);
 	hlist_for_each_entry_safe(pool, n, &netdev->page_pools, user.list) {
 		hlist_del_init(&pool->user.list);
 		pool->slow.netdev = NET_PTR_POISON;
 	}
-	mutex_unlock(&page_pools_lock);
 }
 
-static void page_pool_unreg_netdev(struct net_device *netdev)
+static void page_pool_unreg_netdev_stall(struct net_device *netdev)
+{
+	if (!netdev->pp_unreg_pending) {
+		netdev_hold(netdev, &netdev->pp_dev_tracker, GFP_KERNEL);
+		netdev->pp_unreg_pending = true;
+	} else {
+		netdev_warn(netdev,
+			    "page pool release stalling device unregister");
+	}
+}
+
+static void page_pool_unreg_netdev_unstall(struct net_device *netdev)
+{
+	netdev_put(netdev, &netdev->pp_dev_tracker);
+	netdev->pp_unreg_pending = false;
+}
+
+static void page_pool_unreg_netdev_reparent(struct net_device *netdev)
 {
 	struct page_pool *pool, *last;
 	struct net_device *lo;
 
 	lo = dev_net(netdev)->loopback_dev;
 
-	mutex_lock(&page_pools_lock);
 	last = NULL;
 	hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
 		pool->slow.netdev = lo;
@@ -375,7 +389,6 @@ static void page_pool_unreg_netdev(struct net_device *netdev)
 	if (last)
 		hlist_splice_init(&netdev->page_pools, &last->user.list,
 				  &lo->page_pools);
-	mutex_unlock(&page_pools_lock);
 }
 
 static int
@@ -383,17 +396,30 @@ page_pool_netdevice_event(struct notifier_block *nb,
 			  unsigned long event, void *ptr)
 {
 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct page_pool *pool;
+	bool has_dma;
 
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
 
-	if (hlist_empty(&netdev->page_pools))
+	if (hlist_empty(&netdev->page_pools) && !netdev->pp_unreg_pending)
 		return NOTIFY_OK;
 
-	if (netdev->ifindex != LOOPBACK_IFINDEX)
-		page_pool_unreg_netdev(netdev);
-	else
+	mutex_lock(&page_pools_lock);
+	has_dma = false;
+	hlist_for_each_entry(pool, &netdev->page_pools, user.list)
+		has_dma |= pool->slow.flags & PP_FLAG_DMA_MAP;
+
+	if (has_dma)
+		page_pool_unreg_netdev_stall(netdev);
+	else if (netdev->pp_unreg_pending)
+		page_pool_unreg_netdev_unstall(netdev);
+	else if (netdev->ifindex == LOOPBACK_IFINDEX)
 		page_pool_unreg_netdev_wipe(netdev);
+	else /* driver doesn't let page pools manage DMA addrs */
+		page_pool_unreg_netdev_reparent(netdev);
+	mutex_unlock(&page_pools_lock);
+
 	return NOTIFY_OK;
 }
 
-- 
2.45.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ