lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Thu,  6 Jun 2019 15:40:14 +0300
From:   Ilya Maximets <i.maximets@...sung.com>
To:     netdev@...r.kernel.org
Cc:     linux-kernel@...r.kernel.org, bpf@...r.kernel.org,
        xdp-newbies@...r.kernel.org,
        "David S. Miller" <davem@...emloft.net>,
        Björn Töpel <bjorn.topel@...el.com>,
        Magnus Karlsson <magnus.karlsson@...el.com>,
        Ilya Maximets <i.maximets@...sung.com>
Subject: [PATCH] net: Fix hang while unregistering device bound to xdp
 socket

Device that bound to XDP socket will not have zero refcount until the
userspace application will not close it. This leads to hang inside
'netdev_wait_allrefs()' if device unregistering requested:

  # ip link del p1
  < hang on recvmsg on netlink socket >

  # ps -x | grep ip
  5126  pts/0    D+   0:00 ip link del p1

  # journalctl -b

  Jun 05 07:19:16 kernel:
  unregister_netdevice: waiting for p1 to become free. Usage count = 1

  Jun 05 07:19:27 kernel:
  unregister_netdevice: waiting for p1 to become free. Usage count = 1
  ...

Fix that by counting XDP references for the device and failing
RTM_DELLINK with EBUSY if device is still in use by any XDP socket.

With this change:

  # ip link del p1
  RTNETLINK answers: Device or resource busy

Fixes: 965a99098443 ("xsk: add support for bind for Rx")
Signed-off-by: Ilya Maximets <i.maximets@...sung.com>
---

Another option could be to force closing all the corresponding AF_XDP
sockets, but I didn't figure out how to do this properly yet.

 include/linux/netdevice.h | 25 +++++++++++++++++++++++++
 net/core/dev.c            | 10 ++++++++++
 net/core/rtnetlink.c      |  6 ++++++
 net/xdp/xsk.c             |  7 ++++++-
 4 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 44b47e9df94a..24451cfc5590 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1705,6 +1705,7 @@ enum netdev_priv_flags {
  *	@watchdog_timer:	List of timers
  *
  *	@pcpu_refcnt:		Number of references to this device
+ *	@pcpu_xdp_refcnt:	Number of XDP socket references to this device
  *	@todo_list:		Delayed register/unregister
  *	@link_watch_list:	XXX: need comments on this one
  *
@@ -1966,6 +1967,7 @@ struct net_device {
 	struct timer_list	watchdog_timer;
 
 	int __percpu		*pcpu_refcnt;
+	int __percpu		*pcpu_xdp_refcnt;
 	struct list_head	todo_list;
 
 	struct list_head	link_watch_list;
@@ -2636,6 +2638,7 @@ static inline void unregister_netdevice(struct net_device *dev)
 }
 
 int netdev_refcnt_read(const struct net_device *dev);
+int netdev_xdp_refcnt_read(const struct net_device *dev);
 void free_netdev(struct net_device *dev);
 void netdev_freemem(struct net_device *dev);
 void synchronize_net(void);
@@ -3739,6 +3742,28 @@ static inline void dev_hold(struct net_device *dev)
 	this_cpu_inc(*dev->pcpu_refcnt);
 }
 
+/**
+ *	dev_put_xdp - release xdp reference to device
+ *	@dev: network device
+ *
+ * Decrease the reference counter of XDP sockets bound to device.
+ */
+static inline void dev_put_xdp(struct net_device *dev)
+{
+	this_cpu_dec(*dev->pcpu_xdp_refcnt);
+}
+
+/**
+ *	dev_hold_xdp - get xdp reference to device
+ *	@dev: network device
+ *
+ * Increase the reference counter of XDP sockets bound to device.
+ */
+static inline void dev_hold_xdp(struct net_device *dev)
+{
+	this_cpu_inc(*dev->pcpu_xdp_refcnt);
+}
+
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
  * and _off may be called from IRQ context, but it is caller
  * who is responsible for serialization of these calls.
diff --git a/net/core/dev.c b/net/core/dev.c
index 66f7508825bd..f6f7cf3d8e93 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8840,6 +8840,16 @@ int netdev_refcnt_read(const struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_refcnt_read);
 
+int netdev_xdp_refcnt_read(const struct net_device *dev)
+{
+	int i, refcnt = 0;
+
+	for_each_possible_cpu(i)
+		refcnt += *per_cpu_ptr(dev->pcpu_xdp_refcnt, i);
+	return refcnt;
+}
+EXPORT_SYMBOL(netdev_xdp_refcnt_read);
+
 /**
  * netdev_wait_allrefs - wait until all references are gone.
  * @dev: target net_device
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index adcc045952c2..f88bf52d41b3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2777,6 +2777,9 @@ static int rtnl_group_dellink(const struct net *net, int group)
 			ops = dev->rtnl_link_ops;
 			if (!ops || !ops->dellink)
 				return -EOPNOTSUPP;
+
+			if (netdev_xdp_refcnt_read(dev))
+				return -EBUSY;
 		}
 	}
 
@@ -2805,6 +2808,9 @@ int rtnl_delete_link(struct net_device *dev)
 	if (!ops || !ops->dellink)
 		return -EOPNOTSUPP;
 
+	if (netdev_xdp_refcnt_read(dev))
+		return -EBUSY;
+
 	ops->dellink(dev, &list_kill);
 	unregister_netdevice_many(&list_kill);
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index a14e8864e4fa..215cc8712b8d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -361,6 +361,7 @@ static int xsk_release(struct socket *sock)
 		xdp_del_sk_umem(xs->umem, xs);
 		xs->dev = NULL;
 		synchronize_net();
+		dev_put_xdp(dev);
 		dev_put(dev);
 	}
 
@@ -423,6 +424,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		goto out_release;
 	}
 
+	dev_hold_xdp(dev);
+
 	if (!xs->rx && !xs->tx) {
 		err = -EINVAL;
 		goto out_unlock;
@@ -490,8 +493,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	xdp_add_sk_umem(xs->umem, xs);
 
 out_unlock:
-	if (err)
+	if (err) {
+		dev_put_xdp(dev);
 		dev_put(dev);
+	}
 out_release:
 	mutex_unlock(&xs->mutex);
 	return err;
-- 
2.17.1

Powered by blists - more mailing lists