lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20130610132450.7ff7236a@nehalam.linuxnetplumber.net>
Date:	Mon, 10 Jun 2013 13:24:50 -0700
From:	Stephen Hemminger <stephen@...workplumber.org>
To:	davem@...emloft.net
Cc:	netdev@...r.kernel.org,
	Stephen Hemminger <stephen@...workplumber.org>
Subject: [PATCH net-next 05/12] vxlan: fix race caused by dropping
 rtnl_unlock

It is possible for two cpu's to race creating vxlan device.
For most cases this is harmless, but the ability to assign "next
avaliable vxlan device" relies on rtnl lock being held across the
whole operation. Therfore two instances of calling:
  ip li add vxlan%d vxlan ...
could collide and create two devices with same name.

To fix this defer creation of socket to a work queue, and
handle possible races there. Introduce a lock to ensure that
changes to vxlan socket hash list is SMP safe.

Signed-off-by: Stephen Hemminger <stephen@...workplumber.org>


--- a/drivers/net/vxlan.c	2013-06-10 12:19:59.002097934 -0700
+++ b/drivers/net/vxlan.c	2013-06-10 12:19:59.522092022 -0700
@@ -94,6 +94,7 @@ struct vxlan_sock {
 struct vxlan_net {
 	struct list_head  vxlan_list;
 	struct hlist_head sock_list[PORT_HASH_SIZE];
+	spinlock_t	  sock_lock;
 };
 
 struct vxlan_rdst {
@@ -131,7 +132,9 @@ struct vxlan_dev {
 	__u8		  ttl;
 	u32		  flags;	/* VXLAN_F_* below */
 
+	struct work_struct sock_work;
 	struct work_struct igmp_work;
+
 	unsigned long	  age_interval;
 	struct timer_list age_timer;
 	spinlock_t	  hash_lock;
@@ -151,6 +154,8 @@ struct vxlan_dev {
 static u32 vxlan_salt __read_mostly;
 static struct workqueue_struct *vxlan_wq;
 
+static void vxlan_sock_work(struct work_struct *work);
+
 /* Virtual Network hash table head */
 static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id)
 {
@@ -670,12 +675,15 @@ static void vxlan_sock_hold(struct vxlan
 	atomic_inc(&vs->refcnt);
 }
 
-static void vxlan_sock_release(struct vxlan_sock *vs)
+static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)
 {
 	if (!atomic_dec_and_test(&vs->refcnt))
 		return;
 
+	spin_lock(&vn->sock_lock);
 	hlist_del_rcu(&vs->hlist);
+	spin_unlock(&vn->sock_lock);
+
 	queue_work(vxlan_wq, &vs->del_work);
 }
 
@@ -700,7 +708,7 @@ static void vxlan_igmp_work(struct work_
 		ip_mc_leave_group(sk, &mreq);
 	release_sock(sk);
 
-	vxlan_sock_release(vs);
+	vxlan_sock_release(vn, vs);
 	dev_put(vxlan->dev);
 }
 
@@ -1238,10 +1246,29 @@ static void vxlan_cleanup(unsigned long
 /* Setup stats when device is created */
 static int vxlan_init(struct net_device *dev)
 {
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
+	struct vxlan_sock *vs;
+	__u32 vni = vxlan->default_dst.remote_vni;
+
 	dev->tstats = alloc_percpu(struct pcpu_tstats);
 	if (!dev->tstats)
 		return -ENOMEM;
 
+	spin_lock(&vn->sock_lock);
+	vs = vxlan_find_port(dev_net(dev), vxlan->dst_port);
+	if (vs) {
+		/* If we have a socket with same port already, reuse it */
+		atomic_inc(&vs->refcnt);
+		vxlan->vn_sock = vs;
+		hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
+	} else {
+		/* otherwise make new socket outside of RTNL */
+		dev_hold(dev);
+		queue_work(vxlan_wq, &vxlan->sock_work);
+	}
+	spin_unlock(&vn->sock_lock);
+
 	return 0;
 }
 
@@ -1249,9 +1276,14 @@ static int vxlan_init(struct net_device
 static int vxlan_open(struct net_device *dev)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_sock *vs = vxlan->vn_sock;
+
+	/* socket hasn't been created */
+	if (!vs)
+		return -ENOTCONN;
 
 	if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
-		vxlan_sock_hold(vxlan->vn_sock);
+		vxlan_sock_hold(vs);
 		dev_hold(dev);
 		queue_work(vxlan_wq, &vxlan->igmp_work);
 	}
@@ -1283,9 +1315,10 @@ static void vxlan_flush(struct vxlan_dev
 static int vxlan_stop(struct net_device *dev)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_sock *vs = vxlan->vn_sock;
 
-	if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
-		vxlan_sock_hold(vxlan->vn_sock);
+	if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
+		vxlan_sock_hold(vs);
 		dev_hold(dev);
 		queue_work(vxlan_wq, &vxlan->igmp_work);
 	}
@@ -1358,6 +1391,7 @@ static void vxlan_setup(struct net_devic
 	INIT_LIST_HEAD(&vxlan->next);
 	spin_lock_init(&vxlan->hash_lock);
 	INIT_WORK(&vxlan->igmp_work, vxlan_igmp_work);
+	INIT_WORK(&vxlan->sock_work, vxlan_sock_work);
 
 	init_timer_deferrable(&vxlan->age_timer);
 	vxlan->age_timer.function = vxlan_cleanup;
@@ -1449,7 +1483,6 @@ static void vxlan_del_work(struct work_s
 	kfree_rcu(vs, rcu);
 }
 
-/* Create new listen socket if needed */
 static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
 {
 	struct vxlan_sock *vs;
@@ -1506,13 +1539,52 @@ static struct vxlan_sock *vxlan_socket_c
 	return vs;
 }
 
+/* Scheduled at device creation to bind to a socket */
+static void vxlan_sock_work(struct work_struct *work)
+{
+	struct vxlan_dev *vxlan
+		= container_of(work, struct vxlan_dev, sock_work);
+	struct net_device *dev = vxlan->dev;
+	struct net *net = dev_net(dev);
+	__u32 vni = vxlan->default_dst.remote_vni;
+	__be16 port = vxlan->dst_port;
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct vxlan_sock *nvs, *ovs;
+
+	nvs = vxlan_socket_create(net, port);
+	if (IS_ERR(nvs)) {
+		netdev_err(vxlan->dev, "Can not create UDP socket, %ld\n",
+			   PTR_ERR(nvs));
+		goto out;
+	}
+
+	spin_lock(&vn->sock_lock);
+	/* Look again to see if can reuse socket */
+	ovs = vxlan_find_port(net, port);
+	if (ovs) {
+		atomic_inc(&ovs->refcnt);
+		vxlan->vn_sock = ovs;
+		hlist_add_head_rcu(&vxlan->hlist, vni_head(ovs, vni));
+		spin_unlock(&vn->sock_lock);
+
+		sk_release_kernel(nvs->sock->sk);
+		kfree(nvs);
+	} else {
+		vxlan->vn_sock = nvs;
+		hlist_add_head_rcu(&nvs->hlist, vs_head(net, port));
+		hlist_add_head_rcu(&vxlan->hlist, vni_head(nvs, vni));
+		spin_unlock(&vn->sock_lock);
+	}
+out:
+	dev_put(dev);
+}
+
 static int vxlan_newlink(struct net *net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[])
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_rdst *dst = &vxlan->default_dst;
-	struct vxlan_sock *vs;
 	__u32 vni;
 	int err;
 
@@ -1590,31 +1662,13 @@ static int vxlan_newlink(struct net *net
 		return -EEXIST;
 	}
 
-	vs = vxlan_find_port(net, vxlan->dst_port);
-	if (vs)
-		atomic_inc(&vs->refcnt);
-	else {
-		/* Drop lock because socket create acquires RTNL lock */
-		rtnl_unlock();
-		vs = vxlan_socket_create(net, vxlan->dst_port);
-		rtnl_lock();
-		if (IS_ERR(vs))
-			return PTR_ERR(vs);
-
-		hlist_add_head_rcu(&vs->hlist, vs_head(net, vxlan->dst_port));
-	}
-	vxlan->vn_sock = vs;
-
 	SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops);
 
 	err = register_netdevice(dev);
-	if (err) {
-		vxlan_sock_release(vs);
+	if (err)
 		return err;
-	}
 
 	list_add(&vxlan->next, &vn->vxlan_list);
-	hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
 
 	return 0;
 }
@@ -1622,12 +1676,14 @@ static int vxlan_newlink(struct net *net
 static void vxlan_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct vxlan_sock *vs = vxlan->vn_sock;
 
 	hlist_del_rcu(&vxlan->hlist);
 	list_del(&vxlan->next);
 	unregister_netdevice_queue(dev, head);
-	vxlan_sock_release(vs);
+	if (vs)
+		vxlan_sock_release(vn, vs);
 }
 
 static size_t vxlan_get_size(const struct net_device *dev)
@@ -1716,6 +1772,7 @@ static __net_init int vxlan_init_net(str
 	unsigned int h;
 
 	INIT_LIST_HEAD(&vn->vxlan_list);
+	spin_lock_init(&vn->sock_lock);
 
 	for (h = 0; h < PORT_HASH_SIZE; ++h)
 		INIT_HLIST_HEAD(&vn->sock_list[h]);

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ