lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130603112616.3ad018e2@nehalam.linuxnetplumber.net>
Date:	Mon, 3 Jun 2013 11:26:16 -0700
From:	Stephen Hemminger <stephen@...workplumber.org>
To:	Mike Rapoport <mike.rapoport@...ellosystems.com>
Cc:	Thomas Graf <tgraf@...g.ch>, netdev@...r.kernel.org
Subject: [RFC] vxlan: convert remote list to list_rcu

This is based on earlier step from Mike Rapoport

My changes:
 1. eliminate second rcu for rdst free
 2. handle possilble races where fdb notify called but no rdst
 3. only update snooped entries for dynamic entries, shouldn't modify static entries
 4. clean up the multi-dest tx code

This is compile tested only.

#3 is probably a bug that should be fixed differently in -stable


---
 drivers/net/vxlan.c | 75 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 28 deletions(-)

--- a/drivers/net/vxlan.c	2013-06-03 11:08:39.214230482 -0700
+++ b/drivers/net/vxlan.c	2013-06-03 11:11:37.220114181 -0700
@@ -101,7 +101,7 @@ struct vxlan_rdst {
 	__be16			 remote_port;
 	u32			 remote_vni;
 	u32			 remote_ifindex;
-	struct vxlan_rdst	*remote_next;
+	struct list_head	 list;
 };
 
 /* Forwarding table entry */
@@ -110,7 +110,7 @@ struct vxlan_fdb {
 	struct rcu_head	  rcu;
 	unsigned long	  updated;	/* jiffies */
 	unsigned long	  used;
-	struct vxlan_rdst remote;
+	struct list_head  remotes;
 	u16		  state;	/* see ndm_state */
 	u8		  flags;	/* see ndm_flags */
 	u8		  eth_addr[ETH_ALEN];
@@ -163,6 +163,12 @@ static inline struct hlist_head *vs_head
 	return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
 }
 
+/* First remote destination for a forwarding entry (or NULL) */
+static inline struct vxlan_rdst *first_remote(struct vxlan_fdb *fdb)
+{
+	return list_first_or_null_rcu(&fdb->remotes, struct vxlan_rdst, list);
+}
+
 /* Find VXLAN socket based on network namespace and UDP port */
 static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port)
 {
@@ -216,10 +222,11 @@ static int vxlan_fdb_info(struct sk_buff
 
 	if (type == RTM_GETNEIGH) {
 		ndm->ndm_family	= AF_INET;
-		send_ip = rdst->remote_ip != htonl(INADDR_ANY);
+		send_ip = rdst && rdst->remote_ip != htonl(INADDR_ANY);
 		send_eth = !is_zero_ether_addr(fdb->eth_addr);
 	} else
 		ndm->ndm_family	= AF_BRIDGE;
+
 	ndm->ndm_state = fdb->state;
 	ndm->ndm_ifindex = vxlan->dev->ifindex;
 	ndm->ndm_flags = fdb->flags;
@@ -228,18 +235,20 @@ static int vxlan_fdb_info(struct sk_buff
 	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
 		goto nla_put_failure;
 
-	if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip))
-		goto nla_put_failure;
-
-	if (rdst->remote_port && rdst->remote_port != vxlan->dst_port &&
-	    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
-		goto nla_put_failure;
-	if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
-	    nla_put_be32(skb, NDA_VNI, rdst->remote_vni))
-		goto nla_put_failure;
-	if (rdst->remote_ifindex &&
-	    nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
-		goto nla_put_failure;
+	if (rdst) {
+		if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip))
+			goto nla_put_failure;
+
+		if (rdst->remote_port && rdst->remote_port != vxlan->dst_port &&
+		    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
+			goto nla_put_failure;
+		if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
+		    nla_put_be32(skb, NDA_VNI, rdst->remote_vni))
+			goto nla_put_failure;
+		if (rdst->remote_ifindex &&
+		    nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
+			goto nla_put_failure;
+	}
 
 	ci.ndm_used	 = jiffies_to_clock_t(now - fdb->used);
 	ci.ndm_confirmed = 0;
@@ -268,7 +277,7 @@ static inline size_t vxlan_nlmsg_size(vo
 }
 
 static void vxlan_fdb_notify(struct vxlan_dev *vxlan,
-			     const struct vxlan_fdb *fdb, int type)
+			     struct vxlan_fdb *fdb, int type)
 {
 	struct net *net = dev_net(vxlan->dev);
 	struct sk_buff *skb;
@@ -278,7 +287,7 @@ static void vxlan_fdb_notify(struct vxla
 	if (skb == NULL)
 		goto errout;
 
-	err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, &fdb->remote);
+	err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, first_remote(fdb));
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -297,11 +306,16 @@ static void vxlan_ip_miss(struct net_dev
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_fdb f;
+	struct vxlan_rdst remote;
 
 	memset(&f, 0, sizeof f);
 	f.state = NUD_STALE;
-	f.remote.remote_ip = ipa; /* goes to NDA_DST */
-	f.remote.remote_vni = VXLAN_N_VID;
+
+	remote.remote_ip = ipa; /* goes to NDA_DST */
+	remote.remote_vni = VXLAN_N_VID;
+
+	INIT_LIST_HEAD(&f.remotes);
+	list_add_rcu(&remote.list, &f.remotes);
 
 	vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
 }
@@ -370,17 +384,17 @@ static struct vxlan_fdb *vxlan_find_mac(
 static int vxlan_fdb_append(struct vxlan_fdb *f,
 			    __be32 ip, __be16 port, __u32 vni, __u32 ifindex)
 {
-	struct vxlan_rdst *rd_prev, *rd;
+	struct vxlan_rdst *rd;
 
-	rd_prev = NULL;
-	for (rd = &f->remote; rd; rd = rd->remote_next) {
+	/* protected by vxlan->hash_lock */
+	list_for_each_entry(rd, &f->remotes, list) {
 		if (rd->remote_ip == ip &&
 		    rd->remote_port == port &&
 		    rd->remote_vni == vni &&
 		    rd->remote_ifindex == ifindex)
 			return 0;
-		rd_prev = rd;
 	}
+
 	rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
 	if (rd == NULL)
 		return -ENOBUFS;
@@ -388,8 +402,9 @@ static int vxlan_fdb_append(struct vxlan
 	rd->remote_port = port;
 	rd->remote_vni = vni;
 	rd->remote_ifindex = ifindex;
-	rd->remote_next = NULL;
-	rd_prev->remote_next = rd;
+
+	list_add_tail_rcu(&rd->list, &f->remotes);
+
 	return 1;
 }
 
@@ -441,16 +456,14 @@ static int vxlan_fdb_create(struct vxlan
 			return -ENOMEM;
 
 		notify = 1;
-		f->remote.remote_ip = ip;
-		f->remote.remote_port = port;
-		f->remote.remote_vni = vni;
-		f->remote.remote_ifindex = ifindex;
-		f->remote.remote_next = NULL;
 		f->state = state;
 		f->flags = ndm_flags;
 		f->updated = f->used = jiffies;
+		INIT_LIST_HEAD(&f->remotes);
 		memcpy(f->eth_addr, mac, ETH_ALEN);
 
+		vxlan_fdb_append(f, ip, port, vni, ifindex);
+
 		++vxlan->addrcnt;
 		hlist_add_head_rcu(&f->hlist,
 				   vxlan_fdb_head(vxlan, mac));
@@ -465,13 +478,10 @@ static int vxlan_fdb_create(struct vxlan
 static void vxlan_fdb_free(struct rcu_head *head)
 {
 	struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
+	struct vxlan_rdst *rd, *nd;
 
-	while (f->remote.remote_next) {
-		struct vxlan_rdst *rd = f->remote.remote_next;
-
-		f->remote.remote_next = rd->remote_next;
+	list_for_each_entry_safe(rd, nd, &f->remotes, list)
 		kfree(rd);
-	}
 	kfree(f);
 }
 
@@ -577,26 +587,25 @@ static int vxlan_fdb_dump(struct sk_buff
 
 	for (h = 0; h < FDB_HASH_SIZE; ++h) {
 		struct vxlan_fdb *f;
-		int err;
 
 		hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
-			struct vxlan_rdst *rd;
-			for (rd = &f->remote; rd; rd = rd->remote_next) {
-				if (idx < cb->args[0])
-					goto skip;
+			struct vxlan_rdst *rdst;
 
+			list_for_each_entry_rcu(rdst, &f->remotes, list) {
+				int err;
 				err = vxlan_fdb_info(skb, vxlan, f,
 						     NETLINK_CB(cb->skb).portid,
 						     cb->nlh->nlmsg_seq,
 						     RTM_NEWNEIGH,
-						     NLM_F_MULTI, rd);
+						     NLM_F_MULTI, rdst);
 				if (err < 0)
-					break;
-skip:
+					goto errout;
+
 				++idx;
 			}
 		}
 	}
+ errout:
 
 	return idx;
 }
@@ -613,16 +622,24 @@ static void vxlan_snoop(struct net_devic
 
 	f = vxlan_find_mac(vxlan, src_mac);
 	if (likely(f)) {
-		if (likely(f->remote.remote_ip == src_ip))
+		struct vxlan_rdst *remote;
+
+		/* is this a static entry, ignore snooping */
+		if (!(f->flags & NTF_SELF))
+			return;
+
+		remote = first_remote(f);
+		if (!remote || remote->remote_ip == src_ip)
 			return;
 
 		if (net_ratelimit())
 			netdev_info(dev,
 				    "%pM migrated from %pI4 to %pI4\n",
-				    src_mac, &f->remote.remote_ip, &src_ip);
+				    src_mac, &remote->remote_ip, &src_ip);
 
-		f->remote.remote_ip = src_ip;
+		remote->remote_ip = src_ip;
 		f->updated = jiffies;
+		vxlan_fdb_notify(vxlan, f, RTM_NEWNEIGH);
 	} else {
 		/* learned new entry */
 		spin_lock(&vxlan->hash_lock);
@@ -856,6 +873,7 @@ static int arp_reduce(struct net_device
 	if (n) {
 		struct vxlan_fdb *f;
 		struct sk_buff	*reply;
+		struct vxlan_rdst *remote;
 
 		if (!(n->nud_state & NUD_CONNECTED)) {
 			neigh_release(n);
@@ -863,7 +881,9 @@ static int arp_reduce(struct net_device
 		}
 
 		f = vxlan_find_mac(vxlan, n->ha);
-		if (f && f->remote.remote_ip == htonl(INADDR_ANY)) {
+		if (f &&
+		    (remote = first_remote(f)) &&
+		    remote->remote_ip == htonl(INADDR_ANY)) {
 			/* bridge-local neighbor */
 			neigh_release(n);
 			goto out;
@@ -1153,9 +1173,8 @@ static netdev_tx_t vxlan_xmit(struct sk_
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct ethhdr *eth;
 	bool did_rsc = false;
-	struct vxlan_rdst *rdst0, *rdst;
+	struct vxlan_rdst *rdst0;
 	struct vxlan_fdb *f;
-	int rc1, rc;
 
 	skb_reset_mac_header(skb);
 	eth = eth_hdr(skb);
@@ -1173,32 +1192,40 @@ static netdev_tx_t vxlan_xmit(struct sk_
 			f = vxlan_find_mac(vxlan, eth->h_dest);
 	}
 
-	if (f == NULL) {
+	if (f) {
+		struct vxlan_rdst *rdst;
+
+		rdst0 = NULL;
+		list_for_each_entry_rcu(rdst, &f->remotes, list) {
+			if (rdst0) {
+				struct sk_buff *skb1;
+				skb1 = skb_clone(skb, GFP_ATOMIC);
+				if (skb1)
+					vxlan_xmit_one(skb1, dev,
+						       rdst0, did_rsc);
+				else
+					++dev->stats.tx_dropped;
+			}
+			rdst0 = rdst;
+		}
+
+		if (!rdst0) {
+			/* forwarding entry but destination list empty */
+			++dev->stats.tx_dropped;
+			kfree_skb(skb);
+			return NETDEV_TX_OK;
+		}
+
+	} else {
 		rdst0 = &vxlan->default_dst;
 
 		if (rdst0->remote_ip == htonl(INADDR_ANY) &&
 		    (vxlan->flags & VXLAN_F_L2MISS) &&
 		    !is_multicast_ether_addr(eth->h_dest))
 			vxlan_fdb_miss(vxlan, eth->h_dest);
-	} else
-		rdst0 = &f->remote;
-
-	rc = NETDEV_TX_OK;
-
-	/* if there are multiple destinations, send copies */
-	for (rdst = rdst0->remote_next; rdst; rdst = rdst->remote_next) {
-		struct sk_buff *skb1;
-
-		skb1 = skb_clone(skb, GFP_ATOMIC);
-		rc1 = vxlan_xmit_one(skb1, dev, rdst, did_rsc);
-		if (rc == NETDEV_TX_OK)
-			rc = rc1;
 	}
 
-	rc1 = vxlan_xmit_one(skb, dev, rdst0, did_rsc);
-	if (rc == NETDEV_TX_OK)
-		rc = rc1;
-	return rc;
+	return vxlan_xmit_one(skb, dev, rdst0, did_rsc);
 }
 
 /* Walk the forwarding table and purge stale entries */
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ