[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <87d2h1i0pj.wl%atzm@stratosphere.co.jp>
Date: Tue, 01 Apr 2014 23:27:52 +0900
From: Atzm Watanabe <atzm@...atosphere.co.jp>
To: netdev@...r.kernel.org,
Stephen Hemminger <stephen@...workplumber.org>
Subject: [PATCH net-next v2] vxlan: fix handling of the inner 8021Q tagged frame
Currently the implementation can forward the 8021Q tagged frame,
but the FDB cannot learn the VID.
So there is a possibility of forwarding the frame to wrong VTEP,
when same LLADDR exists on different VLANs.
This patch supports only single tagged frame, so the outermost
tag will be used when handling the 8021AD Q-in-Q frame.
v2: Fix probably unsafe operation on the struct vxlan_key.
The outermost tag will be used when handling the 8021AD
Q-in-Q frame. Based on Stephen Hemminger's comments.
Signed-off-by: Atzm Watanabe <atzm@...atosphere.co.jp>
---
drivers/net/vxlan.c | 196 ++++++++++++++++++++++++++++++++++++----------------
1 file changed, 137 insertions(+), 59 deletions(-)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 0d862a5..22204ea 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -85,8 +85,6 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static int vxlan_net_id;
-static const u8 all_zeros_mac[ETH_ALEN];
-
/* per-network namespace private data for this module */
struct vxlan_net {
struct list_head vxlan_list;
@@ -109,6 +107,13 @@ struct vxlan_rdst {
struct rcu_head rcu;
};
+struct vxlan_key {
+ u16 vlan_id;
+ u8 eth_addr[ETH_ALEN];
+};
+
+static const struct vxlan_key all_zeros_key;
+
/* Forwarding table entry */
struct vxlan_fdb {
struct hlist_node hlist; /* linked list of entries */
@@ -118,7 +123,7 @@ struct vxlan_fdb {
struct list_head remotes;
u16 state; /* see ndm_state */
u8 flags; /* see ndm_flags */
- u8 eth_addr[ETH_ALEN];
+ struct vxlan_key key;
};
/* Pseudo network device */
@@ -339,7 +344,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
if (type == RTM_GETNEIGH) {
ndm->ndm_family = AF_INET;
send_ip = !vxlan_addr_any(&rdst->remote_ip);
- send_eth = !is_zero_ether_addr(fdb->eth_addr);
+ send_eth = !is_zero_ether_addr(fdb->key.eth_addr);
} else
ndm->ndm_family = AF_BRIDGE;
ndm->ndm_state = fdb->state;
@@ -347,12 +352,15 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
ndm->ndm_flags = fdb->flags;
ndm->ndm_type = NDA_DST;
- if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
+ if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.eth_addr))
goto nla_put_failure;
if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, &rdst->remote_ip))
goto nla_put_failure;
+ if (fdb->key.vlan_id && nla_put_u16(skb, NDA_VLAN, fdb->key.vlan_id))
+ goto nla_put_failure;
+
if (rdst->remote_port && rdst->remote_port != vxlan->dst_port &&
nla_put_be16(skb, NDA_PORT, rdst->remote_port))
goto nla_put_failure;
@@ -386,6 +394,7 @@ static inline size_t vxlan_nlmsg_size(void)
+ nla_total_size(sizeof(__be16)) /* NDA_PORT */
+ nla_total_size(sizeof(__be32)) /* NDA_VNI */
+ nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
+ + nla_total_size(sizeof(__u16)) /* NDA_VLAN */
+ nla_total_size(sizeof(struct nda_cacheinfo));
}
@@ -433,60 +442,62 @@ static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
}
-static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
+static void vxlan_fdb_miss(struct vxlan_dev *vxlan,
+ const struct vxlan_key *key)
{
struct vxlan_fdb f = {
.state = NUD_STALE,
};
INIT_LIST_HEAD(&f.remotes);
- memcpy(f.eth_addr, eth_addr, ETH_ALEN);
+ memcpy(&f.key, key, sizeof(*key));
vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
}
-/* Hash Ethernet address */
-static u32 eth_hash(const unsigned char *addr)
+/* Hash VLAN ID + Ethernet address */
+static u32 vxlan_key_hash(const struct vxlan_key *key)
{
- u64 value = get_unaligned((u64 *)addr);
+ u64 value = get_unaligned((u64 *)key->eth_addr);
- /* only want 6 bytes */
+ /* eth_addr only wants 6 bytes */
#ifdef __BIG_ENDIAN
value >>= 16;
#else
value <<= 16;
#endif
+ value += key->vlan_id;
return hash_64(value, FDB_HASH_BITS);
}
/* Hash chain to use given mac address */
static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
- const u8 *mac)
+ const struct vxlan_key *key)
{
- return &vxlan->fdb_head[eth_hash(mac)];
+ return &vxlan->fdb_head[vxlan_key_hash(key)];
}
-/* Look up Ethernet address in forwarding table */
-static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,
- const u8 *mac)
+/* Look up VLAN ID + Ethernet address in forwarding table */
+static struct vxlan_fdb *__vxlan_find_key(struct vxlan_dev *vxlan,
+ const struct vxlan_key *key)
{
- struct hlist_head *head = vxlan_fdb_head(vxlan, mac);
+ struct hlist_head *head = vxlan_fdb_head(vxlan, key);
struct vxlan_fdb *f;
hlist_for_each_entry_rcu(f, head, hlist) {
- if (ether_addr_equal(mac, f->eth_addr))
+ if (!memcmp(key, &f->key, sizeof(*key)))
return f;
}
return NULL;
}
-static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
- const u8 *mac)
+static struct vxlan_fdb *vxlan_find_key(struct vxlan_dev *vxlan,
+ const struct vxlan_key *key)
{
struct vxlan_fdb *f;
- f = __vxlan_find_mac(vxlan, mac);
+ f = __vxlan_find_key(vxlan, key);
if (f)
f->used = jiffies;
@@ -685,7 +696,8 @@ static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
/* Add new entry to forwarding table -- assumes lock held */
static int vxlan_fdb_create(struct vxlan_dev *vxlan,
- const u8 *mac, union vxlan_addr *ip,
+ const struct vxlan_key *key,
+ union vxlan_addr *ip,
__u16 state, __u16 flags,
__be16 port, __u32 vni, __u32 ifindex,
__u8 ndm_flags)
@@ -693,11 +705,12 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
struct vxlan_fdb *f;
int notify = 0;
- f = __vxlan_find_mac(vxlan, mac);
+ f = __vxlan_find_key(vxlan, key);
if (f) {
if (flags & NLM_F_EXCL) {
netdev_dbg(vxlan->dev,
- "lost race to create %pM\n", mac);
+ "lost race to create [%hu] %pM\n",
+ key->vlan_id, key->eth_addr);
return -EEXIST;
}
if (f->state != state) {
@@ -712,8 +725,8 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
}
if ((flags & NLM_F_REPLACE)) {
/* Only change unicasts */
- if (!(is_multicast_ether_addr(f->eth_addr) ||
- is_zero_ether_addr(f->eth_addr))) {
+ if (!(is_multicast_ether_addr(f->key.eth_addr) ||
+ is_zero_ether_addr(f->key.eth_addr))) {
int rc = vxlan_fdb_replace(f, ip, port, vni,
ifindex);
@@ -724,8 +737,8 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
return -EOPNOTSUPP;
}
if ((flags & NLM_F_APPEND) &&
- (is_multicast_ether_addr(f->eth_addr) ||
- is_zero_ether_addr(f->eth_addr))) {
+ (is_multicast_ether_addr(f->key.eth_addr) ||
+ is_zero_ether_addr(f->key.eth_addr))) {
int rc = vxlan_fdb_append(f, ip, port, vni, ifindex);
if (rc < 0)
@@ -741,10 +754,12 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
/* Disallow replace to add a multicast entry */
if ((flags & NLM_F_REPLACE) &&
- (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
+ (is_multicast_ether_addr(key->eth_addr) ||
+ is_zero_ether_addr(key->eth_addr)))
return -EOPNOTSUPP;
- netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
+ netdev_dbg(vxlan->dev, "add [%hu] %pM -> %pIS\n",
+ key->vlan_id, key->eth_addr, ip);
f = kmalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
return -ENOMEM;
@@ -754,13 +769,13 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
f->flags = ndm_flags;
f->updated = f->used = jiffies;
INIT_LIST_HEAD(&f->remotes);
- memcpy(f->eth_addr, mac, ETH_ALEN);
+ memcpy(&f->key, key, sizeof(*key));
vxlan_fdb_append(f, ip, port, vni, ifindex);
++vxlan->addrcnt;
hlist_add_head_rcu(&f->hlist,
- vxlan_fdb_head(vxlan, mac));
+ vxlan_fdb_head(vxlan, key));
}
if (notify)
@@ -782,7 +797,7 @@ static void vxlan_fdb_free(struct rcu_head *head)
static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
{
netdev_dbg(vxlan->dev,
- "delete %pM\n", f->eth_addr);
+ "delete [%hu] %pM\n", f->key.vlan_id, f->key.eth_addr);
--vxlan->addrcnt;
vxlan_fdb_notify(vxlan, f, RTM_DELNEIGH);
@@ -792,7 +807,8 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
}
static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
- union vxlan_addr *ip, __be16 *port, u32 *vni, u32 *ifindex)
+ union vxlan_addr *ip, __be16 *port, u32 *vni,
+ u32 *ifindex, u16 *vlan_id)
{
struct net *net = dev_net(vxlan->dev);
int err;
@@ -814,6 +830,17 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
}
}
+ if (tb[NDA_VLAN]) {
+ if (nla_len(tb[NDA_VLAN]) != sizeof(__u16))
+ return -EINVAL;
+
+ *vlan_id = nla_get_u16(tb[NDA_VLAN]);
+ if (!*vlan_id || *vlan_id >= VLAN_VID_MASK)
+ return -EINVAL;
+ } else {
+ *vlan_id = 0;
+ }
+
if (tb[NDA_PORT]) {
if (nla_len(tb[NDA_PORT]) != sizeof(__be16))
return -EINVAL;
@@ -856,6 +883,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
union vxlan_addr ip;
__be16 port;
u32 vni, ifindex;
+ u16 vlan_id;
+ struct vxlan_key key;
int err;
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
@@ -867,12 +896,16 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
if (tb[NDA_DST] == NULL)
return -EINVAL;
- err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
+ err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni,
+ &ifindex, &vlan_id);
if (err)
return err;
+ ether_addr_copy(key.eth_addr, addr);
+ key.vlan_id = vlan_id;
+
spin_lock_bh(&vxlan->hash_lock);
- err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
+ err = vxlan_fdb_create(vxlan, &key, &ip, ndm->ndm_state, flags,
port, vni, ifindex, ndm->ndm_flags);
spin_unlock_bh(&vxlan->hash_lock);
@@ -890,16 +923,22 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
union vxlan_addr ip;
__be16 port;
u32 vni, ifindex;
+ u16 vlan_id;
+ struct vxlan_key key;
int err;
- err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
+ err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni,
+ &ifindex, &vlan_id);
if (err)
return err;
+ ether_addr_copy(key.eth_addr, addr);
+ key.vlan_id = vlan_id;
+
err = -ENOENT;
spin_lock_bh(&vxlan->hash_lock);
- f = vxlan_find_mac(vxlan, addr);
+ f = vxlan_find_key(vxlan, &key);
if (!f)
goto out;
@@ -967,12 +1006,13 @@ out:
* Return true if packet is bogus and should be droppped.
*/
static bool vxlan_snoop(struct net_device *dev,
- union vxlan_addr *src_ip, const u8 *src_mac)
+ union vxlan_addr *src_ip,
+ const struct vxlan_key *key)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f;
- f = vxlan_find_mac(vxlan, src_mac);
+ f = vxlan_find_key(vxlan, key);
if (likely(f)) {
struct vxlan_rdst *rdst = first_remote_rcu(f);
@@ -985,8 +1025,9 @@ static bool vxlan_snoop(struct net_device *dev,
if (net_ratelimit())
netdev_info(dev,
- "%pM migrated from %pIS to %pIS\n",
- src_mac, &rdst->remote_ip, &src_ip);
+ "[%hu] %pM migrated from %pIS to %pIS\n",
+ key->vlan_id, key->eth_addr,
+ &rdst->remote_ip, &src_ip);
rdst->remote_ip = *src_ip;
f->updated = jiffies;
@@ -997,7 +1038,7 @@ static bool vxlan_snoop(struct net_device *dev,
/* close off race between vxlan_flush and incoming packets */
if (netif_running(dev))
- vxlan_fdb_create(vxlan, src_mac, src_ip,
+ vxlan_fdb_create(vxlan, key, src_ip,
NUD_REACHABLE,
NLM_F_EXCL|NLM_F_CREATE,
vxlan->dst_port,
@@ -1187,6 +1228,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
__u32 vni;
int err = 0;
union vxlan_addr *remote_ip;
+ struct vxlan_key key;
vni = ntohl(vx_vni) >> 8;
/* Is this VNI defined? */
@@ -1215,8 +1257,18 @@ static void vxlan_rcv(struct vxlan_sock *vs,
#endif
}
+ ether_addr_copy(key.eth_addr, eth_hdr(skb)->h_source);
+ switch (ntohs(eth_hdr(skb)->h_proto)) {
+ case ETH_P_8021Q:
+ case ETH_P_8021AD:
+ key.vlan_id = ntohs(vlan_eth_hdr(skb)->h_vlan_TCI) & VLAN_VID_MASK;
+ break;
+ default:
+ key.vlan_id = 0;
+ }
+
if ((vxlan->flags & VXLAN_F_LEARN) &&
- vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
+ vxlan_snoop(skb->dev, &saddr, &key))
goto drop;
skb_reset_network_header(skb);
@@ -1297,13 +1349,17 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
if (n) {
struct vxlan_fdb *f;
struct sk_buff *reply;
+ struct vxlan_key key;
if (!(n->nud_state & NUD_CONNECTED)) {
neigh_release(n);
goto out;
}
- f = vxlan_find_mac(vxlan, n->ha);
+ ether_addr_copy(key.eth_addr, n->ha);
+ key.vlan_id = vlan_tx_tag_present(skb) ? vlan_tx_tag_get_id(skb) : 0;
+
+ f = vxlan_find_key(vxlan, &key);
if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
/* bridge-local neighbor */
neigh_release(n);
@@ -1318,6 +1374,9 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
if (reply == NULL)
goto out;
+ if (vlan_tx_tag_present(skb))
+ __vlan_hwaccel_put_tag(reply, skb->vlan_proto, skb->vlan_tci);
+
skb_reset_mac_header(reply);
__skb_pull(reply, skb_network_offset(reply));
reply->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1462,13 +1521,17 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb)
if (n) {
struct vxlan_fdb *f;
struct sk_buff *reply;
+ struct vxlan_key key;
if (!(n->nud_state & NUD_CONNECTED)) {
neigh_release(n);
goto out;
}
- f = vxlan_find_mac(vxlan, n->ha);
+ ether_addr_copy(key.eth_addr, n->ha);
+ key.vlan_id = vlan_tx_tag_present(skb) ? vlan_tx_tag_get_id(skb) : 0;
+
+ f = vxlan_find_key(vxlan, &key);
if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
/* bridge-local neighbor */
neigh_release(n);
@@ -1483,6 +1546,9 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb)
if (reply == NULL)
goto out;
+ if (vlan_tx_tag_present(skb))
+ __vlan_hwaccel_put_tag(reply, skb->vlan_proto, skb->vlan_tci);
+
if (netif_rx_ni(reply) == NET_RX_DROP)
dev->stats.rx_dropped++;
@@ -1782,8 +1848,14 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
#endif
}
- if (dst_vxlan->flags & VXLAN_F_LEARN)
- vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source);
+ if (dst_vxlan->flags & VXLAN_F_LEARN) {
+ struct vxlan_key key;
+
+ ether_addr_copy(key.eth_addr, eth_hdr(skb)->h_source);
+ key.vlan_id = vlan_tx_tag_present(skb) ? vlan_tx_tag_get_id(skb) : 0;
+
+ vxlan_snoop(skb->dev, &loopback, &key);
+ }
u64_stats_update_begin(&tx_stats->syncp);
tx_stats->tx_packets++;
@@ -1962,6 +2034,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
bool did_rsc = false;
struct vxlan_rdst *rdst, *fdst = NULL;
struct vxlan_fdb *f;
+ struct vxlan_key key;
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
@@ -1983,23 +2056,28 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
#endif
}
- f = vxlan_find_mac(vxlan, eth->h_dest);
+ ether_addr_copy(key.eth_addr, eth->h_dest);
+ key.vlan_id = vlan_tx_tag_present(skb) ? vlan_tx_tag_get_id(skb) : 0;
+
+ f = vxlan_find_key(vxlan, &key);
did_rsc = false;
if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) &&
(ntohs(eth->h_proto) == ETH_P_IP ||
ntohs(eth->h_proto) == ETH_P_IPV6)) {
did_rsc = route_shortcircuit(dev, skb);
- if (did_rsc)
- f = vxlan_find_mac(vxlan, eth->h_dest);
+ if (did_rsc) {
+ ether_addr_copy(key.eth_addr, eth->h_dest);
+ f = vxlan_find_key(vxlan, &key);
+ }
}
if (f == NULL) {
- f = vxlan_find_mac(vxlan, all_zeros_mac);
+ f = vxlan_find_key(vxlan, &all_zeros_key);
if (f == NULL) {
if ((vxlan->flags & VXLAN_F_L2MISS) &&
!is_multicast_ether_addr(eth->h_dest))
- vxlan_fdb_miss(vxlan, eth->h_dest);
+ vxlan_fdb_miss(vxlan, &key);
dev->stats.tx_dropped++;
kfree_skb(skb);
@@ -2050,8 +2128,8 @@ static void vxlan_cleanup(unsigned long arg)
timeout = f->used + vxlan->age_interval * HZ;
if (time_before_eq(timeout, jiffies)) {
netdev_dbg(vxlan->dev,
- "garbage collect %pM\n",
- f->eth_addr);
+ "garbage collect [%hu] %pM\n",
+ f->key.vlan_id, f->key.eth_addr);
f->state = NUD_STALE;
vxlan_fdb_destroy(vxlan, f);
} else if (time_before(timeout, next_timer))
@@ -2103,7 +2181,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
struct vxlan_fdb *f;
spin_lock_bh(&vxlan->hash_lock);
- f = __vxlan_find_mac(vxlan, all_zeros_mac);
+ f = __vxlan_find_key(vxlan, &all_zeros_key);
if (f)
vxlan_fdb_destroy(vxlan, f);
spin_unlock_bh(&vxlan->hash_lock);
@@ -2154,8 +2232,8 @@ static void vxlan_flush(struct vxlan_dev *vxlan)
hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
struct vxlan_fdb *f
= container_of(p, struct vxlan_fdb, hlist);
- /* the all_zeros_mac entry is deleted at vxlan_uninit */
- if (!is_zero_ether_addr(f->eth_addr))
+ /* the all_zeros_key entry is deleted at vxlan_uninit */
+ if (!memcmp(&f->key, &all_zeros_key, sizeof(all_zeros_key)))
vxlan_fdb_destroy(vxlan, f);
}
}
@@ -2706,7 +2784,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
/* create an fdb entry for a valid default destination */
if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
- err = vxlan_fdb_create(vxlan, all_zeros_mac,
+ err = vxlan_fdb_create(vxlan, &all_zeros_key,
&vxlan->default_dst.remote_ip,
NUD_REACHABLE|NUD_PERMANENT,
NLM_F_EXCL|NLM_F_CREATE,
--
1.8.1.5
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists