[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <87a9c6g8l2.wl%atzm@stratosphere.co.jp>
Date: Mon, 31 Mar 2014 15:56:09 +0900
From: Atzm Watanabe <atzm@...atosphere.co.jp>
To: netdev@...r.kernel.org
Subject: [PATCH net-next] vxlan: fix handling of the inner 8021Q tagged frame
Currently the implementation can forward 802.1Q tagged frames,
but the FDB does not learn the VID.
As a result, a frame may be forwarded to the wrong VTEP
when the same LLADDR exists on different VLANs.
Signed-off-by: Atzm Watanabe <atzm@...atosphere.co.jp>
---
drivers/net/vxlan.c | 198 ++++++++++++++++++++++++++++++++++------------------
1 file changed, 132 insertions(+), 66 deletions(-)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 0d862a5..d629c57 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -85,8 +85,6 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static int vxlan_net_id;
-static const u8 all_zeros_mac[ETH_ALEN];
-
/* per-network namespace private data for this module */
struct vxlan_net {
struct list_head vxlan_list;
@@ -109,6 +107,13 @@ struct vxlan_rdst {
struct rcu_head rcu;
};
+struct vxlan_key {
+ u16 vlan_id;
+ u8 eth_addr[ETH_ALEN];
+};
+
+static const struct vxlan_key all_zeros_key;
+
/* Forwarding table entry */
struct vxlan_fdb {
struct hlist_node hlist; /* linked list of entries */
@@ -118,7 +123,7 @@ struct vxlan_fdb {
struct list_head remotes;
u16 state; /* see ndm_state */
u8 flags; /* see ndm_flags */
- u8 eth_addr[ETH_ALEN];
+ struct vxlan_key key;
};
/* Pseudo network device */
@@ -339,7 +344,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
if (type == RTM_GETNEIGH) {
ndm->ndm_family = AF_INET;
send_ip = !vxlan_addr_any(&rdst->remote_ip);
- send_eth = !is_zero_ether_addr(fdb->eth_addr);
+ send_eth = !is_zero_ether_addr(fdb->key.eth_addr);
} else
ndm->ndm_family = AF_BRIDGE;
ndm->ndm_state = fdb->state;
@@ -347,12 +352,15 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
ndm->ndm_flags = fdb->flags;
ndm->ndm_type = NDA_DST;
- if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
+ if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.eth_addr))
goto nla_put_failure;
if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, &rdst->remote_ip))
goto nla_put_failure;
+ if (fdb->key.vlan_id && nla_put_u16(skb, NDA_VLAN, fdb->key.vlan_id))
+ goto nla_put_failure;
+
if (rdst->remote_port && rdst->remote_port != vxlan->dst_port &&
nla_put_be16(skb, NDA_PORT, rdst->remote_port))
goto nla_put_failure;
@@ -386,6 +394,7 @@ static inline size_t vxlan_nlmsg_size(void)
+ nla_total_size(sizeof(__be16)) /* NDA_PORT */
+ nla_total_size(sizeof(__be32)) /* NDA_VNI */
+ nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
+ + nla_total_size(sizeof(__u16)) /* NDA_VLAN */
+ nla_total_size(sizeof(struct nda_cacheinfo));
}
@@ -433,60 +442,54 @@ static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
}
-static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
+static void vxlan_fdb_miss(struct vxlan_dev *vxlan,
+ const struct vxlan_key *key)
{
struct vxlan_fdb f = {
.state = NUD_STALE,
};
INIT_LIST_HEAD(&f.remotes);
- memcpy(f.eth_addr, eth_addr, ETH_ALEN);
+ memcpy(&f.key, key, sizeof(*key));
vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
}
-/* Hash Ethernet address */
-static u32 eth_hash(const unsigned char *addr)
+/* Hash VLAN ID + Ethernet address */
+static u32 vxlan_key_hash(const struct vxlan_key *key)
{
- u64 value = get_unaligned((u64 *)addr);
-
- /* only want 6 bytes */
-#ifdef __BIG_ENDIAN
- value >>= 16;
-#else
- value <<= 16;
-#endif
- return hash_64(value, FDB_HASH_BITS);
+ BUILD_BUG_ON(sizeof(*key) != 8);
+ return hash_64(*(u64 *)key, FDB_HASH_BITS);
}
/* Hash chain to use given mac address */
static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
- const u8 *mac)
+ const struct vxlan_key *key)
{
- return &vxlan->fdb_head[eth_hash(mac)];
+ return &vxlan->fdb_head[vxlan_key_hash(key)];
}
-/* Look up Ethernet address in forwarding table */
-static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,
- const u8 *mac)
+/* Look up VLAN ID + Ethernet address in forwarding table */
+static struct vxlan_fdb *__vxlan_find_key(struct vxlan_dev *vxlan,
+ const struct vxlan_key *key)
{
- struct hlist_head *head = vxlan_fdb_head(vxlan, mac);
+ struct hlist_head *head = vxlan_fdb_head(vxlan, key);
struct vxlan_fdb *f;
hlist_for_each_entry_rcu(f, head, hlist) {
- if (ether_addr_equal(mac, f->eth_addr))
+ if (!memcmp(key, &f->key, sizeof(*key)))
return f;
}
return NULL;
}
-static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
- const u8 *mac)
+static struct vxlan_fdb *vxlan_find_key(struct vxlan_dev *vxlan,
+ const struct vxlan_key *key)
{
struct vxlan_fdb *f;
- f = __vxlan_find_mac(vxlan, mac);
+ f = __vxlan_find_key(vxlan, key);
if (f)
f->used = jiffies;
@@ -685,7 +688,8 @@ static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
/* Add new entry to forwarding table -- assumes lock held */
static int vxlan_fdb_create(struct vxlan_dev *vxlan,
- const u8 *mac, union vxlan_addr *ip,
+ const struct vxlan_key *key,
+ union vxlan_addr *ip,
__u16 state, __u16 flags,
__be16 port, __u32 vni, __u32 ifindex,
__u8 ndm_flags)
@@ -693,11 +697,12 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
struct vxlan_fdb *f;
int notify = 0;
- f = __vxlan_find_mac(vxlan, mac);
+ f = __vxlan_find_key(vxlan, key);
if (f) {
if (flags & NLM_F_EXCL) {
netdev_dbg(vxlan->dev,
- "lost race to create %pM\n", mac);
+ "lost race to create [%hu] %pM\n",
+ key->vlan_id, key->eth_addr);
return -EEXIST;
}
if (f->state != state) {
@@ -712,8 +717,8 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
}
if ((flags & NLM_F_REPLACE)) {
/* Only change unicasts */
- if (!(is_multicast_ether_addr(f->eth_addr) ||
- is_zero_ether_addr(f->eth_addr))) {
+ if (!(is_multicast_ether_addr(f->key.eth_addr) ||
+ is_zero_ether_addr(f->key.eth_addr))) {
int rc = vxlan_fdb_replace(f, ip, port, vni,
ifindex);
@@ -724,8 +729,8 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
return -EOPNOTSUPP;
}
if ((flags & NLM_F_APPEND) &&
- (is_multicast_ether_addr(f->eth_addr) ||
- is_zero_ether_addr(f->eth_addr))) {
+ (is_multicast_ether_addr(f->key.eth_addr) ||
+ is_zero_ether_addr(f->key.eth_addr))) {
int rc = vxlan_fdb_append(f, ip, port, vni, ifindex);
if (rc < 0)
@@ -741,10 +746,12 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
/* Disallow replace to add a multicast entry */
if ((flags & NLM_F_REPLACE) &&
- (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
+ (is_multicast_ether_addr(key->eth_addr) ||
+ is_zero_ether_addr(key->eth_addr)))
return -EOPNOTSUPP;
- netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
+ netdev_dbg(vxlan->dev, "add [%hu] %pM -> %pIS\n",
+ key->vlan_id, key->eth_addr, ip);
f = kmalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
return -ENOMEM;
@@ -754,13 +761,13 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
f->flags = ndm_flags;
f->updated = f->used = jiffies;
INIT_LIST_HEAD(&f->remotes);
- memcpy(f->eth_addr, mac, ETH_ALEN);
+ memcpy(&f->key, key, sizeof(*key));
vxlan_fdb_append(f, ip, port, vni, ifindex);
++vxlan->addrcnt;
hlist_add_head_rcu(&f->hlist,
- vxlan_fdb_head(vxlan, mac));
+ vxlan_fdb_head(vxlan, key));
}
if (notify)
@@ -782,7 +789,7 @@ static void vxlan_fdb_free(struct rcu_head *head)
static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
{
netdev_dbg(vxlan->dev,
- "delete %pM\n", f->eth_addr);
+ "delete [%hu] %pM\n", f->key.vlan_id, f->key.eth_addr);
--vxlan->addrcnt;
vxlan_fdb_notify(vxlan, f, RTM_DELNEIGH);
@@ -792,7 +799,8 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
}
static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
- union vxlan_addr *ip, __be16 *port, u32 *vni, u32 *ifindex)
+ union vxlan_addr *ip, __be16 *port, u32 *vni,
+ u32 *ifindex, u16 *vlan_id)
{
struct net *net = dev_net(vxlan->dev);
int err;
@@ -814,6 +822,17 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
}
}
+ if (tb[NDA_VLAN]) {
+ if (nla_len(tb[NDA_VLAN]) != sizeof(__u16))
+ return -EINVAL;
+
+ *vlan_id = nla_get_u16(tb[NDA_VLAN]);
+ if (!*vlan_id || *vlan_id >= VLAN_VID_MASK)
+ return -EINVAL;
+ } else {
+ *vlan_id = 0;
+ }
+
if (tb[NDA_PORT]) {
if (nla_len(tb[NDA_PORT]) != sizeof(__be16))
return -EINVAL;
@@ -856,6 +875,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
union vxlan_addr ip;
__be16 port;
u32 vni, ifindex;
+ u16 vlan_id;
+ struct vxlan_key key;
int err;
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
@@ -867,12 +888,16 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
if (tb[NDA_DST] == NULL)
return -EINVAL;
- err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
+ err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni,
+ &ifindex, &vlan_id);
if (err)
return err;
+ ether_addr_copy(key.eth_addr, addr);
+ key.vlan_id = vlan_id;
+
spin_lock_bh(&vxlan->hash_lock);
- err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
+ err = vxlan_fdb_create(vxlan, &key, &ip, ndm->ndm_state, flags,
port, vni, ifindex, ndm->ndm_flags);
spin_unlock_bh(&vxlan->hash_lock);
@@ -890,16 +915,22 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
union vxlan_addr ip;
__be16 port;
u32 vni, ifindex;
+ u16 vlan_id;
+ struct vxlan_key key;
int err;
- err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
+ err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni,
+ &ifindex, &vlan_id);
if (err)
return err;
+ ether_addr_copy(key.eth_addr, addr);
+ key.vlan_id = vlan_id;
+
err = -ENOENT;
spin_lock_bh(&vxlan->hash_lock);
- f = vxlan_find_mac(vxlan, addr);
+ f = vxlan_find_key(vxlan, &key);
if (!f)
goto out;
@@ -967,12 +998,13 @@ out:
* Return true if packet is bogus and should be droppped.
*/
static bool vxlan_snoop(struct net_device *dev,
- union vxlan_addr *src_ip, const u8 *src_mac)
+ union vxlan_addr *src_ip,
+ const struct vxlan_key *key)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f;
- f = vxlan_find_mac(vxlan, src_mac);
+ f = vxlan_find_key(vxlan, key);
if (likely(f)) {
struct vxlan_rdst *rdst = first_remote_rcu(f);
@@ -985,8 +1017,9 @@ static bool vxlan_snoop(struct net_device *dev,
if (net_ratelimit())
netdev_info(dev,
- "%pM migrated from %pIS to %pIS\n",
- src_mac, &rdst->remote_ip, &src_ip);
+ "[%hu] %pM migrated from %pIS to %pIS\n",
+ key->vlan_id, key->eth_addr,
+ &rdst->remote_ip, &src_ip);
rdst->remote_ip = *src_ip;
f->updated = jiffies;
@@ -997,7 +1030,7 @@ static bool vxlan_snoop(struct net_device *dev,
/* close off race between vxlan_flush and incoming packets */
if (netif_running(dev))
- vxlan_fdb_create(vxlan, src_mac, src_ip,
+ vxlan_fdb_create(vxlan, key, src_ip,
NUD_REACHABLE,
NLM_F_EXCL|NLM_F_CREATE,
vxlan->dst_port,
@@ -1187,6 +1220,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
__u32 vni;
int err = 0;
union vxlan_addr *remote_ip;
+ struct vxlan_key key;
vni = ntohl(vx_vni) >> 8;
/* Is this VNI defined? */
@@ -1215,8 +1249,14 @@ static void vxlan_rcv(struct vxlan_sock *vs,
#endif
}
+ ether_addr_copy(key.eth_addr, eth_hdr(skb)->h_source);
+ if (ntohs(eth_hdr(skb)->h_proto) == ETH_P_8021Q)
+ key.vlan_id = ntohs(vlan_eth_hdr(skb)->h_vlan_TCI) & VLAN_VID_MASK;
+ else
+ key.vlan_id = 0;
+
if ((vxlan->flags & VXLAN_F_LEARN) &&
- vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
+ vxlan_snoop(skb->dev, &saddr, &key))
goto drop;
skb_reset_network_header(skb);
@@ -1297,13 +1337,17 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
if (n) {
struct vxlan_fdb *f;
struct sk_buff *reply;
+ struct vxlan_key key;
if (!(n->nud_state & NUD_CONNECTED)) {
neigh_release(n);
goto out;
}
- f = vxlan_find_mac(vxlan, n->ha);
+ ether_addr_copy(key.eth_addr, n->ha);
+ key.vlan_id = vlan_tx_tag_present(skb) ? vlan_tx_tag_get_id(skb) : 0;
+
+ f = vxlan_find_key(vxlan, &key);
if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
/* bridge-local neighbor */
neigh_release(n);
@@ -1318,6 +1362,9 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
if (reply == NULL)
goto out;
+ if (vlan_tx_tag_present(skb))
+ __vlan_hwaccel_put_tag(reply, skb->vlan_proto, skb->vlan_tci);
+
skb_reset_mac_header(reply);
__skb_pull(reply, skb_network_offset(reply));
reply->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1462,13 +1509,17 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb)
if (n) {
struct vxlan_fdb *f;
struct sk_buff *reply;
+ struct vxlan_key key;
if (!(n->nud_state & NUD_CONNECTED)) {
neigh_release(n);
goto out;
}
- f = vxlan_find_mac(vxlan, n->ha);
+ ether_addr_copy(key.eth_addr, n->ha);
+ key.vlan_id = vlan_tx_tag_present(skb) ? vlan_tx_tag_get_id(skb) : 0;
+
+ f = vxlan_find_key(vxlan, &key);
if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
/* bridge-local neighbor */
neigh_release(n);
@@ -1483,6 +1534,9 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb)
if (reply == NULL)
goto out;
+ if (vlan_tx_tag_present(skb))
+ __vlan_hwaccel_put_tag(reply, skb->vlan_proto, skb->vlan_tci);
+
if (netif_rx_ni(reply) == NET_RX_DROP)
dev->stats.rx_dropped++;
@@ -1782,8 +1836,14 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
#endif
}
- if (dst_vxlan->flags & VXLAN_F_LEARN)
- vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source);
+ if (dst_vxlan->flags & VXLAN_F_LEARN) {
+ struct vxlan_key key;
+
+ ether_addr_copy(key.eth_addr, eth_hdr(skb)->h_source);
+ key.vlan_id = vlan_tx_tag_present(skb) ? vlan_tx_tag_get_id(skb) : 0;
+
+ vxlan_snoop(skb->dev, &loopback, &key);
+ }
u64_stats_update_begin(&tx_stats->syncp);
tx_stats->tx_packets++;
@@ -1962,6 +2022,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
bool did_rsc = false;
struct vxlan_rdst *rdst, *fdst = NULL;
struct vxlan_fdb *f;
+ struct vxlan_key key;
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
@@ -1983,23 +2044,28 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
#endif
}
- f = vxlan_find_mac(vxlan, eth->h_dest);
+ ether_addr_copy(key.eth_addr, eth->h_dest);
+ key.vlan_id = vlan_tx_tag_present(skb) ? vlan_tx_tag_get_id(skb) : 0;
+
+ f = vxlan_find_key(vxlan, &key);
did_rsc = false;
if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) &&
(ntohs(eth->h_proto) == ETH_P_IP ||
ntohs(eth->h_proto) == ETH_P_IPV6)) {
did_rsc = route_shortcircuit(dev, skb);
- if (did_rsc)
- f = vxlan_find_mac(vxlan, eth->h_dest);
+ if (did_rsc) {
+ ether_addr_copy(key.eth_addr, eth->h_dest);
+ f = vxlan_find_key(vxlan, &key);
+ }
}
if (f == NULL) {
- f = vxlan_find_mac(vxlan, all_zeros_mac);
+ f = vxlan_find_key(vxlan, &all_zeros_key);
if (f == NULL) {
if ((vxlan->flags & VXLAN_F_L2MISS) &&
!is_multicast_ether_addr(eth->h_dest))
- vxlan_fdb_miss(vxlan, eth->h_dest);
+ vxlan_fdb_miss(vxlan, &key);
dev->stats.tx_dropped++;
kfree_skb(skb);
@@ -2050,8 +2116,8 @@ static void vxlan_cleanup(unsigned long arg)
timeout = f->used + vxlan->age_interval * HZ;
if (time_before_eq(timeout, jiffies)) {
netdev_dbg(vxlan->dev,
- "garbage collect %pM\n",
- f->eth_addr);
+ "garbage collect [%hu] %pM\n",
+ f->key.vlan_id, f->key.eth_addr);
f->state = NUD_STALE;
vxlan_fdb_destroy(vxlan, f);
} else if (time_before(timeout, next_timer))
@@ -2103,7 +2169,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
struct vxlan_fdb *f;
spin_lock_bh(&vxlan->hash_lock);
- f = __vxlan_find_mac(vxlan, all_zeros_mac);
+ f = __vxlan_find_key(vxlan, &all_zeros_key);
if (f)
vxlan_fdb_destroy(vxlan, f);
spin_unlock_bh(&vxlan->hash_lock);
@@ -2154,8 +2220,8 @@ static void vxlan_flush(struct vxlan_dev *vxlan)
hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
struct vxlan_fdb *f
= container_of(p, struct vxlan_fdb, hlist);
- /* the all_zeros_mac entry is deleted at vxlan_uninit */
- if (!is_zero_ether_addr(f->eth_addr))
+ /* the all_zeros_key entry is deleted at vxlan_uninit */
+ if (*(u64 *)&f->key)
vxlan_fdb_destroy(vxlan, f);
}
}
@@ -2706,7 +2772,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
/* create an fdb entry for a valid default destination */
if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
- err = vxlan_fdb_create(vxlan, all_zeros_mac,
+ err = vxlan_fdb_create(vxlan, &all_zeros_key,
&vxlan->default_dst.remote_ip,
NUD_REACHABLE|NUD_PERMANENT,
NLM_F_EXCL|NLM_F_CREATE,
--
1.8.1.5
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists