[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20181122010713.3995-4-abauvin@scaleway.com>
Date: Thu, 22 Nov 2018 02:07:11 +0100
From: Alexis Bauvin <abauvin@...leway.com>
To: dsa@...ulusnetworks.com, roopa@...ulusnetworks.com
Cc: netdev@...r.kernel.org, abauvin@...leway.com,
akherbouche@...leway.com
Subject: [RFC v4 3/5] vxlan: add support for underlay in non-default VRF
Creating a VXLAN device with its underlay in a non-default VRF makes the
egress route lookup fail or return an incorrect route, since it is
resolved in the default VRF, and makes ingress fail, because the socket
listens in the default VRF.
This patch binds the underlying UDP tunnel socket to the l3mdev of the
lower device of the VXLAN device. The socket will then listen in the
proper VRF and output traffic from said l3mdev, matching l3mdev routing
rules and looking up the correct routing table.
When the VXLAN device does not have a lower device, or the lower device
is in the default VRF, the socket will not be bound to any interface,
keeping the previous behaviour.
The underlay l3mdev is deduced from the VXLAN lower device
(IFLA_VXLAN_LINK).
+----------+ +---------+
| | | |
| vrf-blue | | vrf-red |
| | | |
+----+-----+ +----+----+
| |
| |
+----+-----+ +----+----+
| | | |
| br-blue | | br-red |
| | | |
+----+-----+ +---+-+---+
| | |
| +-----+ +-----+
| | |
+----+-----+ +------+----+ +----+----+
| | lower device | | | |
| eth0 | <- - - - - - - | vxlan-red | | tap-red | (... more taps)
| | | | | |
+----------+ +-----------+ +---------+
Signed-off-by: Alexis Bauvin <abauvin@...leway.com>
Reviewed-by: Amine Kherbouche <akherbouche@...leway.com>
Tested-by: Amine Kherbouche <akherbouche@...leway.com>
---
drivers/net/vxlan.c | 32 +++++--
.../selftests/net/test_vxlan_under_vrf.sh | 90 +++++++++++++++++++
2 files changed, 114 insertions(+), 8 deletions(-)
create mode 100755 tools/testing/selftests/net/test_vxlan_under_vrf.sh
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 27bd586b94b0..8ba0a57ff958 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -212,7 +212,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
* and enabled unshareable flags.
*/
static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
- __be16 port, u32 flags)
+ __be16 port, u32 flags, int ifindex)
{
struct vxlan_sock *vs;
@@ -221,7 +221,8 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
if (inet_sk(vs->sock->sk)->inet_sport == port &&
vxlan_get_sk_family(vs) == family &&
- vs->flags == flags)
+ vs->flags == flags &&
+ vs->sock->sk->sk_bound_dev_if == ifindex)
return vs;
}
return NULL;
@@ -261,7 +262,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex,
{
struct vxlan_sock *vs;
- vs = vxlan_find_sock(net, family, port, flags);
+ vs = vxlan_find_sock(net, family, port, flags, ifindex);
if (!vs)
return NULL;
@@ -2172,6 +2173,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct rtable *rt;
__be16 df = 0;
+ if (!ifindex)
+ ifindex = sock4->sock->sk->sk_bound_dev_if;
+
rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
dst->sin.sin_addr.s_addr,
&local_ip.sin.sin_addr.s_addr,
@@ -2210,6 +2214,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
} else {
struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
+ if (!ifindex)
+ ifindex = sock6->sock->sk->sk_bound_dev_if;
+
ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
label, &dst->sin6.sin6_addr,
&local_ip.sin6.sin6_addr,
@@ -2813,7 +2820,7 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
};
static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
- __be16 port, u32 flags)
+ __be16 port, u32 flags, int ifindex)
{
struct socket *sock;
struct udp_port_cfg udp_conf;
@@ -2831,6 +2838,7 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
}
udp_conf.local_udp_port = port;
+ udp_conf.bind_ifindex = ifindex;
/* Open UDP socket */
err = udp_sock_create(net, &udp_conf, &sock);
@@ -2842,7 +2850,8 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
- __be16 port, u32 flags)
+ __be16 port, u32 flags,
+ int ifindex)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
@@ -2857,7 +2866,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
for (h = 0; h < VNI_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vs->vni_list[h]);
- sock = vxlan_create_sock(net, ipv6, port, flags);
+ sock = vxlan_create_sock(net, ipv6, port, flags, ifindex);
if (IS_ERR(sock)) {
kfree(vs);
return ERR_CAST(sock);
@@ -2894,11 +2903,17 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_sock *vs = NULL;
struct vxlan_dev_node *node;
+ int l3mdev_index = 0;
+
+ if (vxlan->cfg.remote_ifindex)
+ l3mdev_index = l3mdev_master_upper_ifindex_by_index(
+ vxlan->net, vxlan->cfg.remote_ifindex);
if (!vxlan->cfg.no_share) {
spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
- vxlan->cfg.dst_port, vxlan->cfg.flags);
+ vxlan->cfg.dst_port, vxlan->cfg.flags,
+ l3mdev_index);
if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
spin_unlock(&vn->sock_lock);
return -EBUSY;
@@ -2907,7 +2922,8 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
}
if (!vs)
vs = vxlan_socket_create(vxlan->net, ipv6,
- vxlan->cfg.dst_port, vxlan->cfg.flags);
+ vxlan->cfg.dst_port, vxlan->cfg.flags,
+ l3mdev_index);
if (IS_ERR(vs))
return PTR_ERR(vs);
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
new file mode 100755
index 000000000000..9ee906d5d333
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking VXLAN underlay in a non-default VRF.
+
+set -e
+
+cleanup() {
+ ip link del veth-hv-1 || true
+ ip link del veth-tap || true
+
+ for ns in hv-1 hv-2 vm-1 vm-2; do
+ ip netns del $ns || true
+ done
+}
+
+# Clean start
+cleanup &> /dev/null
+
+[[ $1 == "clean" ]] && exit 0
+
+trap cleanup EXIT
+
+# Setup "Hypervisors" simulated with netns
+ip link add veth-hv-1 type veth peer name veth-hv-2
+setup-hv-networking() {
+ hv=$1
+
+ ip netns add hv-$hv
+ ip link set veth-hv-$hv netns hv-$hv
+ ip netns exec hv-$hv ip link set veth-hv-$hv name veth0
+
+ ip netns exec hv-$hv ip link add vrf-underlay type vrf table 1
+ ip netns exec hv-$hv ip link set vrf-underlay up
+ ip netns exec hv-$hv ip addr add 172.16.0.$hv/24 dev veth0
+ ip netns exec hv-$hv ip link set veth0 up
+
+ ip netns exec hv-$hv ip link add br0 type bridge
+ ip netns exec hv-$hv ip link set br0 up
+
+ ip netns exec hv-$hv ip link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789
+ ip netns exec hv-$hv ip link set vxlan0 master br0
+ ip netns exec hv-$hv ip link set vxlan0 up
+}
+setup-hv-networking 1
+setup-hv-networking 2
+
+# Check connectivity between HVs by pinging hv-2 from hv-1
+echo Checking HV connectivity
+ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo FAIL; false)
+
+# Set up a "VM" simulated by a netns and a veth pair
+setup-vm() {
+ id=$1
+
+ ip netns add vm-$id
+ ip link add veth-tap type veth peer name veth-hv
+
+ ip link set veth-tap netns hv-$id
+ ip netns exec hv-$id ip link set veth-tap master br0
+ ip netns exec hv-$id ip link set veth-tap up
+
+ ip link set veth-hv netns vm-$id
+ ip netns exec vm-$id ip addr add 10.0.0.$id/24 dev veth-hv
+ ip netns exec vm-$id ip link set veth-hv up
+}
+setup-vm 1
+setup-vm 2
+
+# Setup VTEP routes to make ARP work
+ip netns exec hv-1 bridge fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent
+ip netns exec hv-2 bridge fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent
+
+echo "Check VM connectivity through VXLAN (underlay in the default VRF)"
+ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo FAIL; false)
+
+# Move the underlay to a non-default VRF
+ip netns exec hv-1 ip link set veth0 vrf vrf-underlay
+ip netns exec hv-1 ip link set veth0 down
+ip netns exec hv-1 ip link set veth0 up
+ip netns exec hv-2 ip link set veth0 vrf vrf-underlay
+ip netns exec hv-2 ip link set veth0 down
+ip netns exec hv-2 ip link set veth0 up
+
+echo "Check VM connectivity through VXLAN (underlay in a VRF)"
+ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo FAIL; false)
+
+echo SUCCESS
+
+cleanup &> /dev/null
--
Powered by blists - more mailing lists