[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJieiUj8ro0p_ykirP3M9JtEiHC=28QyV5xHX3KXAsepNhS+rQ@mail.gmail.com>
Date: Tue, 20 Nov 2018 07:25:27 -0800
From: Roopa Prabhu <roopa@...ulusnetworks.com>
To: abauvin@...leway.com
Cc: David Ahern <dsa@...ulusnetworks.com>,
netdev <netdev@...r.kernel.org>, akherbouche@...leway.com
Subject: Re: [RFC v3 2/3] vxlan: add support for underlay in non-default VRF
On Tue, Nov 20, 2018 at 6:23 AM Alexis Bauvin <abauvin@...leway.com> wrote:
>
> Creating a VXLAN device with is underlay in the non-default VRF makes
> egress route lookup fail or incorrect since it will resolve in the
> default VRF, and ingress fail because the socket listens in the default
> VRF.
>
> This patch binds the underlying UDP tunnel socket to the l3mdev of the
> lower device of the VXLAN device. This will listen in the proper VRF and
> output traffic from said l3mdev, matching l3mdev routing rules and
> looking up the correct routing table.
>
> When the VXLAN device does not have a lower device, or the lower device
> is in the default VRF, the socket will not be bound to any interface,
> keeping the previous behaviour.
>
> The underlay l3mdev is deduced from the VXLAN lower device
> (IFLA_VXLAN_LINK).
>
> The l3mdev_master_upper_ifindex_by_index function has been added to
> l3mdev. Its goal is to fetch the effective l3mdev of an interface which
> is not a direct slave of said l3mdev. It handles the following example,
> properly resolving the l3mdev of eth0 to vrf-blue:
>
> +----------+ +---------+
> | | | |
> | vrf-blue | | vrf-red |
> | | | |
> +----+-----+ +----+----+
> | |
> | |
> +----+-----+ +----+----+
> | | | |
> | br-blue | | br-red |
> | | | |
> +----+-----+ +---+-+---+
> | | |
> | +-----+ +-----+
> | | |
> +----+-----+ +------+----+ +----+----+
> | | lower device | | | |
> | eth0 | <- - - - - - - | vxlan-red | | tap-red | (... more taps)
> | | | | | |
> +----------+ +-----------+ +---------+
>
> Signed-off-by: Alexis Bauvin <abauvin@...leway.com>
> Reviewed-by: Amine Kherbouche <akherbouche@...leway.com>
> Tested-by: Amine Kherbouche <akherbouche@...leway.com>
> ---
> drivers/net/vxlan.c | 32 ++++++++++++++++++++++++--------
> include/net/l3mdev.h | 22 ++++++++++++++++++++++
> net/l3mdev/l3mdev.c | 18 ++++++++++++++++++
> 3 files changed, 64 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 27bd586b94b0..a3de08122269 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -212,7 +212,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
> * and enabled unshareable flags.
> */
> static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
> - __be16 port, u32 flags)
> + __be16 port, u32 flags, int ifindex)
> {
> struct vxlan_sock *vs;
>
> @@ -221,7 +221,8 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
> hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
> if (inet_sk(vs->sock->sk)->inet_sport == port &&
> vxlan_get_sk_family(vs) == family &&
> - vs->flags == flags)
> + vs->flags == flags &&
> + vs->sock->sk->sk_bound_dev_if == ifindex)
> return vs;
> }
> return NULL;
> @@ -261,7 +262,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex,
> {
> struct vxlan_sock *vs;
>
> - vs = vxlan_find_sock(net, family, port, flags);
> + vs = vxlan_find_sock(net, family, port, flags, ifindex);
> if (!vs)
> return NULL;
>
> @@ -2172,6 +2173,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
> struct rtable *rt;
> __be16 df = 0;
>
> + if (!ifindex)
> + ifindex = sock4->sock->sk->sk_bound_dev_if;
> +
> rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
> dst->sin.sin_addr.s_addr,
> &local_ip.sin.sin_addr.s_addr,
> @@ -2210,6 +2214,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
> } else {
> struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
>
> + if (!ifindex)
> + ifindex = sock6->sock->sk->sk_bound_dev_if;
> +
> ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
> label, &dst->sin6.sin6_addr,
> &local_ip.sin6.sin6_addr,
> @@ -2813,7 +2820,7 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
> };
>
> static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
> - __be16 port, u32 flags)
> + __be16 port, u32 flags, int ifindex)
> {
> struct socket *sock;
> struct udp_port_cfg udp_conf;
> @@ -2831,6 +2838,7 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
> }
>
> udp_conf.local_udp_port = port;
> + udp_conf.bind_ifindex = ifindex;
>
> /* Open UDP socket */
> err = udp_sock_create(net, &udp_conf, &sock);
> @@ -2842,7 +2850,8 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
>
> /* Create new listen socket if needed */
> static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
> - __be16 port, u32 flags)
> + __be16 port, u32 flags,
> + int ifindex)
> {
> struct vxlan_net *vn = net_generic(net, vxlan_net_id);
> struct vxlan_sock *vs;
> @@ -2857,7 +2866,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
> for (h = 0; h < VNI_HASH_SIZE; ++h)
> INIT_HLIST_HEAD(&vs->vni_list[h]);
>
> - sock = vxlan_create_sock(net, ipv6, port, flags);
> + sock = vxlan_create_sock(net, ipv6, port, flags, ifindex);
> if (IS_ERR(sock)) {
> kfree(vs);
> return ERR_CAST(sock);
> @@ -2894,11 +2903,17 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
> struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
> struct vxlan_sock *vs = NULL;
> struct vxlan_dev_node *node;
> + int l3mdev_index;
> +
> + l3mdev_index =
> + l3mdev_master_upper_ifindex_by_index(vxlan->net,
> + vxlan->cfg.remote_ifindex);
vxlan->cfg.remote_ifindex is optional, so we can avoid trying to
derive the l3mdev_ifindex for cases where it is not present
>
> if (!vxlan->cfg.no_share) {
> spin_lock(&vn->sock_lock);
> vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
> - vxlan->cfg.dst_port, vxlan->cfg.flags);
> + vxlan->cfg.dst_port, vxlan->cfg.flags,
> + l3mdev_index);
> if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
> spin_unlock(&vn->sock_lock);
> return -EBUSY;
> @@ -2907,7 +2922,8 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
> }
> if (!vs)
> vs = vxlan_socket_create(vxlan->net, ipv6,
> - vxlan->cfg.dst_port, vxlan->cfg.flags);
> + vxlan->cfg.dst_port, vxlan->cfg.flags,
> + l3mdev_index);
> if (IS_ERR(vs))
> return PTR_ERR(vs);
> #if IS_ENABLED(CONFIG_IPV6)
> diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
> index 3832099289c5..78fa0ac4613c 100644
> --- a/include/net/l3mdev.h
> +++ b/include/net/l3mdev.h
> @@ -101,6 +101,17 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
> return master;
> }
>
> +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex);
> +static inline
> +int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex)
> +{
> + rcu_read_lock();
> + ifindex = l3mdev_master_upper_ifindex_by_index_rcu(net, ifindex);
> + rcu_read_unlock();
> +
> + return ifindex;
> +}
> +
> u32 l3mdev_fib_table_rcu(const struct net_device *dev);
> u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
> static inline u32 l3mdev_fib_table(const struct net_device *dev)
> @@ -207,6 +218,17 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
> return 0;
> }
>
> +static inline
> +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
> +{
> + return 0;
> +}
> +static inline
> +int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex)
> +{
> + return 0;
> +}
> +
> static inline
> struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
> {
> diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
> index 8da86ceca33d..309dee76724e 100644
> --- a/net/l3mdev/l3mdev.c
> +++ b/net/l3mdev/l3mdev.c
> @@ -46,6 +46,24 @@ int l3mdev_master_ifindex_rcu(const struct net_device *dev)
> }
> EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
>
> +/**
> + * l3mdev_master_upper_ifindex_by_index - get index of upper l3 master
> + * device
> + * @net: network namespace for device index lookup
> + * @ifindex: targeted interface
> + */
> +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
> +{
> + struct net_device *dev;
> +
> + dev = dev_get_by_index_rcu(net, ifindex);
> + while (dev && !netif_is_l3_master(dev))
> + dev = netdev_master_upper_dev_get(dev);
> +
> + return dev ? dev->ifindex : 0;
> +}
> +EXPORT_SYMBOL_GPL(l3mdev_master_upper_ifindex_by_index_rcu);
> +
> /**
> * l3mdev_fib_table - get FIB table id associated with an L3
> * master interface
> --
>
Powered by blists - more mailing lists