lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <C87A2B24-56C8-4183-8E5B-FCB9DAC0BB50@scaleway.com>
Date:   Tue, 20 Nov 2018 17:14:19 +0100
From:   Alexis Bauvin <abauvin@...leway.com>
To:     Roopa Prabhu <roopa@...ulusnetworks.com>
Cc:     David Ahern <dsa@...ulusnetworks.com>,
        netdev <netdev@...r.kernel.org>, akherbouche@...leway.com
Subject: Re: [RFC v3 2/3] vxlan: add support for underlay in non-default VRF

Le 20 nov. 2018 à 16:25, Roopa Prabhu <roopa@...ulusnetworks.com> a écrit :
> 
> On Tue, Nov 20, 2018 at 6:23 AM Alexis Bauvin <abauvin@...leway.com> wrote:
>> 
>> Creating a VXLAN device with is underlay in the non-default VRF makes
>> egress route lookup fail or incorrect since it will resolve in the
>> default VRF, and ingress fail because the socket listens in the default
>> VRF.
>> 
>> This patch binds the underlying UDP tunnel socket to the l3mdev of the
>> lower device of the VXLAN device. This will listen in the proper VRF and
>> output traffic from said l3mdev, matching l3mdev routing rules and
>> looking up the correct routing table.
>> 
>> When the VXLAN device does not have a lower device, or the lower device
>> is in the default VRF, the socket will not be bound to any interface,
>> keeping the previous behaviour.
>> 
>> The underlay l3mdev is deduced from the VXLAN lower device
>> (IFLA_VXLAN_LINK).
>> 
>> The l3mdev_master_upper_ifindex_by_index function has been added to
>> l3mdev. Its goal is to fetch the effective l3mdev of an interface which
>> is not a direct slave of said l3mdev. It handles the following example,
>> properly resolving the l3mdev of eth0 to vrf-blue:
>> 
>> +----------+                         +---------+
>> |          |                         |         |
>> | vrf-blue |                         | vrf-red |
>> |          |                         |         |
>> +----+-----+                         +----+----+
>>     |                                    |
>>     |                                    |
>> +----+-----+                         +----+----+
>> |          |                         |         |
>> | br-blue  |                         | br-red  |
>> |          |                         |         |
>> +----+-----+                         +---+-+---+
>>     |                                   | |
>>     |                             +-----+ +-----+
>>     |                             |             |
>> +----+-----+                +------+----+   +----+----+
>> |          |  lower device  |           |   |         |
>> |   eth0   | <- - - - - - - | vxlan-red |   | tap-red | (... more taps)
>> |          |                |           |   |         |
>> +----------+                +-----------+   +---------+
>> 
>> Signed-off-by: Alexis Bauvin <abauvin@...leway.com>
>> Reviewed-by: Amine Kherbouche <akherbouche@...leway.com>
>> Tested-by: Amine Kherbouche <akherbouche@...leway.com>
>> ---
>> drivers/net/vxlan.c  | 32 ++++++++++++++++++++++++--------
>> include/net/l3mdev.h | 22 ++++++++++++++++++++++
>> net/l3mdev/l3mdev.c  | 18 ++++++++++++++++++
>> 3 files changed, 64 insertions(+), 8 deletions(-)
>> 
>> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
>> index 27bd586b94b0..a3de08122269 100644
>> --- a/drivers/net/vxlan.c
>> +++ b/drivers/net/vxlan.c
>> @@ -212,7 +212,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
>>  * and enabled unshareable flags.
>>  */
>> static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
>> -                                         __be16 port, u32 flags)
>> +                                         __be16 port, u32 flags, int ifindex)
>> {
>>        struct vxlan_sock *vs;
>> 
>> @@ -221,7 +221,8 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
>>        hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
>>                if (inet_sk(vs->sock->sk)->inet_sport == port &&
>>                    vxlan_get_sk_family(vs) == family &&
>> -                   vs->flags == flags)
>> +                   vs->flags == flags &&
>> +                   vs->sock->sk->sk_bound_dev_if == ifindex)
>>                        return vs;
>>        }
>>        return NULL;
>> @@ -261,7 +262,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex,
>> {
>>        struct vxlan_sock *vs;
>> 
>> -       vs = vxlan_find_sock(net, family, port, flags);
>> +       vs = vxlan_find_sock(net, family, port, flags, ifindex);
>>        if (!vs)
>>                return NULL;
>> 
>> @@ -2172,6 +2173,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>>                struct rtable *rt;
>>                __be16 df = 0;
>> 
>> +               if (!ifindex)
>> +                       ifindex = sock4->sock->sk->sk_bound_dev_if;
>> +
>>                rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
>>                                     dst->sin.sin_addr.s_addr,
>>                                     &local_ip.sin.sin_addr.s_addr,
>> @@ -2210,6 +2214,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>>        } else {
>>                struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
>> 
>> +               if (!ifindex)
>> +                       ifindex = sock6->sock->sk->sk_bound_dev_if;
>> +
>>                ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
>>                                        label, &dst->sin6.sin6_addr,
>>                                        &local_ip.sin6.sin6_addr,
>> @@ -2813,7 +2820,7 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
>> };
>> 
>> static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
>> -                                       __be16 port, u32 flags)
>> +                                       __be16 port, u32 flags, int ifindex)
>> {
>>        struct socket *sock;
>>        struct udp_port_cfg udp_conf;
>> @@ -2831,6 +2838,7 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
>>        }
>> 
>>        udp_conf.local_udp_port = port;
>> +       udp_conf.bind_ifindex = ifindex;
>> 
>>        /* Open UDP socket */
>>        err = udp_sock_create(net, &udp_conf, &sock);
>> @@ -2842,7 +2850,8 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
>> 
>> /* Create new listen socket if needed */
>> static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
>> -                                             __be16 port, u32 flags)
>> +                                             __be16 port, u32 flags,
>> +                                             int ifindex)
>> {
>>        struct vxlan_net *vn = net_generic(net, vxlan_net_id);
>>        struct vxlan_sock *vs;
>> @@ -2857,7 +2866,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
>>        for (h = 0; h < VNI_HASH_SIZE; ++h)
>>                INIT_HLIST_HEAD(&vs->vni_list[h]);
>> 
>> -       sock = vxlan_create_sock(net, ipv6, port, flags);
>> +       sock = vxlan_create_sock(net, ipv6, port, flags, ifindex);
>>        if (IS_ERR(sock)) {
>>                kfree(vs);
>>                return ERR_CAST(sock);
>> @@ -2894,11 +2903,17 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
>>        struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
>>        struct vxlan_sock *vs = NULL;
>>        struct vxlan_dev_node *node;
>> +       int l3mdev_index;
>> +
>> +       l3mdev_index =
>> +               l3mdev_master_upper_ifindex_by_index(vxlan->net,
>> +                                                    vxlan->cfg.remote_ifindex);
> 
> vxlan->cfg.remote_ifindex is optional, so we can avoid trying to
> derive the l3mdev_ifindex for cases where it is not present

So you do suggest to check first remote_ifindex, and derive
the l3mdev_ifindex only if not zero? If so, I will add it
in next version.

>> 
>>        if (!vxlan->cfg.no_share) {
>>                spin_lock(&vn->sock_lock);
>>                vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
>> -                                    vxlan->cfg.dst_port, vxlan->cfg.flags);
>> +                                    vxlan->cfg.dst_port, vxlan->cfg.flags,
>> +                                    l3mdev_index);
>>                if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
>>                        spin_unlock(&vn->sock_lock);
>>                        return -EBUSY;
>> @@ -2907,7 +2922,8 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
>>        }
>>        if (!vs)
>>                vs = vxlan_socket_create(vxlan->net, ipv6,
>> -                                        vxlan->cfg.dst_port, vxlan->cfg.flags);
>> +                                        vxlan->cfg.dst_port, vxlan->cfg.flags,
>> +                                        l3mdev_index);
>>        if (IS_ERR(vs))
>>                return PTR_ERR(vs);
>> #if IS_ENABLED(CONFIG_IPV6)
>> diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
>> index 3832099289c5..78fa0ac4613c 100644
>> --- a/include/net/l3mdev.h
>> +++ b/include/net/l3mdev.h
>> @@ -101,6 +101,17 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
>>        return master;
>> }
>> 
>> +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex);
>> +static inline
>> +int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex)
>> +{
>> +       rcu_read_lock();
>> +       ifindex = l3mdev_master_upper_ifindex_by_index_rcu(net, ifindex);
>> +       rcu_read_unlock();
>> +
>> +       return ifindex;
>> +}
>> +
>> u32 l3mdev_fib_table_rcu(const struct net_device *dev);
>> u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
>> static inline u32 l3mdev_fib_table(const struct net_device *dev)
>> @@ -207,6 +218,17 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
>>        return 0;
>> }
>> 
>> +static inline
>> +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
>> +{
>> +       return 0;
>> +}
>> +static inline
>> +int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex)
>> +{
>> +       return 0;
>> +}
>> +
>> static inline
>> struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
>> {
>> diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
>> index 8da86ceca33d..309dee76724e 100644
>> --- a/net/l3mdev/l3mdev.c
>> +++ b/net/l3mdev/l3mdev.c
>> @@ -46,6 +46,24 @@ int l3mdev_master_ifindex_rcu(const struct net_device *dev)
>> }
>> EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
>> 
>> +/**
>> + *     l3mdev_master_upper_ifindex_by_index - get index of upper l3 master
>> + *                                            device
>> + *     @net: network namespace for device index lookup
>> + *     @ifindex: targeted interface
>> + */
>> +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
>> +{
>> +       struct net_device *dev;
>> +
>> +       dev = dev_get_by_index_rcu(net, ifindex);
>> +       while (dev && !netif_is_l3_master(dev))
>> +               dev = netdev_master_upper_dev_get(dev);
>> +
>> +       return dev ? dev->ifindex : 0;
>> +}
>> +EXPORT_SYMBOL_GPL(l3mdev_master_upper_ifindex_by_index_rcu);
>> +
>> /**
>>  *     l3mdev_fib_table - get FIB table id associated with an L3
>>  *                             master interface
>> --

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ