[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <adcd9bc0-49b4-5aaf-5bf8-13fe212cb934@gmail.com>
Date: Mon, 24 Sep 2018 17:39:47 -0600
From: David Ahern <dsahern@...il.com>
To: Mike Manning <mmanning@...tta.att-mail.com>, netdev@...r.kernel.org
Subject: Re: [PATCH net-next v1 2/5] ipv6: allow link-local and multicast
packets inside vrf
On 9/24/18 10:13 AM, Mike Manning wrote:
> Packets that are multicast or to link-local addresses are not enslaved
> to the vrf of the socket that they are received on. This is needed for
> NDISC, but breaks applications that rely on receiving such packets when
> in a VRF. Also to make IPv6 consistent with IPv4 which does handle
> multicast packets as being enslaved, modify the VRF driver to do the
> same for IPv6. As a result, the multicast address check needs to verify
> the address against the enslaved rather than the l3mdev device.
>
> Signed-off-by: Mike Manning <mmanning@...tta.att-mail.com>
> ---
> drivers/net/vrf.c | 19 +++++++++----------
> net/ipv6/ip6_input.c | 19 ++++++++++++++++++-
> net/ipv6/ipv6_sockglue.c | 2 +-
> 3 files changed, 28 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
> index f93547f257fb..9d817c19f3b4 100644
> --- a/drivers/net/vrf.c
> +++ b/drivers/net/vrf.c
> @@ -981,24 +981,23 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
> struct sk_buff *skb)
> {
> int orig_iif = skb->skb_iif;
> - bool need_strict;
> + bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
> + bool is_ndisc = ipv6_ndisc_frame(skb);
>
> - /* loopback traffic; do not push through packet taps again.
> - * Reset pkt_type for upper layers to process skb
> + /* loopback, multicast & non-ND link-local traffic; do not push through
> + * packet taps again. Reset pkt_type for upper layers to process skb
> */
> - if (skb->pkt_type == PACKET_LOOPBACK) {
> + if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) {
> skb->dev = vrf_dev;
> skb->skb_iif = vrf_dev->ifindex;
> IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
> - skb->pkt_type = PACKET_HOST;
> + if (skb->pkt_type == PACKET_LOOPBACK)
> + skb->pkt_type = PACKET_HOST;
> goto out;
> }
I'm not so sure about this change. Linklocal by definition means packets
should not leave the interface the LLA is assigned to. Will need to test
this outside of the other patches which needs to be another day.
>
> - /* if packet is NDISC or addressed to multicast or link-local
> - * then keep the ingress interface
> - */
> - need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
> - if (!ipv6_ndisc_frame(skb) && !need_strict) {
> + /* if packet is NDISC then keep the ingress interface */
> + if (!is_ndisc) {
> vrf_rx_stats(vrf_dev, skb->len);
> skb->dev = vrf_dev;
> skb->skb_iif = vrf_dev->ifindex;
> diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
> index 96577e742afd..108f5f88ec98 100644
> --- a/net/ipv6/ip6_input.c
> +++ b/net/ipv6/ip6_input.c
> @@ -432,15 +432,32 @@ EXPORT_SYMBOL_GPL(ip6_input);
>
> int ip6_mc_input(struct sk_buff *skb)
> {
> + int sdif = inet6_sdif(skb);
> const struct ipv6hdr *hdr;
> + struct net_device *dev;
> bool deliver;
>
> __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
> __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST,
> skb->len);
>
> + /* skb->dev passed may be master dev for vrfs. */
> + if (sdif) {
> + rcu_read_lock();
> + dev = dev_get_by_index_rcu(dev_net(skb->dev), sdif);
> + if (!dev) {
> + rcu_read_unlock();
> + kfree_skb(skb);
> + return -ENODEV;
> + }
> + } else {
> + dev = skb->dev;
> + }
> +
> hdr = ipv6_hdr(skb);
> - deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
> + deliver = ipv6_chk_mcast_addr(dev, &hdr->daddr, NULL);
> + if (sdif)
> + rcu_read_unlock();
>
> #ifdef CONFIG_IPV6_MROUTE
> /*
> diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
> index 7dfbc797b130..4ebd395dd3df 100644
> --- a/net/ipv6/ipv6_sockglue.c
> +++ b/net/ipv6/ipv6_sockglue.c
> @@ -486,7 +486,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
> retv = -EFAULT;
> break;
> }
> - if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if)
> + if (!sk_dev_equal_l3scope(sk, pkt.ipi6_ifindex))
> goto e_inval;
>
> np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex;
>
Make this setsockopt change a separate patch. It is not related to the
Rx packet path but following the trend of other setsockopts allowing
sk_bound_dev_if to be the l3mdev and then PKTINFO, UNICAST_IF, etc call
to an enslaved device.
Powered by blists - more mailing lists