netdev - Re: [PATCH net 1/2] bpf: Fix bpf_redirect to an ipip/ip6tnl dev

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 10 Jan 2019 15:41:57 -0500
From:   Willem de Bruijn <willemdebruijn.kernel@...il.com>
To:     Martin KaFai Lau <kafai@...com>
Cc:     Network Development <netdev@...r.kernel.org>,
        Alexei Starovoitov <ast@...com>,
        Daniel Borkmann <daniel@...earbox.net>, tgraf@...g.ch
Subject: Re: [PATCH net 1/2] bpf: Fix bpf_redirect to an ipip/ip6tnl dev

On Wed, Nov 9, 2016 at 6:57 PM Martin KaFai Lau <kafai@...com> wrote:
>
> If the bpf program calls bpf_redirect(dev, 0) and dev is
> an ipip/ip6tnl, it currently includes the mac header.
> e.g. If dev is ipip, the end result is IP-EthHdr-IP instead
> of IP-IP.
>
> The fix is to pull the mac header.  At ingress, skb_postpull_rcsum()
> is not needed because the ethhdr should have been pulled once already
> and then got pushed back just before calling the bpf_prog.
> At egress, this patch calls skb_postpull_rcsum().
>
> If bpf_redirect(dev, BPF_F_INGRESS) is called,
> it also fails now because it calls dev_forward_skb() which
> eventually calls eth_type_trans(skb, dev).  The eth_type_trans()
> will set skb->type = PACKET_OTHERHOST because the mac address
> does not match the redirecting dev->dev_addr.  The PACKET_OTHERHOST
> will eventually cause the ip_rcv() errors out.  To fix this,
> ____dev_forward_skb() is added.
>
> Joint work with Daniel Borkmann.
>
> Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel")
> Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels")
> Acked-by: Daniel Borkmann <daniel@...earbox.net>
> Acked-by: Alexei Starovoitov <ast@...com>
> Signed-off-by: Martin KaFai Lau <kafai@...com>
> ---
>  include/linux/netdevice.h | 15 +++++++++++
>  net/core/dev.c            | 17 +++++-------
>  net/core/filter.c         | 68 +++++++++++++++++++++++++++++++++++++++++------
>  3 files changed, 81 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 91ee364..bf04a46 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -3354,6 +3354,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
>  bool is_skb_forwardable(const struct net_device *dev,
>                         const struct sk_buff *skb);
>
> +static __always_inline int ____dev_forward_skb(struct net_device *dev,
> +                                              struct sk_buff *skb)
> +{
> +       if (skb_orphan_frags(skb, GFP_ATOMIC) ||
> +           unlikely(!is_skb_forwardable(dev, skb))) {
> +               atomic_long_inc(&dev->rx_dropped);
> +               kfree_skb(skb);
> +               return NET_RX_DROP;
> +       }
> +
> +       skb_scrub_packet(skb, true);
> +       skb->priority = 0;
> +       return 0;
> +}
> +
>  void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
>
>  extern int             netdev_budget;
> diff --git a/net/core/dev.c b/net/core/dev.c
> index eaad4c2..6666b28 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
>
>  int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
>  {
> -       if (skb_orphan_frags(skb, GFP_ATOMIC) ||
> -           unlikely(!is_skb_forwardable(dev, skb))) {
> -               atomic_long_inc(&dev->rx_dropped);
> -               kfree_skb(skb);
> -               return NET_RX_DROP;
> -       }
> +       int ret = ____dev_forward_skb(dev, skb);
>
> -       skb_scrub_packet(skb, true);
> -       skb->priority = 0;
> -       skb->protocol = eth_type_trans(skb, dev);
> -       skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
> +       if (likely(!ret)) {
> +               skb->protocol = eth_type_trans(skb, dev);
> +               skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
> +       }
>
> -       return 0;
> +       return ret;
>  }
>  EXPORT_SYMBOL_GPL(__dev_forward_skb);
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 00351cd..b391209 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -1628,6 +1628,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
>         return dev_forward_skb(dev, skb);
>  }
>
> +static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
> +                                     struct sk_buff *skb)
> +{
> +       int ret = ____dev_forward_skb(dev, skb);
> +
> +       if (likely(!ret)) {
> +               skb->dev = dev;
> +               ret = netif_rx(skb);
> +       }
> +
> +       return ret;
> +}
> +
>  static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
>  {
>         int ret;
> @@ -1647,6 +1660,51 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
>         return ret;
>  }
>
> +static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
> +                                u32 flags)
> +{
> +       /* skb->mac_len is not set on normal egress */
> +       unsigned int mlen = skb->network_header - skb->mac_header;
> +
> +       __skb_pull(skb, mlen);

syzbot found an issue when redirecting an LWT_XMIT program using
BPF_PROG_TEST_RUN.

That path produces packets with skb->data at skb->network_header, as
is_l2 is false in bpf_prog_test_run_skb.

I think the correct fix here is to pull from skb->data up to
skb->network_header, instead of unconditionally from skb->mac_header:

-       unsigned int mlen = skb->network_header - skb->mac_header;
+       unsigned int mlen = skb_network_offset(skb);

-       __skb_pull(skb, mlen);
-
-       /* At ingress, the mac header has already been pulled once.
-        * At egress, skb_pospull_rcsum has to be done in case that
-        * the skb is originated from ingress (i.e. a forwarded skb)
-        * to ensure that rcsum starts at net header.
-        */
-       if (!skb_at_tc_ingress(skb))
-               skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+       if (mlen) {
+               __skb_pull(skb, mlen);
+
+               /* At ingress, the mac header has already been pulled once.
+                * At egress, skb_pospull_rcsum has to be done in case that
+                * the skb is originated from ingress (i.e. a forwarded skb)
+                * to ensure that rcsum starts at net header.
+                */
+               if (!skb_at_tc_ingress(skb))
+                       skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+       }

Comments, concerns?

> +
> +       /* At ingress, the mac header has already been pulled once.
> +        * At egress, skb_pospull_rcsum has to be done in case that
> +        * the skb is originated from ingress (i.e. a forwarded skb)
> +        * to ensure that rcsum starts at net header.
> +        */
> +       if (!skb_at_tc_ingress(skb))
> +               skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
> +       skb_pop_mac_header(skb);
> +       skb_reset_mac_len(skb);
> +       return flags & BPF_F_INGRESS ?
> +              __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
> +}
> +
> +static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
> +                                u32 flags)
> +{

This function has since been extended with check

+       /* Verify that a link layer header is carried */
+       if (unlikely(skb->mac_header >= skb->network_header)) {
+               kfree_skb(skb);
+               return -ERANGE;
+       }
+

I haven't checked yet, but I think that this will stillbe incorrect
from LWT_XMIT to a destination that expects skb->data at
skb->mac_header.

> +       bpf_push_mac_rcsum(skb);
> +       return flags & BPF_F_INGRESS ?
> +              __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
> +}