lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8389012b-9d19-b7c4-d9a0-ea8554dfa555@iogearbox.net>
Date:   Thu, 10 Jan 2019 23:22:46 +0100
From:   Daniel Borkmann <daniel@...earbox.net>
To:     Willem de Bruijn <willemdebruijn.kernel@...il.com>,
        Martin KaFai Lau <kafai@...com>
Cc:     Network Development <netdev@...r.kernel.org>,
        Alexei Starovoitov <ast@...com>, tgraf@...g.ch
Subject: Re: [PATCH net 1/2] bpf: Fix bpf_redirect to an ipip/ip6tnl dev

On 01/10/2019 09:41 PM, Willem de Bruijn wrote:
> On Wed, Nov 9, 2016 at 6:57 PM Martin KaFai Lau <kafai@...com> wrote:
>>
>> If the bpf program calls bpf_redirect(dev, 0) and dev is
>> an ipip/ip6tnl, it currently includes the mac header.
>> e.g. If dev is ipip, the end result is IP-EthHdr-IP instead
>> of IP-IP.
>>
>> The fix is to pull the mac header.  At ingress, skb_postpull_rcsum()
>> is not needed because the ethhdr should have been pulled once already
>> and then got pushed back just before calling the bpf_prog.
>> At egress, this patch calls skb_postpull_rcsum().
>>
>> If bpf_redirect(dev, BPF_F_INGRESS) is called,
>> it also fails now because it calls dev_forward_skb() which
>> eventually calls eth_type_trans(skb, dev).  The eth_type_trans()
>> will set skb->type = PACKET_OTHERHOST because the mac address
>> does not match the redirecting dev->dev_addr.  The PACKET_OTHERHOST
>> will eventually cause the ip_rcv() errors out.  To fix this,
>> ____dev_forward_skb() is added.
>>
>> Joint work with Daniel Borkmann.
>>
>> Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel")
>> Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels")
>> Acked-by: Daniel Borkmann <daniel@...earbox.net>
>> Acked-by: Alexei Starovoitov <ast@...com>
>> Signed-off-by: Martin KaFai Lau <kafai@...com>
>> ---
>>  include/linux/netdevice.h | 15 +++++++++++
>>  net/core/dev.c            | 17 +++++-------
>>  net/core/filter.c         | 68 +++++++++++++++++++++++++++++++++++++++++------
>>  3 files changed, 81 insertions(+), 19 deletions(-)
>>
>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>> index 91ee364..bf04a46 100644
>> --- a/include/linux/netdevice.h
>> +++ b/include/linux/netdevice.h
>> @@ -3354,6 +3354,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
>>  bool is_skb_forwardable(const struct net_device *dev,
>>                         const struct sk_buff *skb);
>>
>> +static __always_inline int ____dev_forward_skb(struct net_device *dev,
>> +                                              struct sk_buff *skb)
>> +{
>> +       if (skb_orphan_frags(skb, GFP_ATOMIC) ||
>> +           unlikely(!is_skb_forwardable(dev, skb))) {
>> +               atomic_long_inc(&dev->rx_dropped);
>> +               kfree_skb(skb);
>> +               return NET_RX_DROP;
>> +       }
>> +
>> +       skb_scrub_packet(skb, true);
>> +       skb->priority = 0;
>> +       return 0;
>> +}
>> +
>>  void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
>>
>>  extern int             netdev_budget;
>> diff --git a/net/core/dev.c b/net/core/dev.c
>> index eaad4c2..6666b28 100644
>> --- a/net/core/dev.c
>> +++ b/net/core/dev.c
>> @@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
>>
>>  int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
>>  {
>> -       if (skb_orphan_frags(skb, GFP_ATOMIC) ||
>> -           unlikely(!is_skb_forwardable(dev, skb))) {
>> -               atomic_long_inc(&dev->rx_dropped);
>> -               kfree_skb(skb);
>> -               return NET_RX_DROP;
>> -       }
>> +       int ret = ____dev_forward_skb(dev, skb);
>>
>> -       skb_scrub_packet(skb, true);
>> -       skb->priority = 0;
>> -       skb->protocol = eth_type_trans(skb, dev);
>> -       skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
>> +       if (likely(!ret)) {
>> +               skb->protocol = eth_type_trans(skb, dev);
>> +               skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
>> +       }
>>
>> -       return 0;
>> +       return ret;
>>  }
>>  EXPORT_SYMBOL_GPL(__dev_forward_skb);
>>
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 00351cd..b391209 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -1628,6 +1628,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
>>         return dev_forward_skb(dev, skb);
>>  }
>>
>> +static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
>> +                                     struct sk_buff *skb)
>> +{
>> +       int ret = ____dev_forward_skb(dev, skb);
>> +
>> +       if (likely(!ret)) {
>> +               skb->dev = dev;
>> +               ret = netif_rx(skb);
>> +       }
>> +
>> +       return ret;
>> +}
>> +
>>  static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
>>  {
>>         int ret;
>> @@ -1647,6 +1660,51 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
>>         return ret;
>>  }
>>
>> +static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
>> +                                u32 flags)
>> +{
>> +       /* skb->mac_len is not set on normal egress */
>> +       unsigned int mlen = skb->network_header - skb->mac_header;
>> +
>> +       __skb_pull(skb, mlen);
> 
> syzbot found an issue when redirecting an LWT_XMIT program using
> BPF_PROG_TEST_RUN.
> 
> That path produces packets with skb->data at skb->network_header, as
> is_l2 is false in bpf_prog_test_run_skb.
> 
> I think the correct fix here is to pull from skb->data up to
> skb->network_header, instead of unconditionally from skb->mac_header:
> 
> -       unsigned int mlen = skb->network_header - skb->mac_header;
> +       unsigned int mlen = skb_network_offset(skb);
> 
> -       __skb_pull(skb, mlen);
> -
> -       /* At ingress, the mac header has already been pulled once.
> -        * At egress, skb_pospull_rcsum has to be done in case that
> -        * the skb is originated from ingress (i.e. a forwarded skb)
> -        * to ensure that rcsum starts at net header.
> -        */
> -       if (!skb_at_tc_ingress(skb))
> -               skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
> +       if (mlen) {
> +               __skb_pull(skb, mlen);
> +
> +               /* At ingress, the mac header has already been pulled once.
> +                * At egress, skb_pospull_rcsum has to be done in case that
> +                * the skb is originated from ingress (i.e. a forwarded skb)
> +                * to ensure that rcsum starts at net header.
> +                */
> +               if (!skb_at_tc_ingress(skb))
> +                       skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
> +       }
> 
> Comments, concerns?

Hmm, thanks for the report! Do you have some more info on the root cause,
are you saying that skb->mac_header is invalid (~0U) and therefore the
subsequent __skb_pull() will set skb->data to garbage? But then also the
skb_postpull_rcsum() from above would be wrong as well, no? Given we have
no actual LWT_* program under BPF kselftest, I'm actually wondering whether
the BPF_PROG_TEST_RUN framework correctly reflects skb setup from lwt with
all its assumptions, but if that is indeed correct and is the only such
case that causes trouble then another option would be to fix up run_lwt_bpf()
to correctly prep the skb before skb_do_redirect() to avoid potential
breakage on others. Not sure if this would do it, so wild shot in the dark:

diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 3e85437..a648568 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -63,6 +63,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
                                     lwt->name ? : "<unknown>");
                        ret = BPF_OK;
                } else {
+                       skb_reset_mac_header(skb);
                        ret = skb_do_redirect(skb);
                        if (ret == 0)
                                ret = BPF_REDIRECT;

>> +
>> +       /* At ingress, the mac header has already been pulled once.
>> +        * At egress, skb_pospull_rcsum has to be done in case that
>> +        * the skb is originated from ingress (i.e. a forwarded skb)
>> +        * to ensure that rcsum starts at net header.
>> +        */
>> +       if (!skb_at_tc_ingress(skb))
>> +               skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
>> +       skb_pop_mac_header(skb);
>> +       skb_reset_mac_len(skb);
>> +       return flags & BPF_F_INGRESS ?
>> +              __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
>> +}
>> +
>> +static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
>> +                                u32 flags)
>> +{
> 
> This function has since been extended with check
> 
> +       /* Verify that a link layer header is carried */
> +       if (unlikely(skb->mac_header >= skb->network_header)) {
> +               kfree_skb(skb);
> +               return -ERANGE;
> +       }
> +
> 
> I haven't checked yet, but I think that this will stillbe incorrect
> from LWT_XMIT to a destination that expects skb->data at
> skb->mac_header.

Hmm, but this is in __bpf_redirect_common(), the above issue described
is in __bpf_redirect_no_mac(). Meaning, if a LWT_XMIT prog wants to
egress redirect to a device that expects mac header, then I think the
above check is okay for dropping invalid skb, so the prog first needs
to call bpf_skb_change_head() helper to add a mac header instead.

>> +       bpf_push_mac_rcsum(skb);
>> +       return flags & BPF_F_INGRESS ?
>> +              __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
>> +}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ