netdev - Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAFTs51ULXLnGoBhA_JU4L86RpZifykJtsseawHKvcTUYN+Ar0g@mail.gmail.com>
Date:   Thu, 14 Feb 2019 10:42:40 -0800
From:   Peter Oskolkov <posk@...k.io>
To:     David Ahern <dsahern@...il.com>
Cc:     Peter Oskolkov <posk@...gle.com>,
        Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>, netdev@...r.kernel.org,
        Willem de Bruijn <willemb@...gle.com>
Subject: Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute

On Thu, Feb 14, 2019 at 10:11 AM David Ahern <dsahern@...il.com> wrote:
>
> On 2/13/19 11:09 PM, Peter Oskolkov wrote:
> > On error the skb should be freed. Tested with diff/steps
> > provided by David Ahern.
> >
> > Reported-by: David Ahern <dsahern@...il.com>
> > Fixes: 3bd0b15281af ("bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c")
> > Signed-off-by: Peter Oskolkov <posk@...gle.com>
> > ---
> >  net/core/lwt_bpf.c | 24 ++++++++++++++++--------
> >  1 file changed, 16 insertions(+), 8 deletions(-)
> >
> > diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
> > index 32251f3fcda0..f3273cbb6b22 100644
> > --- a/net/core/lwt_bpf.c
> > +++ b/net/core/lwt_bpf.c
> > @@ -179,18 +179,19 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> >       struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
> >       int oif = l3mdev ? l3mdev->ifindex : 0;
> >       struct dst_entry *dst = NULL;
> > +     int err = -EAFNOSUPPORT;
> >       struct sock *sk;
> >       struct net *net;
> >       bool ipv4;
> > -     int err;
> >
> >       if (skb->protocol == htons(ETH_P_IP))
> >               ipv4 = true;
> >       else if (skb->protocol == htons(ETH_P_IPV6))
> >               ipv4 = false;
> >       else
> > -             return -EAFNOSUPPORT;
> > +             goto err;
> >
> > +     err = -EINVAL;
> >       sk = sk_to_full_sk(skb->sk);
> >       if (sk) {
> >               if (sk->sk_bound_dev_if)
> > @@ -216,7 +217,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> >
> >               rt = ip_route_output_key(net, &fl4);
> >               if (IS_ERR(rt))
> > -                     return -EINVAL;
> > +                     goto err;
> >               dst = &rt->dst;
> >       } else {
> >               struct ipv6hdr *iph6 = ipv6_hdr(skb);
> > @@ -231,12 +232,15 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> >               fl6.saddr = iph6->saddr;
> >
> >               err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
> > -             if (err || IS_ERR(dst))
> > -                     return -EINVAL;
> > +             if (err || IS_ERR(dst)) {
> > +                     err = -EINVAL;
> > +                     goto err;
> > +             }
> >       }
> >       if (unlikely(dst->error)) {
> >               dst_release(dst);
> > -             return -EINVAL;
> > +             err = -EINVAL;
> > +             goto err;
> >       }
> >
> >       /* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
>
> EINVAL is a confusing return code; it is not an EINVAL problem, it is a
> routing problem:

Thanks, David! Sent a v2 of the patch.

>
> ...
> starting egress IPv4 encap test
> ping: sendmsg: Invalid argument
> FAIL: test_ping: 1
>
>
> Versus returning the error from the lookup:
> ...
> starting egress IPv4 encap test
> ping: sendmsg: No route to host
> FAIL: test_ping: 1
>
>
> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
> index f3273cbb6b22..a1901ba319fc 100644
> --- a/net/core/lwt_bpf.c
> +++ b/net/core/lwt_bpf.c
> @@ -191,7 +191,6 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>         else
>                 goto err;
>
> -       err = -EINVAL;
>         sk = sk_to_full_sk(skb->sk);
>         if (sk) {
>                 if (sk->sk_bound_dev_if)
> @@ -216,8 +215,10 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>                 fl4.saddr = iph->saddr;
>
>                 rt = ip_route_output_key(net, &fl4);
> -               if (IS_ERR(rt))
> +               if (IS_ERR(rt)) {
> +                       err = PTR_ERR(rt);
>                         goto err;
> +               }
>                 dst = &rt->dst;
>         } else {
>                 struct ipv6hdr *iph6 = ipv6_hdr(skb);
> @@ -232,14 +233,12 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>                 fl6.saddr = iph6->saddr;
>
>                 err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
> -               if (err || IS_ERR(dst)) {
> -                       err = -EINVAL;
> +               if (err || IS_ERR(dst))
>                         goto err;
> -               }
>         }
>         if (unlikely(dst->error)) {
>                 dst_release(dst);
> -               err = -EINVAL;
> +               err = dst->error;
>                 goto err;
>         }
>
>
>
>
> > @@ -246,17 +250,21 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> >        */
> >       err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
> >       if (unlikely(err))
> > -             return err;
> > +             goto err;
> >
> >       skb_dst_drop(skb);
> >       skb_dst_set(skb, dst);
> >
> >       err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
> >       if (unlikely(err))
> > -             return err;
> > +             goto err;
> >
> >       /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
> >       return LWTUNNEL_XMIT_DONE;
> > +
> > +err:
> > +     kfree_skb(skb);
> > +     return err;
> >  }
> >
> >  static int bpf_xmit(struct sk_buff *skb)
> >
>
> I figured it was a leaked skb.
>
> Also, the test script needs to be updated as well with the negative
> tests -- ie., toggle the route from a dev/gateway to a reject
> (e.g.,unreachable) and back.
>
> Also, don't exit on the first failure - run all of them.

I'll refactor the test as you suggest here
when I add VRF and GRO tests in a couple of weeks, if this is OK.

>
> Having the result line up is more user friendly. e.g.,
>
> # ./fib_tests.sh
>
> Single path route test
>     Start point
>     TEST: IPv4 fibmatch                                     [ OK ]
>     TEST: IPv6 fibmatch                                     [ OK ]
>     Nexthop device deleted
>     TEST: IPv4 fibmatch - no route                          [ OK ]
>     TEST: IPv6 fibmatch - no route                          [ OK ]
> ...