netdev - Re: [PATCH bpf-next v2] libbpf: Use dynamically allocated buffer when receiving netlink messages

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <CAEf4BzYURbRGL2D-WV=VUs6to=024wO2u=bGtwwxLEKc6pmfhQ@mail.gmail.com>
Date:   Sat, 12 Feb 2022 07:59:57 -0800
From:   Andrii Nakryiko <andrii.nakryiko@...il.com>
To:     Toke Høiland-Jørgensen <toke@...hat.com>
Cc:     Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Andrii Nakryiko <andrii@...nel.org>,
        Martin KaFai Lau <kafai@...com>,
        Song Liu <songliubraving@...com>, Yonghong Song <yhs@...com>,
        John Fastabend <john.fastabend@...il.com>,
        KP Singh <kpsingh@...nel.org>,
        Kumar Kartikeya Dwivedi <memxor@...il.com>,
        Zhiqian Guan <zhguan@...hat.com>,
        Networking <netdev@...r.kernel.org>, bpf <bpf@...r.kernel.org>
Subject: Re: [PATCH bpf-next v2] libbpf: Use dynamically allocated buffer when
 receiving netlink messages

On Fri, Feb 11, 2022 at 3:49 PM Toke Høiland-Jørgensen <toke@...hat.com> wrote:
>
> When receiving netlink messages, libbpf was using a statically allocated
> stack buffer of 4k bytes. This happened to work fine on systems with a 4k
> page size, but on systems with larger page sizes it can lead to truncated
> messages. The user-visible impact of this was that libbpf would insist no
> XDP program was attached to some interfaces because that bit of the netlink
> message got chopped off.
>
> Fix this by switching to a dynamically allocated buffer; we borrow the
> approach from iproute2 of using recvmsg() with MSG_PEEK|MSG_TRUNC to get
> the actual size of the pending message before receiving it, adjusting the
> buffer as necessary. While we're at it, also add retries on interrupted
> system calls around the recvmsg() call.
>
> v2:
>   - Move peek logic to libbpf_netlink_recv(), don't double free on ENOMEM.
>
> Reported-by: Zhiqian Guan <zhguan@...hat.com>
> Fixes: 8bbb77b7c7a2 ("libbpf: Add various netlink helpers")
> Acked-by: Kumar Kartikeya Dwivedi <memxor@...il.com>
> Signed-off-by: Toke Høiland-Jørgensen <toke@...hat.com>
> ---

Applied to bpf-next. One improvement would be to avoid the initial
malloc of 4096 bytes, especially if that size is enough for most cases.
You could keep the on-stack buf as the initial buffer, detect that it is
still in use through iov.iov_base == buf, and not free(iov.iov_base) at
the end in that case. Seems reliable and simple enough. I'll leave it up
to you to follow up, if you think it's a good idea.

>  tools/lib/bpf/netlink.c | 55 ++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 51 insertions(+), 4 deletions(-)
>
> diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
> index c39c37f99d5c..a598061f6fea 100644
> --- a/tools/lib/bpf/netlink.c
> +++ b/tools/lib/bpf/netlink.c
> @@ -87,29 +87,75 @@ enum {
>         NL_DONE,
>  };
>
> +static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
> +{
> +       int len;
> +
> +       do {
> +               len = recvmsg(sock, mhdr, flags);
> +       } while (len < 0 && (errno == EINTR || errno == EAGAIN));
> +
> +       if (len < 0)
> +               return -errno;
> +       return len;
> +}
> +
> +static int alloc_iov(struct iovec *iov, int len)
> +{
> +       void *nbuf;
> +
> +       nbuf = realloc(iov->iov_base, len);
> +       if (!nbuf)
> +               return -ENOMEM;
> +
> +       iov->iov_base = nbuf;
> +       iov->iov_len = len;
> +       return 0;
> +}
> +
>  static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
>                                __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
>                                void *cookie)
>  {
> +       struct iovec iov = {};
> +       struct msghdr mhdr = {
> +               .msg_iov = &iov,
> +               .msg_iovlen = 1,
> +       };
>         bool multipart = true;
>         struct nlmsgerr *err;
>         struct nlmsghdr *nh;
> -       char buf[4096];
>         int len, ret;
>
> +       ret = alloc_iov(&iov, 4096);
> +       if (ret)
> +               goto done;
> +
>         while (multipart) {
>  start:
>                 multipart = false;
> -               len = recv(sock, buf, sizeof(buf), 0);
> +               len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
> +               if (len < 0) {
> +                       ret = len;
> +                       goto done;
> +               }
> +
> +               if (len > iov.iov_len) {
> +                       ret = alloc_iov(&iov, len);
> +                       if (ret)
> +                               goto done;
> +               }
> +
> +               len = netlink_recvmsg(sock, &mhdr, 0);
>                 if (len < 0) {
> -                       ret = -errno;
> +                       ret = len;
>                         goto done;
>                 }
>
>                 if (len == 0)
>                         break;
>
> -               for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
> +               for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
>                      nh = NLMSG_NEXT(nh, len)) {
>                         if (nh->nlmsg_pid != nl_pid) {
>                                 ret = -LIBBPF_ERRNO__WRNGPID;
> @@ -151,6 +197,7 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
>         }
>         ret = 0;
>  done:
> +       free(iov.iov_base);
>         return ret;
>  }
>
> --
> 2.35.1
>