[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEf4BzYURbRGL2D-WV=VUs6to=024wO2u=bGtwwxLEKc6pmfhQ@mail.gmail.com>
Date: Sat, 12 Feb 2022 07:59:57 -0800
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Toke Høiland-Jørgensen <toke@...hat.com>
Cc: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <kafai@...com>,
Song Liu <songliubraving@...com>, Yonghong Song <yhs@...com>,
John Fastabend <john.fastabend@...il.com>,
KP Singh <kpsingh@...nel.org>,
Kumar Kartikeya Dwivedi <memxor@...il.com>,
Zhiqian Guan <zhguan@...hat.com>,
Networking <netdev@...r.kernel.org>, bpf <bpf@...r.kernel.org>
Subject: Re: [PATCH bpf-next v2] libbpf: Use dynamically allocated buffer when
receiving netlink messages
On Fri, Feb 11, 2022 at 3:49 PM Toke Høiland-Jørgensen <toke@...hat.com> wrote:
>
> When receiving netlink messages, libbpf was using a statically allocated
> stack buffer of 4k bytes. This happened to work fine on systems with a 4k
> page size, but on systems with larger page sizes it can lead to truncated
> messages. The user-visible impact of this was that libbpf would insist no
> XDP program was attached to some interfaces because that bit of the netlink
> message got chopped off.
>
> Fix this by switching to a dynamically allocated buffer; we borrow the
> approach from iproute2 of using recvmsg() with MSG_PEEK|MSG_TRUNC to get
> the actual size of the pending message before receiving it, adjusting the
> buffer as necessary. While we're at it, also add retries on interrupted
> system calls around the recvmsg() call.
>
> v2:
> - Move peek logic to libbpf_netlink_recv(), don't double free on ENOMEM.
>
> Reported-by: Zhiqian Guan <zhguan@...hat.com>
> Fixes: 8bbb77b7c7a2 ("libbpf: Add various netlink helpers")
> Acked-by: Kumar Kartikeya Dwivedi <memxor@...il.com>
> Signed-off-by: Toke Høiland-Jørgensen <toke@...hat.com>
> ---
Applied to bpf-next. One improvement would be to avoid initial malloc
of 4096, especially if that size is enough for most cases. You could
detect this through iov.iov_base == buf and not free(iov.iov_base) at
the end. Seems reliable and simple enough. I'll leave it up to you to
follow up, if you think it's a good idea.
> tools/lib/bpf/netlink.c | 55 ++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 51 insertions(+), 4 deletions(-)
>
> diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
> index c39c37f99d5c..a598061f6fea 100644
> --- a/tools/lib/bpf/netlink.c
> +++ b/tools/lib/bpf/netlink.c
> @@ -87,29 +87,75 @@ enum {
> NL_DONE,
> };
>
> +static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
> +{
> + int len;
> +
> + do {
> + len = recvmsg(sock, mhdr, flags);
> + } while (len < 0 && (errno == EINTR || errno == EAGAIN));
> +
> + if (len < 0)
> + return -errno;
> + return len;
> +}
> +
> +static int alloc_iov(struct iovec *iov, int len)
> +{
> + void *nbuf;
> +
> + nbuf = realloc(iov->iov_base, len);
> + if (!nbuf)
> + return -ENOMEM;
> +
> + iov->iov_base = nbuf;
> + iov->iov_len = len;
> + return 0;
> +}
> +
> static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
> __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
> void *cookie)
> {
> + struct iovec iov = {};
> + struct msghdr mhdr = {
> + .msg_iov = &iov,
> + .msg_iovlen = 1,
> + };
> bool multipart = true;
> struct nlmsgerr *err;
> struct nlmsghdr *nh;
> - char buf[4096];
> int len, ret;
>
> + ret = alloc_iov(&iov, 4096);
> + if (ret)
> + goto done;
> +
> while (multipart) {
> start:
> multipart = false;
> - len = recv(sock, buf, sizeof(buf), 0);
> + len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
> + if (len < 0) {
> + ret = len;
> + goto done;
> + }
> +
> + if (len > iov.iov_len) {
> + ret = alloc_iov(&iov, len);
> + if (ret)
> + goto done;
> + }
> +
> + len = netlink_recvmsg(sock, &mhdr, 0);
> if (len < 0) {
> - ret = -errno;
> + ret = len;
> goto done;
> }
>
> if (len == 0)
> break;
>
> - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
> + for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
> nh = NLMSG_NEXT(nh, len)) {
> if (nh->nlmsg_pid != nl_pid) {
> ret = -LIBBPF_ERRNO__WRNGPID;
> @@ -151,6 +197,7 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
> }
> ret = 0;
> done:
> + free(iov.iov_base);
> return ret;
> }
>
> --
> 2.35.1
>
Powered by blists - more mailing lists