[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEf4BzZTYVq=oct0rUg5Y+wOc07k2Pcz-66M-NtjRFJTez0AvA@mail.gmail.com>
Date: Wed, 15 Jul 2020 22:55:16 -0700
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Song Liu <songliubraving@...com>
Cc: open list <linux-kernel@...r.kernel.org>,
bpf <bpf@...r.kernel.org>, Networking <netdev@...r.kernel.org>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Kernel Team <kernel-team@...com>,
john fastabend <john.fastabend@...il.com>,
KP Singh <kpsingh@...omium.org>,
Jesper Dangaard Brouer <brouer@...hat.com>,
Peter Ziljstra <peterz@...radead.org>
Subject: Re: [PATCH v2 bpf-next 1/2] bpf: separate bpf_get_[stack|stackid] for
perf events BPF
On Tue, Jul 14, 2020 at 11:08 PM Song Liu <songliubraving@...com> wrote:
>
> Calling get_perf_callchain() on perf_events from PEBS entries may cause
> unwinder errors. To fix this issue, the callchain is fetched early. Such
> perf_events are marked with __PERF_SAMPLE_CALLCHAIN_EARLY.
>
> Similarly, calling bpf_get_[stack|stackid] on perf_events from PEBS may
> also cause unwinder errors. To fix this, add separate version of these
> two helpers, bpf_get_[stack|stackid]_pe. These two hepers use callchain in
> bpf_perf_event_data_kern->data->callchain.
>
> Signed-off-by: Song Liu <songliubraving@...com>
> ---
> include/linux/bpf.h | 2 +
> kernel/bpf/stackmap.c | 204 +++++++++++++++++++++++++++++++++++----
> kernel/trace/bpf_trace.c | 4 +-
> 3 files changed, 190 insertions(+), 20 deletions(-)
>
Glad this approach worked out! Few minor bugs below, though.
[...]
> + if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
> + BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
> + return -EINVAL;
> +
> + user = flags & BPF_F_USER_STACK;
> + kernel = !user;
> +
> + has_kernel = !event->attr.exclude_callchain_kernel;
> + has_user = !event->attr.exclude_callchain_user;
> +
> + if ((kernel && !has_kernel) || (user && !has_user))
> + return -EINVAL;
> +
> + trace = ctx->data->callchain;
> + if (!trace || (!has_kernel && !has_user))
(!has_kernel && !has_user) can never happen, it's checked by if above
(one of kernel or user is always true => one of has_user or has_kernel
is always true).
> + return -EFAULT;
> +
> + if (has_kernel && has_user) {
> + __u64 nr_kernel = count_kernel_ip(trace);
> + int ret;
> +
> + if (kernel) {
> + __u64 nr = trace->nr;
> +
> + trace->nr = nr_kernel;
> + ret = __bpf_get_stackid(map, trace, flags);
> +
> + /* restore nr */
> + trace->nr = nr;
> + } else { /* user */
> + u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
> +
> + skip += nr_kernel;
> + if (skip > ~BPF_F_SKIP_FIELD_MASK)
something fishy here: ~BPF_F_SKIP_FIELD_MASK is a really big number,
were you going to check that skip is not bigger than 255 (i.e., fits
within BPF_F_SKIP_FIELD_MASK)?
> + return -EFAULT;
> +
> + flags = (flags & ~BPF_F_SKIP_FIELD_MASK) |
> + (skip & BPF_F_SKIP_FIELD_MASK);
> + ret = __bpf_get_stackid(map, trace, flags);
> + }
> + return ret;
> + }
> + return __bpf_get_stackid(map, trace, flags);
> +}
> +
[...]
> +
> + has_kernel = !event->attr.exclude_callchain_kernel;
> + has_user = !event->attr.exclude_callchain_user;
> +
> + if ((kernel && !has_kernel) || (user && !has_user))
> + goto clear;
> +
> + err = -EFAULT;
> + trace = ctx->data->callchain;
> + if (!trace || (!has_kernel && !has_user))
> + goto clear;
same as above for bpf_get_stackid, probably can be simplified
> +
> + if (has_kernel && has_user) {
> + __u64 nr_kernel = count_kernel_ip(trace);
> + int ret;
> +
> + if (kernel) {
> + __u64 nr = trace->nr;
> +
> + trace->nr = nr_kernel;
> + ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
> + size, flags);
> +
> + /* restore nr */
> + trace->nr = nr;
> + } else { /* user */
> + u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
> +
> + skip += nr_kernel;
> + if (skip > ~BPF_F_SKIP_FIELD_MASK)
> + goto clear;
> +
and here
> + flags = (flags & ~BPF_F_SKIP_FIELD_MASK) |
> + (skip & BPF_F_SKIP_FIELD_MASK);
actually if you check that skip <= BPF_F_SKIP_FIELD_MASK, you don't
need to mask it here, just `| skip`
> + ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
> + size, flags);
> + }
> + return ret;
> + }
> + return __bpf_get_stack(ctx->regs, NULL, trace, buf, size, flags);
> +clear:
> + memset(buf, 0, size);
> + return err;
> +
> +}
> +
[...]
Powered by blists - more mailing lists