Message-ID: <aO4-jAA5RIUY2yxc@krava>
Date: Tue, 14 Oct 2025 14:14:04 +0200
From: Jiri Olsa <olsajiri@...il.com>
To: Tao Chen <chen.dylane@...ux.dev>
Cc: peterz@...radead.org, mingo@...hat.com, acme@...nel.org,
namhyung@...nel.org, mark.rutland@....com,
alexander.shishkin@...ux.intel.com, irogers@...gle.com,
adrian.hunter@...el.com, kan.liang@...ux.intel.com, song@...nel.org,
ast@...nel.org, daniel@...earbox.net, andrii@...nel.org,
martin.lau@...ux.dev, eddyz87@...il.com, yonghong.song@...ux.dev,
john.fastabend@...il.com, kpsingh@...nel.org, sdf@...ichev.me,
haoluo@...gle.com, linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org, bpf@...r.kernel.org
Subject: Re: [RFC PATCH bpf-next v2 2/2] bpf: Pass external callchain entry
to get_perf_callchain
On Tue, Oct 14, 2025 at 06:01:28PM +0800, Tao Chen wrote:
> As Alexei noted, the entry returned by get_perf_callchain() may be
> reused if the task is preempted after the BPF program enters the
> migrate-disabled section. In place of the per-cpu design of
> bpf_perf_callchain_entries, stack-allocated memory for a
> bpf_perf_callchain_entry is used here.
>
> Signed-off-by: Tao Chen <chen.dylane@...ux.dev>
> ---
> kernel/bpf/stackmap.c | 19 +++++++++++--------
> 1 file changed, 11 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 94e46b7f340..acd72c021c0 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -31,6 +31,11 @@ struct bpf_stack_map {
> struct stack_map_bucket *buckets[] __counted_by(n_buckets);
> };
>
> +struct bpf_perf_callchain_entry {
> + u64 nr;
> + u64 ip[PERF_MAX_STACK_DEPTH];
> +};
> +
> static inline bool stack_map_use_build_id(struct bpf_map *map)
> {
> return (map->map_flags & BPF_F_STACK_BUILD_ID);
> @@ -305,6 +310,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
> bool user = flags & BPF_F_USER_STACK;
> struct perf_callchain_entry *trace;
> bool kernel = !user;
> + struct bpf_perf_callchain_entry entry = { 0 };
So IIUC, with the entry on the stack we no longer need the
preempt_disable you had in the previous version, right?

I saw Andrii's justification for having this on the stack and I think
it's fine, but does it have to be initialized? It seems that only the
used entries are copied to the map.
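
If patch 1 keeps the ctx.nr = entry->nr = 0 assignment in
get_perf_callchain() for the externally passed entry as well, then
the zero-initializer could go, something like (untested sketch,
assuming that initialization survives):

	struct bpf_perf_callchain_entry entry;
	...
	trace = get_perf_callchain(regs, (struct perf_callchain_entry *)&entry,
				   kernel, user, max_depth, false, false);

__bpf_get_stackid() only reads trace->ip[skip..trace->nr), so the
unused tail of entry.ip[] would never be touched.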
jirka
>
> if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
> BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
> @@ -314,12 +320,8 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
> if (max_depth > sysctl_perf_event_max_stack)
> max_depth = sysctl_perf_event_max_stack;
>
> - trace = get_perf_callchain(regs, NULL, kernel, user, max_depth,
> - false, false);
> -
> - if (unlikely(!trace))
> - /* couldn't fetch the stack trace */
> - return -EFAULT;
> + trace = get_perf_callchain(regs, (struct perf_callchain_entry *)&entry,
> + kernel, user, max_depth, false, false);
>
> return __bpf_get_stackid(map, trace, flags);
> }
> @@ -412,6 +414,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
> u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
> bool user = flags & BPF_F_USER_STACK;
> struct perf_callchain_entry *trace;
> + struct bpf_perf_callchain_entry entry = { 0 };
> bool kernel = !user;
> int err = -EINVAL;
> u64 *ips;
> @@ -451,8 +454,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
> else if (kernel && task)
> trace = get_callchain_entry_for_task(task, max_depth);
> else
> - trace = get_perf_callchain(regs, NULL, kernel, user, max_depth,
> - crosstask, false);
> + trace = get_perf_callchain(regs, (struct perf_callchain_entry *)&entry,
> + kernel, user, max_depth, crosstask, false);
>
> if (unlikely(!trace) || trace->nr < skip) {
> if (may_fault)
> --
> 2.48.1
>