Message-ID: <CAADnVQK4O-igzuSvfgjG1ZqdUBXrjNL=4tJZuS1uy36GCD2mVg@mail.gmail.com>
Date: Fri, 9 Jan 2026 08:24:41 -0800
From: Alexei Starovoitov <alexei.starovoitov@...il.com>
To: Leon Hwang <leon.hwang@...ux.dev>
Cc: bpf <bpf@...r.kernel.org>, Alexei Starovoitov <ast@...nel.org>, 
	Daniel Borkmann <daniel@...earbox.net>, Andrii Nakryiko <andrii@...nel.org>, 
	Martin KaFai Lau <martin.lau@...ux.dev>, Eduard Zingerman <eddyz87@...il.com>, Song Liu <song@...nel.org>, 
	Yonghong Song <yonghong.song@...ux.dev>, John Fastabend <john.fastabend@...il.com>, 
	KP Singh <kpsingh@...nel.org>, Stanislav Fomichev <sdf@...ichev.me>, Hao Luo <haoluo@...gle.com>, 
	Jiri Olsa <jolsa@...nel.org>, "David S . Miller" <davem@...emloft.net>, David Ahern <dsahern@...nel.org>, 
	Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>, 
	Dave Hansen <dave.hansen@...ux.intel.com>, X86 ML <x86@...nel.org>, 
	"H . Peter Anvin" <hpa@...or.com>, Matt Bobrowski <mattbobrowski@...gle.com>, 
	Steven Rostedt <rostedt@...dmis.org>, Masami Hiramatsu <mhiramat@...nel.org>, 
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>, Shuah Khan <shuah@...nel.org>, 
	Network Development <netdev@...r.kernel.org>, LKML <linux-kernel@...r.kernel.org>, 
	linux-trace-kernel <linux-trace-kernel@...r.kernel.org>, 
	"open list:KERNEL SELFTEST FRAMEWORK" <linux-kselftest@...r.kernel.org>, kernel-patches-bot@...com
Subject: Re: [PATCH bpf-next 1/3] bpf, x64: Call perf_snapshot_branch_stack in trampoline

On Fri, Jan 9, 2026 at 7:37 AM Leon Hwang <leon.hwang@...ux.dev> wrote:
>
> When the PMU's LBR (Last Branch Records) feature is running in
> branch-sensitive mode,
> 'perf_snapshot_branch_stack()' may capture branch entries from the
> trampoline entry up to the call site inside a BPF program. These branch
> entries are not useful for analyzing the control flow of the tracee.
>
> To eliminate such noise for tracing programs, the branch snapshot should
> be taken as early as possible:
>
> * Call 'perf_snapshot_branch_stack()' at the very beginning of the
>   trampoline for fentry programs.
> * Call 'perf_snapshot_branch_stack()' immediately after the tracee
>   returns, for fexit programs.
>
> With this change, LBR snapshots remain meaningful even when multiple BPF
> programs execute before the one requesting LBR data.
>
> In addition, more relevant branch entries can be captured on AMD CPUs,
> which provide a 16-entry-deep LBR stack.
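For context, the program side reads such a snapshot back with the
existing bpf_get_branch_snapshot() helper. Below is a minimal sketch,
assuming a libbpf-style build against vmlinux.h; the attach point,
buffer size and program name are illustrative only, and the helper
returns data only while a perf event with branch-stack sampling is
active on the CPU:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#define MAX_LBR 32	/* assumed to cover both Intel and AMD LBR depths */

struct perf_branch_entry lbr[MAX_LBR];
long lbr_cnt;		/* number of valid entries after each invocation */

SEC("fentry/do_nanosleep")	/* illustrative tracee */
int BPF_PROG(lbr_snap)
{
	long sz;

	/* Take the snapshot first thing: every instruction executed
	 * before this call burns LBR entries of its own. */
	sz = bpf_get_branch_snapshot(lbr, sizeof(lbr), 0);
	if (sz >= 0)
		lbr_cnt = sz / sizeof(struct perf_branch_entry);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

Each entry carries from/to addresses that tooling can symbolize
offline.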
>
> Signed-off-by: Leon Hwang <leon.hwang@...ux.dev>
> ---
>  arch/x86/net/bpf_jit_comp.c | 66 +++++++++++++++++++++++++++++++++++++
>  include/linux/bpf.h         | 16 ++++++++-
>  2 files changed, 81 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index e3b1c4b1d550..a71a6c675392 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -12,6 +12,7 @@
>  #include <linux/bpf.h>
>  #include <linux/memory.h>
>  #include <linux/sort.h>
> +#include <linux/perf_event.h>
>  #include <asm/extable.h>
>  #include <asm/ftrace.h>
>  #include <asm/set_memory.h>
> @@ -19,6 +20,7 @@
>  #include <asm/text-patching.h>
>  #include <asm/unwind.h>
>  #include <asm/cfi.h>
> +#include "../events/perf_event.h"
>
>  static bool all_callee_regs_used[4] = {true, true, true, true};
>
> @@ -3137,6 +3139,54 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
>         return 0;
>  }
>
> +DEFINE_PER_CPU(struct bpf_tramp_branch_entries, bpf_branch_snapshot);
> +
> +static int invoke_branch_snapshot(u8 **pprog, void *image, void *rw_image)
> +{
> +       struct bpf_tramp_branch_entries __percpu *pptr = &bpf_branch_snapshot;
> +       u8 *prog = *pprog;
> +
> +       /*
> +        * Emit:
> +        *
> +        * struct bpf_tramp_branch_entries *br = this_cpu_ptr(&bpf_branch_snapshot);
> +        * br->cnt = static_call(perf_snapshot_branch_stack)(br->entries, x86_pmu.lbr_nr);
> +        */
> +
> +       /* mov rbx, &bpf_branch_snapshot */
> +       emit_mov_imm64(&prog, BPF_REG_6, (long) pptr >> 32, (u32)(long) pptr);
> +#ifdef CONFIG_SMP
> +       /* add rbx, gs:[this_cpu_off] */
> +       EMIT2(0x65, 0x48);
> +       EMIT3(0x03, 0x1C, 0x25);
> +       EMIT((u32)(unsigned long)&this_cpu_off, 4);
> +#endif
> +       /* mov esi, x86_pmu.lbr_nr */
> +       EMIT1_off32(0xBE, x86_pmu.lbr_nr);
> +       /* lea rdi, [rbx + offsetof(struct bpf_tramp_branch_entries, entries)] */
> +       EMIT4(0x48, 0x8D, 0x7B, offsetof(struct bpf_tramp_branch_entries, entries));
> +       /* call static_call_query(perf_snapshot_branch_stack) */
> +       if (emit_rsb_call(&prog, static_call_query(perf_snapshot_branch_stack),
> +                         image + (prog - (u8 *)rw_image)))
> +               return -EINVAL;
> +       /* mov dword ptr [rbx], eax */
> +       EMIT2(0x89, 0x03);
> +
> +       *pprog = prog;
> +       return 0;
> +}
> +
> +static bool bpf_prog_copy_branch_snapshot(struct bpf_tramp_links *tl)
> +{
> +       bool copy = false;
> +       int i;
> +
> +       for (i = 0; i < tl->nr_links; i++)
> +               copy = copy || tl->links[i]->link.prog->copy_branch_snapshot;
> +
> +       return copy;
> +}
> +
>  /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
>  #define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack)    \
>         __LOAD_TCC_PTR(-round_up(stack, 8) - 8)
> @@ -3366,6 +3416,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>
>         save_args(m, &prog, regs_off, false, flags);
>
> +       if (bpf_prog_copy_branch_snapshot(fentry)) {
> +               /* Get branch snapshot asap. */
> +               if (invoke_branch_snapshot(&prog, image, rw_image)) {
> +                       ret = -EINVAL;
> +                       goto cleanup;
> +               }
> +       }

Andrii already tried to do it.
I hated it back then and still hate the idea.
We're not going to add custom logic for one specific use case,
no matter how appealing it sounds to save the very limited LBR entries.
The HW will get better, but we will be stuck with this optimization forever.

pw-bot: cr
