[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <12764740.O9o76ZdvQC@7950hx>
Date: Sun, 19 Oct 2025 10:03:31 +0800
From: Menglong Dong <menglong.dong@...ux.dev>
To: ast@...nel.org, jolsa@...nel.org, Menglong Dong <menglong8.dong@...il.com>
Cc: daniel@...earbox.net, john.fastabend@...il.com, andrii@...nel.org,
martin.lau@...ux.dev, eddyz87@...il.com, song@...nel.org,
yonghong.song@...ux.dev, kpsingh@...nel.org, sdf@...ichev.me,
haoluo@...gle.com, mattbobrowski@...gle.com, rostedt@...dmis.org,
mhiramat@...nel.org, mathieu.desnoyers@...icios.com, leon.hwang@...ux.dev,
bpf@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-trace-kernel@...r.kernel.org
Subject: Re: [PATCH RFC bpf-next 3/5] bpf,x86: add tracing session supporting for x86_64
On 2025/10/18 22:21, Menglong Dong wrote:
> Add BPF_TRACE_SESSION supporting to x86_64. invoke_bpf_session_entry and
> invoke_bpf_session_exit is introduced for this purpose.
>
> In invoke_bpf_session_entry(), we will check if the return value of the
> fentry is 0, and clear the corresponding flag if not. And in
> invoke_bpf_session_exit(), we will check if the corresponding flag is
> set. If not set, the fexit will be skipped.
>
> Signed-off-by: Menglong Dong <dongml2@...natelecom.cn>
> Co-developed-by: Leon Hwang <leon.hwang@...ux.dev>
> Signed-off-by: Leon Hwang <leon.hwang@...ux.dev>
> ---
> arch/x86/net/bpf_jit_comp.c | 115 +++++++++++++++++++++++++++++++++++-
> 1 file changed, 114 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index d4c93d9e73e4..0586b96ed529 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -3108,6 +3108,97 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
> return 0;
> }
>
> +static int invoke_bpf_session_entry(const struct btf_func_model *m, u8 **pprog,
> + struct bpf_tramp_links *tl, int stack_size,
> + int run_ctx_off, int session_off,
> + void *image, void *rw_image)
> +{
> + u64 session_flags;
> + u8 *prog = *pprog;
> + u8 *jmp_insn;
> + int i;
> +
> + /* clear the session flags:
> + *
> + * xor rax, rax
> + * mov QWORD PTR [rbp - session_off], rax
> + */
> + EMIT3(0x48, 0x31, 0xC0);
> + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -session_off);
> +
> + for (i = 0; i < tl->nr_links; i++) {
> + if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true,
> + image, rw_image))
> + return -EINVAL;
> +
> + /* fentry prog stored return value into [rbp - 8]. Emit:
> + * if (*(u64 *)(rbp - 8) != 0)
> + * *(u64 *)(rbp - session_off) |= (1 << (i + 1));
> + */
> + /* cmp QWORD PTR [rbp - 0x8], 0x0 */
> + EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00);
> + /* emit 2 nops that will be replaced with JE insn */
> + jmp_insn = prog;
> + emit_nops(&prog, 2);
> +
> + session_flags = (1ULL << (i + 1));
> + /* mov rax, $session_flags */
> + emit_mov_imm64(&prog, BPF_REG_0, session_flags >> 32, (u32) session_flags);
> + /* or QWORD PTR [rbp - session_off], rax */
> + EMIT2(0x48, 0x09);
> + emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_0, -session_off);
> +
> + jmp_insn[0] = X86_JE;
> + jmp_insn[1] = prog - jmp_insn - 2;
> + }
> +
> + *pprog = prog;
> + return 0;
> +}
> +
> +static int invoke_bpf_session_exit(const struct btf_func_model *m, u8 **pprog,
> + struct bpf_tramp_links *tl, int stack_size,
> + int run_ctx_off, int session_off,
> + void *image, void *rw_image)
> +{
> + u64 session_flags;
> + u8 *prog = *pprog;
> + u8 *jmp_insn;
> + int i;
> +
> + /* set the bpf_trace_is_exit flag to the session flags */
> + /* mov rax, 1 */
> + emit_mov_imm32(&prog, false, BPF_REG_0, 1);
> + /* or QWORD PTR [rbp - session_off], rax */
> + EMIT2(0x48, 0x09);
> + emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_0, -session_off);
> +
> + for (i = 0; i < tl->nr_links; i++) {
> + /* check if (1 << (i+1)) is set in the session flags, and
> + * skip the execution of the fexit program if it is.
> + */
> + session_flags = 1ULL << (i + 1);
> + /* mov rax, $session_flags */
> + emit_mov_imm64(&prog, BPF_REG_1, session_flags >> 32, (u32) session_flags);
> + /* test QWORD PTR [rbp - session_off], rax */
> + EMIT2(0x48, 0x85);
> + emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_1, -session_off);
> + /* emit 2 nops that will be replaced with JE insn */
> + jmp_insn = prog;
> + emit_nops(&prog, 2);
> +
> + if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, false,
> + image, rw_image))
> + return -EINVAL;
> +
> + jmp_insn[0] = X86_JNE;
> + jmp_insn[1] = prog - jmp_insn - 2;
> + }
> +
> + *pprog = prog;
> + return 0;
> +}
> +
> /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
> #define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack) \
> __LOAD_TCC_PTR(-round_up(stack, 8) - 8)
> @@ -3179,8 +3270,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> void *func_addr)
> {
> int i, ret, nr_regs = m->nr_args, stack_size = 0;
> - int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
> + int regs_off, nregs_off, session_off, ip_off, run_ctx_off,
> + arg_stack_off, rbx_off;
> struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> + struct bpf_tramp_links *session = &tlinks[BPF_TRAMP_SESSION];
> struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
> struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
> void *orig_call = func_addr;
> @@ -3222,6 +3315,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> *
> * RBP - nregs_off [ regs count ] always
> *
> + * RBP - session_off [ session flags ] tracing session
> + *
> * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
> *
> * RBP - rbx_off [ rbx value ] always
> @@ -3246,6 +3341,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> /* regs count */
> stack_size += 8;
> nregs_off = stack_size;
> + stack_size += 8;
> + session_off = stack_size;
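Oops, this breaks bpf_get_func_ip(), which reads the IP from ctx[-2]: the new
session flags slot now sits between the regs count and the IP slot, so the IP
is no longer at ctx[-2].
I'll introduce a "bpf_get_func_ip_proto_tracing_session" to fix it.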
>
> if (flags & BPF_TRAMP_F_IP_ARG)
> stack_size += 8; /* room for IP address argument */
> @@ -3345,6 +3442,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> return -EINVAL;
> }
>
> + if (session->nr_links) {
> + if (invoke_bpf_session_entry(m, &prog, session, regs_off,
> + run_ctx_off, session_off,
> + image, rw_image))
> + return -EINVAL;
> + }
> +
> if (fmod_ret->nr_links) {
> branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
> GFP_KERNEL);
> @@ -3409,6 +3513,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> }
> }
>
> + if (session->nr_links) {
> + if (invoke_bpf_session_exit(m, &prog, session, regs_off,
> + run_ctx_off, session_off,
> + image, rw_image)) {
> + ret = -EINVAL;
> + goto cleanup;
> + }
> + }
> +
> if (flags & BPF_TRAMP_F_RESTORE_REGS)
> restore_regs(m, &prog, regs_off);
>
>
Powered by blists - more mailing lists