[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251018142124.783206-2-dongml2@chinatelecom.cn>
Date: Sat, 18 Oct 2025 22:21:20 +0800
From: Menglong Dong <menglong8.dong@...il.com>
To: ast@...nel.org,
jolsa@...nel.org
Cc: daniel@...earbox.net,
john.fastabend@...il.com,
andrii@...nel.org,
martin.lau@...ux.dev,
eddyz87@...il.com,
song@...nel.org,
yonghong.song@...ux.dev,
kpsingh@...nel.org,
sdf@...ichev.me,
haoluo@...gle.com,
mattbobrowski@...gle.com,
rostedt@...dmis.org,
mhiramat@...nel.org,
mathieu.desnoyers@...icios.com,
leon.hwang@...ux.dev,
bpf@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-trace-kernel@...r.kernel.org
Subject: [PATCH RFC bpf-next 1/5] bpf: add tracing session support
The tracing session is something that similar to kprobe session. It allow
to attach a single BPF program to both the entry and the exit of the
target functions.
While a non-zero value is returned by the fentry, the fexit will be
skipped, which is similar to kprobe session.
Signed-off-by: Menglong Dong <dongml2@...natelecom.cn>
Co-developed-by: Leon Hwang <leon.hwang@...ux.dev>
Signed-off-by: Leon Hwang <leon.hwang@...ux.dev>
---
arch/arm64/net/bpf_jit_comp.c | 3 +++
arch/loongarch/net/bpf_jit.c | 3 +++
arch/powerpc/net/bpf_jit_comp.c | 3 +++
arch/riscv/net/bpf_jit_comp64.c | 3 +++
arch/s390/net/bpf_jit_comp.c | 3 +++
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 1 +
kernel/bpf/btf.c | 2 ++
kernel/bpf/syscall.c | 2 ++
kernel/bpf/trampoline.c | 5 ++++-
kernel/bpf/verifier.c | 12 +++++++++---
net/bpf/test_run.c | 1 +
net/core/bpf_sk_storage.c | 1 +
tools/include/uapi/linux/bpf.h | 1 +
14 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index ab83089c3d8f..06f4bd6c6755 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2788,6 +2788,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
void *image, *tmp;
int ret;
+ if (tlinks[BPF_TRAMP_SESSION].nr_links)
+ return -EOPNOTSUPP;
+
/* image doesn't need to be in module memory range, so we can
* use kvmalloc.
*/
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index cbe53d0b7fb0..ad596341658a 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1739,6 +1739,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
void *image, *tmp;
struct jit_ctx ctx;
+ if (tlinks[BPF_TRAMP_SESSION].nr_links)
+ return -EOPNOTSUPP;
+
size = ro_image_end - ro_image;
image = kvmalloc(size, GFP_KERNEL);
if (!image)
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 88ad5ba7b87f..bcc0ce09f6fa 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -1017,6 +1017,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
void *rw_image, *tmp;
int ret;
+ if (tlinks[BPF_TRAMP_SESSION].nr_links)
+ return -EOPNOTSUPP;
+
/*
* rw_image doesn't need to be in module memory range, so we can
* use kvmalloc.
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 45cbc7c6fe49..55b0284bf177 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1286,6 +1286,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
struct rv_jit_context ctx;
u32 size = ro_image_end - ro_image;
+ if (tlinks[BPF_TRAMP_SESSION].nr_links)
+ return -EOPNOTSUPP;
+
image = kvmalloc(size, GFP_KERNEL);
if (!image)
return -ENOMEM;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index cf461d76e9da..3f25bf55b150 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2924,6 +2924,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
struct bpf_tramp_jit tjit;
int ret;
+ if (tlinks[BPF_TRAMP_SESSION].nr_links)
+ return -EOPNOTSUPP;
+
/* Compute offsets, check whether the code fits. */
memset(&tjit, 0, sizeof(tjit));
ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 86afd9ac6848..aa9f02b56edd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1270,6 +1270,7 @@ enum bpf_tramp_prog_type {
BPF_TRAMP_FENTRY,
BPF_TRAMP_FEXIT,
BPF_TRAMP_MODIFY_RETURN,
+ BPF_TRAMP_SESSION,
BPF_TRAMP_MAX,
BPF_TRAMP_REPLACE, /* more than MAX */
};
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6829936d33f5..79ba3023e8be 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1133,6 +1133,7 @@ enum bpf_attach_type {
BPF_NETKIT_PEER,
BPF_TRACE_KPROBE_SESSION,
BPF_TRACE_UPROBE_SESSION,
+ BPF_TRACE_SESSION,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 0de8fc8a0e0b..2c1c3e0caff8 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6107,6 +6107,7 @@ static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
case BPF_MODIFY_RETURN:
+ case BPF_TRACE_SESSION:
/* allow u64* as ctx */
if (btf_is_int(t) && t->size == 8)
return 0;
@@ -6704,6 +6705,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
fallthrough;
case BPF_LSM_CGROUP:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_SESSION:
/* When LSM programs are attached to void LSM hooks
* they use FEXIT trampolines and when attached to
* int LSM hooks, they use MODIFY_RETURN trampolines.
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2a9456a3e730..15ce86b19ca4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3549,6 +3549,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
case BPF_PROG_TYPE_TRACING:
if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
prog->expected_attach_type != BPF_TRACE_FEXIT &&
+ prog->expected_attach_type != BPF_TRACE_SESSION &&
prog->expected_attach_type != BPF_MODIFY_RETURN) {
err = -EINVAL;
goto out_put_prog;
@@ -4322,6 +4323,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_TRACE_RAW_TP:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_SESSION:
case BPF_MODIFY_RETURN:
return BPF_PROG_TYPE_TRACING;
case BPF_LSM_MAC:
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 5949095e51c3..f6d4dea3461e 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -111,7 +111,7 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
return (ptype == BPF_PROG_TYPE_TRACING &&
(eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
- eatype == BPF_MODIFY_RETURN)) ||
+ eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_SESSION)) ||
(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}
@@ -418,6 +418,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
+ tlinks[BPF_TRAMP_SESSION].nr_links ||
tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
* should not be set together.
@@ -515,6 +516,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
return BPF_TRAMP_MODIFY_RETURN;
case BPF_TRACE_FEXIT:
return BPF_TRAMP_FEXIT;
+ case BPF_TRACE_SESSION:
+ return BPF_TRAMP_SESSION;
case BPF_LSM_MAC:
if (!prog->aux->attach_func_proto->type)
/* The function returns void, we cannot modify its
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c908015b2d34..40e3274e8bc2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17272,6 +17272,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
break;
case BPF_TRACE_RAW_TP:
case BPF_MODIFY_RETURN:
+ case BPF_TRACE_SESSION:
return 0;
case BPF_TRACE_ITER:
break;
@@ -22727,6 +22728,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
if (prog_type == BPF_PROG_TYPE_TRACING &&
insn->imm == BPF_FUNC_get_func_ret) {
if (eatype == BPF_TRACE_FEXIT ||
+ eatype == BPF_TRACE_SESSION ||
eatype == BPF_MODIFY_RETURN) {
/* Load nr_args from ctx - 8 */
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
@@ -23668,7 +23670,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
prog_extension &&
(tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
- tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
+ tgt_prog->expected_attach_type == BPF_TRACE_FEXIT ||
+ tgt_prog->expected_attach_type == BPF_TRACE_SESSION)) {
/* Program extensions can extend all program types
* except fentry/fexit. The reason is the following.
* The fentry/fexit programs are used for performance
@@ -23683,7 +23686,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
* beyond reasonable stack size. Hence extending fentry
* is not allowed.
*/
- bpf_log(log, "Cannot extend fentry/fexit\n");
+ bpf_log(log, "Cannot extend fentry/fexit/session\n");
return -EINVAL;
}
} else {
@@ -23767,6 +23770,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
case BPF_LSM_CGROUP:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_SESSION:
if (!btf_type_is_func(t)) {
bpf_log(log, "attach_btf_id %u is not a function\n",
btf_id);
@@ -23933,6 +23937,7 @@ static bool can_be_sleepable(struct bpf_prog *prog)
case BPF_TRACE_FEXIT:
case BPF_MODIFY_RETURN:
case BPF_TRACE_ITER:
+ case BPF_TRACE_SESSION:
return true;
default:
return false;
@@ -24014,9 +24019,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
tgt_info.tgt_name);
return -EINVAL;
} else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
+ prog->expected_attach_type == BPF_TRACE_SESSION ||
prog->expected_attach_type == BPF_MODIFY_RETURN) &&
btf_id_set_contains(&noreturn_deny, btf_id)) {
- verbose(env, "Attaching fexit/fmod_ret to __noreturn function '%s' is rejected.\n",
+ verbose(env, "Attaching fexit/session/fmod_ret to __noreturn function '%s' is rejected.\n",
tgt_info.tgt_name);
return -EINVAL;
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 05e30ff5b6f9..aa2b5b17a7c7 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -696,6 +696,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
switch (prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_SESSION:
if (bpf_fentry_test1(1) != 2 ||
bpf_fentry_test2(2, 3) != 5 ||
bpf_fentry_test3(4, 5, 6) != 15 ||
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d3fbaf89a698..8da8834aa134 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -365,6 +365,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
return true;
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_SESSION:
return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage",
strlen("bpf_sk_storage"));
default:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6829936d33f5..79ba3023e8be 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1133,6 +1133,7 @@ enum bpf_attach_type {
BPF_NETKIT_PEER,
BPF_TRACE_KPROBE_SESSION,
BPF_TRACE_UPROBE_SESSION,
+ BPF_TRACE_SESSION,
__MAX_BPF_ATTACH_TYPE
};
--
2.51.0
Powered by blists - more mailing lists