[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250528034712.138701-18-dongml2@chinatelecom.cn>
Date: Wed, 28 May 2025 11:47:04 +0800
From: Menglong Dong <menglong8.dong@...il.com>
To: alexei.starovoitov@...il.com,
rostedt@...dmis.org,
jolsa@...nel.org
Cc: bpf@...r.kernel.org,
Menglong Dong <dongml2@...natelecom.cn>,
linux-kernel@...r.kernel.org
Subject: [PATCH bpf-next 17/25] bpf: make trampoline compatible with global trampoline
For now, the bpf global trampoline can't work together with the regular
trampoline. For example, attaching FENTRY_MULTI to a function on which
FENTRY already exists will fail, and attaching FENTRY will likewise fail
if FENTRY_MULTI already exists on the function.
We make the global trampoline work together with the trampoline in this
commit.
It is not easy. The most difficult part is synchronization between
bpf_gtrampoline_link_prog and bpf_trampoline_link_prog, and we use a
rw_semaphore here, which is quite ugly. We hold the write lock in
bpf_gtrampoline_link_prog and read lock in bpf_trampoline_link_prog.
We introduce the function bpf_gtrampoline_link_tramp() to make
bpf_gtramp_link fit bpf_trampoline, which will be called in
bpf_gtrampoline_link_prog(). If the bpf_trampoline of the function exists
in the kfunc_md, or we find it with bpf_trampoline_lookup_exist(), it means
that we need to do the fitting. The fitting is simple: we create a
bpf_shim_tramp_link for our prog and link it to the bpf_trampoline with
__bpf_trampoline_link_prog().
It's a little more complex for the bpf_trampoline_link_prog() case. We
create a bpf_shim_tramp_link for each bpf prog in the kfunc_md and add it
to the bpf_trampoline before we call __bpf_trampoline_link_prog() in
bpf_gtrampoline_replace(). And we fall back in
bpf_gtrampoline_replace_finish() if an error is returned by
__bpf_trampoline_link_prog().
In __bpf_gtrampoline_unlink_prog(), we call bpf_gtrampoline_remove()
to release the bpf_shim_tramp_link; the bpf prog will then be unlinked in
bpf_link_free(), provided it was ever linked successfully.
Another solution is to fit into the existing trampoline. For example, we
can add the bpf prog to the kfunc_md if tracing_multi bpf prog is attached
on the target function when we attach a tracing bpf prog. And we can also
update the tracing_multi prog to the trampoline if tracing prog exists
on the target function. I think this will make the compatibility much
easier.
The code in this part is rather tangled, and I think it would be a
relief to split it out into another series :/
Signed-off-by: Menglong Dong <dongml2@...natelecom.cn>
---
include/linux/bpf.h | 6 +
include/linux/kfunc_md.h | 2 +
kernel/bpf/syscall.c | 2 +-
kernel/bpf/trampoline.c | 291 +++++++++++++++++++++++++++++++++++++--
kernel/trace/kfunc_md.c | 9 +-
5 files changed, 293 insertions(+), 17 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7191ad25d519..0f4605be87fc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1173,6 +1173,11 @@ struct btf_func_model {
*/
#define BPF_TRAMP_F_INDIRECT BIT(8)
+/* Indicate that bpf global trampoline is also used on this function and
+ * the trampoline is replacing it.
+ */
+#define BPF_TRAMP_F_REPLACE BIT(9)
+
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86.
*/
@@ -2554,6 +2559,7 @@ void bpf_link_put(struct bpf_link *link);
int bpf_link_new_fd(struct bpf_link *link);
struct bpf_link *bpf_link_get_from_fd(u32 ufd);
struct bpf_link *bpf_link_get_curr_or_next(u32 *id);
+void bpf_link_free(struct bpf_link *link);
void bpf_token_inc(struct bpf_token *token);
void bpf_token_put(struct bpf_token *token);
diff --git a/include/linux/kfunc_md.h b/include/linux/kfunc_md.h
index f1b1012eeab2..956e16f96d82 100644
--- a/include/linux/kfunc_md.h
+++ b/include/linux/kfunc_md.h
@@ -29,6 +29,8 @@ struct kfunc_md {
#endif
unsigned long func;
struct kfunc_md_tramp_prog *bpf_progs[BPF_TRAMP_MAX];
+ /* fallback case, there is already a trampoline on this function */
+ struct bpf_trampoline *tramp;
#ifdef CONFIG_FUNCTION_METADATA
/* the array is used for the fast mode */
struct kfunc_md_array *array;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0cd989381128..c1c92c2b2cfc 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3184,7 +3184,7 @@ static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
}
/* bpf_link_free is guaranteed to be called from process context */
-static void bpf_link_free(struct bpf_link *link)
+void bpf_link_free(struct bpf_link *link)
{
const struct bpf_link_ops *ops = link->ops;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index b92d1d4f1033..81b62aae9faf 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -14,6 +14,7 @@
#include <linux/bpf_lsm.h>
#include <linux/delay.h>
#include <linux/kfunc_md.h>
+#include <linux/execmem.h>
/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -142,20 +143,44 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym)
PAGE_SIZE, true, ksym->name);
}
-static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
+static struct bpf_trampoline *__bpf_trampoline_lookup_exist(u64 key)
{
struct bpf_trampoline *tr;
struct hlist_head *head;
- int i;
- mutex_lock(&trampoline_mutex);
head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
hlist_for_each_entry(tr, head, hlist) {
if (tr->key == key) {
refcount_inc(&tr->refcnt);
- goto out;
+ return tr;
}
}
+
+ return NULL;
+}
+
+static struct bpf_trampoline *bpf_trampoline_lookup_exist(u64 key)
+{
+ struct bpf_trampoline *tr;
+
+ mutex_lock(&trampoline_mutex);
+ tr = __bpf_trampoline_lookup_exist(key);
+ mutex_unlock(&trampoline_mutex);
+
+ return tr;
+}
+
+static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
+{
+ struct bpf_trampoline *tr;
+ struct hlist_head *head;
+ int i;
+
+ mutex_lock(&trampoline_mutex);
+ tr = __bpf_trampoline_lookup_exist(key);
+ if (tr)
+ goto out;
+
tr = kzalloc(sizeof(*tr), GFP_KERNEL);
if (!tr)
goto out;
@@ -172,6 +197,7 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
tr->key = key;
INIT_HLIST_NODE(&tr->hlist);
+ head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
hlist_add_head(&tr->hlist, head);
refcount_set(&tr->refcnt, 1);
mutex_init(&tr->mutex);
@@ -228,7 +254,11 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
if (tr->func.ftrace_managed) {
ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
- ret = register_ftrace_direct(tr->fops, (long)new_addr);
+ if (tr->flags & BPF_TRAMP_F_REPLACE)
+ ret = replace_ftrace_direct(tr->fops, global_tr.fops,
+ (long)new_addr);
+ else
+ ret = register_ftrace_direct(tr->fops, (long)new_addr);
} else {
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
}
@@ -236,6 +266,17 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
return ret;
}
+static int
+bpf_trampoline_get_count(const struct bpf_trampoline *tr)
+{
+ int count = 0;
+
+ for (int kind = 0; kind < BPF_TRAMP_MAX; kind++)
+ count += tr->progs_cnt[kind];
+
+ return count;
+}
+
static struct bpf_tramp_links *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
@@ -608,15 +649,173 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
return err;
}
+static int bpf_gtrampoline_get_link(struct bpf_trampoline *tr, struct bpf_prog *prog,
+ u64 cookie, int kind,
+ struct bpf_shim_tramp_link **link)
+{
+ struct bpf_shim_tramp_link *__link;
+
+ __link = kzalloc(sizeof(*__link), GFP_KERNEL);
+ if (!__link)
+ return -ENOMEM;
+
+ __link->link.cookie = cookie;
+
+ bpf_link_init(&__link->link.link, BPF_LINK_TYPE_UNSPEC,
+ &bpf_shim_tramp_link_lops, prog);
+
+ /* the bpf_shim_tramp_link will hold a reference on the prog and tr */
+ refcount_inc(&tr->refcnt);
+ bpf_prog_inc(prog);
+ *link = __link;
+
+ return 0;
+}
+
+static struct bpf_tramp_link *
+bpf_gtrampoline_find_link(struct bpf_trampoline *tr, struct bpf_prog *prog)
+{
+ struct bpf_tramp_link *link;
+
+ for (int kind = 0; kind < BPF_TRAMP_MAX; kind++) {
+ hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
+ if (link->link.prog == prog)
+ return link;
+ }
+ }
+
+ return NULL;
+}
+
+static int bpf_gtrampoline_remove(struct bpf_trampoline *tr, struct bpf_prog *prog,
+ bool remove_list)
+{
+ struct bpf_shim_tramp_link *slink;
+ int kind;
+
+ slink = (struct bpf_shim_tramp_link *)bpf_gtrampoline_find_link(tr, prog);
+ if (WARN_ON_ONCE(!slink))
+ return -EINVAL;
+
+ if (!slink->trampoline && remove_list) {
+ kind = bpf_attach_type_to_tramp(prog);
+ hlist_del_init(&slink->link.tramp_hlist);
+ tr->progs_cnt[kind]--;
+ }
+ bpf_link_free(&slink->link.link);
+
+ return 0;
+}
+
+static int bpf_gtrampoline_replace(struct bpf_trampoline *tr)
+{
+ struct kfunc_md_tramp_prog *progs;
+ struct bpf_shim_tramp_link *link;
+ struct kfunc_md *md;
+ int err = 0, count;
+
+ kfunc_md_lock();
+ md = kfunc_md_get((unsigned long)tr->func.addr);
+ if (!md || md->tramp) {
+ kfunc_md_put_entry(md);
+ kfunc_md_unlock();
+ return 0;
+ }
+ kfunc_md_unlock();
+
+ rcu_read_lock();
+ md = kfunc_md_get_noref((unsigned long)tr->func.addr);
+ if (!md || md->tramp)
+ goto on_fail;
+
+ count = bpf_trampoline_get_count(tr);
+ /* we are attaching a new link, so +1 here */
+ count += md->bpf_prog_cnt + 1;
+ if (count > BPF_MAX_TRAMP_LINKS) {
+ err = -E2BIG;
+ goto on_fail;
+ }
+
+ for (int kind = 0; kind < BPF_TRAMP_MAX; kind++) {
+ progs = md->bpf_progs[kind];
+ while (progs) {
+ err = bpf_gtrampoline_get_link(tr, progs->prog, progs->cookie,
+ kind, &link);
+ if (err)
+ goto on_fail;
+
+ hlist_add_head(&link->link.tramp_hlist, &tr->progs_hlist[kind]);
+ tr->progs_cnt[kind]++;
+ progs = progs->next;
+ link->trampoline = tr;
+ }
+ }
+
+ tr->flags |= BPF_TRAMP_F_REPLACE;
+ rcu_read_unlock();
+
+ return 0;
+
+on_fail:
+ kfunc_md_put_entry(md);
+ rcu_read_unlock();
+
+ return err;
+}
+
+static void bpf_gtrampoline_replace_finish(struct bpf_trampoline *tr, int err)
+{
+ struct kfunc_md_tramp_prog *progs;
+ struct kfunc_md *md;
+
+ if (!(tr->flags & BPF_TRAMP_F_REPLACE))
+ return;
+
+ kfunc_md_lock();
+ md = kfunc_md_get_noref((unsigned long)tr->func.addr);
+ /* this shouldn't happen, as the md->tramp can only be set with
+ * global_tr_lock.
+ */
+ if (WARN_ON_ONCE(!md || md->tramp))
+ return;
+
+ if (err) {
+ for (int kind = 0; kind < BPF_TRAMP_MAX; kind++) {
+ progs = md->bpf_progs[kind];
+ while (progs) {
+ /* the progs is already added to trampoline
+ * and we need clean it on this case.
+ */
+ bpf_gtrampoline_remove(tr, progs->prog, true);
+ progs = progs->next;
+ }
+ }
+ } else {
+ md->tramp = tr;
+ }
+
+ kfunc_md_put_entry(md);
+ kfunc_md_unlock();
+}
+
int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
int err;
- mutex_lock(&tr->mutex);
- err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
- mutex_unlock(&tr->mutex);
+ down_read(&global_tr_lock);
+
+ err = bpf_gtrampoline_replace(tr);
+ if (!err) {
+ mutex_lock(&tr->mutex);
+ err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
+ mutex_unlock(&tr->mutex);
+ }
+
+ bpf_gtrampoline_replace_finish(tr, err);
+ up_read(&global_tr_lock);
+
return err;
}
@@ -745,7 +944,7 @@ int bpf_gtrampoline_unlink_prog(struct bpf_gtramp_link *link)
kfunc_md_lock();
for (int i = 0; i < link->entry_cnt; i++) {
md = kfunc_md_get_noref((long)link->entries[i].addr);
- if (WARN_ON_ONCE(!md))
+ if (WARN_ON_ONCE(!md) || md->tramp)
continue;
md->flags |= KFUNC_MD_FL_BPF_REMOVING;
@@ -761,13 +960,65 @@ int bpf_gtrampoline_unlink_prog(struct bpf_gtramp_link *link)
return err;
}
+static int bpf_gtrampoline_link_tramp(struct bpf_gtramp_link_entry *entry,
+ struct bpf_prog *prog)
+{
+ struct bpf_trampoline *tr, *new_tr = NULL;
+ struct bpf_shim_tramp_link *slink = NULL;
+ struct kfunc_md *md;
+ int err, kind;
+ u64 key;
+
+ kfunc_md_lock();
+ md = kfunc_md_get_noref((long)entry->addr);
+ kind = bpf_attach_type_to_tramp(prog);
+ if (!md->tramp) {
+ key = bpf_trampoline_compute_key(NULL, entry->attach_btf,
+ entry->btf_id);
+ new_tr = bpf_trampoline_lookup_exist(key);
+ md->tramp = new_tr;
+ }
+
+ /* check if we need to be replaced by trampoline */
+ tr = md->tramp;
+ kfunc_md_unlock();
+ if (!tr)
+ return 0;
+
+ mutex_lock(&tr->mutex);
+ err = bpf_gtrampoline_get_link(tr, prog, entry->cookie, kind, &slink);
+ if (err)
+ goto err_out;
+
+ err = __bpf_trampoline_link_prog(&slink->link, tr, NULL);
+ if (err)
+ goto err_out;
+ mutex_unlock(&tr->mutex);
+
+ bpf_trampoline_put(new_tr);
+ /* this can only be set on the link success */
+ slink->trampoline = tr;
+ tr->flags |= BPF_TRAMP_F_REPLACE;
+
+ return 0;
+err_out:
+ mutex_unlock(&tr->mutex);
+
+ bpf_trampoline_put(new_tr);
+ if (slink) {
+ bpf_trampoline_put(tr);
+ bpf_link_free(&slink->link.link);
+ }
+ return err;
+}
+
int bpf_gtrampoline_link_prog(struct bpf_gtramp_link *link)
{
struct bpf_gtramp_link_entry *entry;
enum bpf_tramp_prog_type kind;
struct bpf_prog *prog;
struct kfunc_md *md;
- bool update = false;
+ bool update = false, linked;
int err = 0, i;
prog = link->link.prog;
@@ -785,6 +1036,7 @@ int bpf_gtrampoline_link_prog(struct bpf_gtramp_link *link)
* lock instead.
*/
kfunc_md_lock();
+ linked = false;
md = kfunc_md_create((long)entry->addr, entry->nr_args);
if (md) {
/* the function is not in the filter hash of gtr,
@@ -793,16 +1045,27 @@ int bpf_gtrampoline_link_prog(struct bpf_gtramp_link *link)
if (!md->bpf_prog_cnt)
update = true;
err = kfunc_md_bpf_link(md, prog, kind, entry->cookie);
+ if (!err)
+ linked = true;
} else {
err = -ENOMEM;
}
+ kfunc_md_unlock();
- if (err) {
- kfunc_md_put_entry(md);
- kfunc_md_unlock();
- goto on_fallback;
+ if (!err) {
+ err = bpf_gtrampoline_link_tramp(entry, prog);
+ if (!err)
+ continue;
}
+
+ /* on error case, fallback the md and previous */
+ kfunc_md_lock();
+ md = kfunc_md_get_noref((long)entry->addr);
+ if (linked)
+ kfunc_md_bpf_unlink(md, prog, kind);
+ kfunc_md_put_entry(md);
kfunc_md_unlock();
+ goto on_fallback;
}
if (update) {
diff --git a/kernel/trace/kfunc_md.c b/kernel/trace/kfunc_md.c
index ebb4e46d482d..5d61a8be3768 100644
--- a/kernel/trace/kfunc_md.c
+++ b/kernel/trace/kfunc_md.c
@@ -141,7 +141,8 @@ static int kfunc_md_hash_bpf_ips(void **ips)
for (i = 0; i < (1 << KFUNC_MD_HASH_BITS); i++) {
head = &kfunc_md_table[i];
hlist_for_each_entry(md, head, hash) {
- if (md->bpf_prog_cnt > !!(md->flags & KFUNC_MD_FL_BPF_REMOVING))
+ if (md->bpf_prog_cnt > !!(md->flags & KFUNC_MD_FL_BPF_REMOVING) &&
+ !md->tramp)
ips[c++] = (void *)md->func;
}
}
@@ -472,7 +473,8 @@ static int kfunc_md_fast_bpf_ips(void **ips)
for (i = 0; i < kfunc_mds->kfunc_md_count; i++) {
md = &kfunc_mds->mds[i];
- if (md->users && md->bpf_prog_cnt > !!(md->flags & KFUNC_MD_FL_BPF_REMOVING))
+ if (md->users && md->bpf_prog_cnt > !!(md->flags & KFUNC_MD_FL_BPF_REMOVING) &&
+ !md->tramp)
ips[c++] = (void *)md->func;
}
return c;
@@ -662,6 +664,9 @@ int kfunc_md_bpf_unlink(struct kfunc_md *md, struct bpf_prog *prog, int type)
!md->bpf_progs[BPF_TRAMP_MODIFY_RETURN])
md->flags &= ~KFUNC_MD_FL_TRACING_ORIGIN;
+ if (!md->bpf_prog_cnt)
+ md->tramp = NULL;
+
return 0;
}
--
2.39.5
Powered by blists - more mailing lists