[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <de10c18b-5861-911e-ace8-eb599b72b0a8@fb.com>
Date: Sun, 6 Jun 2021 22:36:57 -0700
From: Yonghong Song <yhs@...com>
To: Jiri Olsa <jolsa@...nel.org>, Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andriin@...com>,
"Steven Rostedt (VMware)" <rostedt@...dmis.org>
CC: <netdev@...r.kernel.org>, <bpf@...r.kernel.org>,
Martin KaFai Lau <kafai@...com>,
Song Liu <songliubraving@...com>,
John Fastabend <john.fastabend@...il.com>,
KP Singh <kpsingh@...omium.org>, Daniel Xu <dxu@...uu.xyz>,
Viktor Malik <vmalik@...hat.com>
Subject: Re: [PATCH 13/19] bpf: Add support to link multi func tracing program
On 6/5/21 4:10 AM, Jiri Olsa wrote:
> Adding support to attach multiple functions to tracing program
> by using the link_create/link_update interface.
>
> Adding multi_btf_ids/multi_btf_ids_cnt pair to link_create struct
> API, that define array of functions btf ids that will be attached
> to prog_fd.
>
> The prog_fd needs to be multi prog tracing program (BPF_F_MULTI_FUNC).
>
> The new link_create interface creates new BPF_LINK_TYPE_TRACING_MULTI
> link type, which creates separate bpf_trampoline and registers it
> as direct function for all specified btf ids.
>
> The new bpf_trampoline is out of scope (bpf_trampoline_lookup) of
> standard trampolines, so all registered functions need to be free
> of direct functions, otherwise the link fails.
I am not sure how severe such a limitation could be in practice.
It is possible that in production some non-multi fentry/fexit program
may be running continuously. Does a kprobe program affect this as well?
>
> The new bpf_trampoline will store and pass to bpf program the highest
> number of arguments from all given functions.
>
> New programs (fentry or fexit) can be added to the existing trampoline
> through the link_update interface via new_prog_fd descriptor.
It looks like we do not support replacing old programs. Do we support
removing old programs?
>
> Signed-off-by: Jiri Olsa <jolsa@...nel.org>
> ---
> include/linux/bpf.h | 3 +
> include/uapi/linux/bpf.h | 5 +
> kernel/bpf/syscall.c | 185 ++++++++++++++++++++++++++++++++-
> kernel/bpf/trampoline.c | 53 +++++++---
> tools/include/uapi/linux/bpf.h | 5 +
> 5 files changed, 237 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 23221e0e8d3c..99a81c6c22e6 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -661,6 +661,7 @@ struct bpf_trampoline {
> struct bpf_tramp_image *cur_image;
> u64 selector;
> struct module *mod;
> + bool multi;
> };
>
> struct bpf_attach_target_info {
> @@ -746,6 +747,8 @@ void bpf_ksym_add(struct bpf_ksym *ksym);
> void bpf_ksym_del(struct bpf_ksym *ksym);
> int bpf_jit_charge_modmem(u32 pages);
> void bpf_jit_uncharge_modmem(u32 pages);
> +struct bpf_trampoline *bpf_trampoline_multi_alloc(void);
> +void bpf_trampoline_multi_free(struct bpf_trampoline *tr);
> #else
> static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
> struct bpf_trampoline *tr)
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index ad9340fb14d4..5fd6ff64e8dc 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1007,6 +1007,7 @@ enum bpf_link_type {
> BPF_LINK_TYPE_ITER = 4,
> BPF_LINK_TYPE_NETNS = 5,
> BPF_LINK_TYPE_XDP = 6,
> + BPF_LINK_TYPE_TRACING_MULTI = 7,
>
> MAX_BPF_LINK_TYPE,
> };
> @@ -1454,6 +1455,10 @@ union bpf_attr {
> __aligned_u64 iter_info; /* extra bpf_iter_link_info */
> __u32 iter_info_len; /* iter_info length */
> };
> + struct {
> + __aligned_u64 multi_btf_ids; /* addresses to attach */
> + __u32 multi_btf_ids_cnt; /* addresses count */
> + };
> };
> } link_create;
>
[...]
> +static int bpf_tracing_multi_link_fill_link_info(const struct bpf_link *link,
> + struct bpf_link_info *info)
> +{
> + struct bpf_tracing_multi_link *tr_link =
> + container_of(link, struct bpf_tracing_multi_link, link);
> +
> + info->tracing.attach_type = tr_link->attach_type;
> + return 0;
> +}
> +
> +static int check_multi_prog_type(struct bpf_prog *prog)
> +{
> + if (!prog->aux->multi_func &&
> + prog->type != BPF_PROG_TYPE_TRACING)
I think the prog->type != BPF_PROG_TYPE_TRACING check is not needed.
Shouldn't it already have been checked at program load time?
> + return -EINVAL;
> + if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
> + prog->expected_attach_type != BPF_TRACE_FEXIT)
> + return -EINVAL;
> + return 0;
> +}
> +
> +static int bpf_tracing_multi_link_update(struct bpf_link *link,
> + struct bpf_prog *new_prog,
> + struct bpf_prog *old_prog __maybe_unused)
> +{
> + struct bpf_tracing_multi_link *tr_link =
> + container_of(link, struct bpf_tracing_multi_link, link);
> + int err;
> +
> + if (check_multi_prog_type(new_prog))
> + return -EINVAL;
> +
> + err = bpf_trampoline_link_prog(new_prog, tr_link->tr);
> + if (err)
> + return err;
> +
> + err = modify_ftrace_direct_multi(&tr_link->ops,
> + (unsigned long) tr_link->tr->cur_image->image);
> + return WARN_ON(err);
Why WARN_ON here? A comment explaining it would be good.
> +}
> +
> +static const struct bpf_link_ops bpf_tracing_multi_link_lops = {
> + .release = bpf_tracing_multi_link_release,
> + .dealloc = bpf_tracing_multi_link_dealloc,
> + .show_fdinfo = bpf_tracing_multi_link_show_fdinfo,
> + .fill_link_info = bpf_tracing_multi_link_fill_link_info,
> + .update_prog = bpf_tracing_multi_link_update,
> +};
> +
[...]
> +
> struct bpf_raw_tp_link {
> struct bpf_link link;
> struct bpf_raw_event_map *btp;
> @@ -3043,6 +3222,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
> case BPF_CGROUP_SETSOCKOPT:
> return BPF_PROG_TYPE_CGROUP_SOCKOPT;
> case BPF_TRACE_ITER:
> + case BPF_TRACE_FENTRY:
> + case BPF_TRACE_FEXIT:
> return BPF_PROG_TYPE_TRACING;
> case BPF_SK_LOOKUP:
> return BPF_PROG_TYPE_SK_LOOKUP;
> @@ -4099,6 +4280,8 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
>
> if (prog->expected_attach_type == BPF_TRACE_ITER)
> return bpf_iter_link_attach(attr, uattr, prog);
> + else if (prog->aux->multi_func)
> + return bpf_tracing_multi_attach(prog, attr);
> else if (prog->type == BPF_PROG_TYPE_EXT)
> return bpf_tracing_prog_attach(prog,
> attr->link_create.target_fd,
> @@ -4106,7 +4289,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
> return -EINVAL;
> }
>
> -#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
> +#define BPF_LINK_CREATE_LAST_FIELD link_create.multi_btf_ids_cnt
It would be okay not to change this. link_create.iter_info_len
has the same effect, since the two fields are in a union.
> static int link_create(union bpf_attr *attr, bpfptr_t uattr)
> {
> enum bpf_prog_type ptype;
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 2755fdcf9fbf..660b8197c27f 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -58,7 +58,7 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym)
> PAGE_SIZE, true, ksym->name);
> }
>
> -static struct bpf_trampoline *bpf_trampoline_alloc(void)
> +static struct bpf_trampoline *bpf_trampoline_alloc(bool multi)
> {
> struct bpf_trampoline *tr;
> int i;
> @@ -72,6 +72,7 @@ static struct bpf_trampoline *bpf_trampoline_alloc(void)
> mutex_init(&tr->mutex);
> for (i = 0; i < BPF_TRAMP_MAX; i++)
> INIT_HLIST_HEAD(&tr->progs_hlist[i]);
> + tr->multi = multi;
> return tr;
> }
>
> @@ -88,7 +89,7 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
> goto out;
> }
> }
> - tr = bpf_trampoline_alloc();
> + tr = bpf_trampoline_alloc(false);
> if (tr) {
> tr->key = key;
> hlist_add_head(&tr->hlist, head);
> @@ -343,14 +344,16 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
> struct bpf_tramp_image *im;
> struct bpf_tramp_progs *tprogs;
> u32 flags = BPF_TRAMP_F_RESTORE_REGS;
> - int err, total;
> + bool update = !tr->multi;
> + int err = 0, total;
>
> tprogs = bpf_trampoline_get_progs(tr, &total);
> if (IS_ERR(tprogs))
> return PTR_ERR(tprogs);
>
> if (total == 0) {
> - err = unregister_fentry(tr, tr->cur_image->image);
> + if (update)
> + err = unregister_fentry(tr, tr->cur_image->image);
> bpf_tramp_image_put(tr->cur_image);
> tr->cur_image = NULL;
> tr->selector = 0;
> @@ -363,9 +366,15 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
> goto out;
> }
>
> + if (tr->multi)
> + flags |= BPF_TRAMP_F_IP_ARG;
> +
> if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
> - tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
> + tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) {
> flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
> + if (tr->multi)
> + flags |= BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_IP_ARG;
BPF_TRAMP_F_IP_ARG is not needed. It has been added before.
> + }
>
> err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
> &tr->func.model, flags, tprogs,
> @@ -373,16 +382,19 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
> if (err < 0)
> goto out;
>
> + err = 0;
> WARN_ON(tr->cur_image && tr->selector == 0);
> WARN_ON(!tr->cur_image && tr->selector);
> - if (tr->cur_image)
> - /* progs already running at this address */
> - err = modify_fentry(tr, tr->cur_image->image, im->image);
> - else
> - /* first time registering */
> - err = register_fentry(tr, im->image);
> - if (err)
> - goto out;
> + if (update) {
> + if (tr->cur_image)
> + /* progs already running at this address */
> + err = modify_fentry(tr, tr->cur_image->image, im->image);
> + else
> + /* first time registering */
> + err = register_fentry(tr, im->image);
> + if (err)
> + goto out;
> + }
> if (tr->cur_image)
> bpf_tramp_image_put(tr->cur_image);
> tr->cur_image = im;
> @@ -436,6 +448,10 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
> err = -EBUSY;
> goto out;
> }
> + if (tr->multi) {
> + err = -EINVAL;
> + goto out;
> + }
> tr->extension_prog = prog;
> err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
> prog->bpf_func);
[...]
Powered by blists - more mailing lists