[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <cf62c79d-cba5-49dc-9099-fc86d54ee864@linux.dev>
Date: Mon, 4 Nov 2024 16:10:52 -0800
From: Martin KaFai Lau <martin.lau@...ux.dev>
To: Xu Kuohai <xukuohai@...weicloud.com>
Cc: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>, Andrii Nakryiko <andrii@...nel.org>,
Eduard Zingerman <eddyz87@...il.com>, Yonghong Song
<yonghong.song@...ux.dev>, Kui-Feng Lee <thinker.li@...il.com>,
bpf@...r.kernel.org, netdev@...r.kernel.org
Subject: Re: [PATCH bpf-next v2] bpf: Add kernel symbol for struct_ops
trampoline
On 11/1/24 4:19 AM, Xu Kuohai wrote:
> static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> void *value, u64 flags)
> {
> @@ -601,6 +633,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> int prog_fd, err;
> u32 i, trampoline_start, image_off = 0;
> void *cur_image = NULL, *image = NULL;
> + struct bpf_ksym *ksym;
>
> if (flags)
> return -EINVAL;
> @@ -640,6 +673,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> kdata = &kvalue->data;
>
> module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
> + ksym = st_map->ksyms;
> for_each_member(i, t, member) {
> const struct btf_type *mtype, *ptype;
> struct bpf_prog *prog;
> @@ -735,6 +769,11 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>
> /* put prog_id to udata */
> *(unsigned long *)(udata + moff) = prog->aux->id;
> +
> + /* init ksym for this trampoline */
> + bpf_struct_ops_ksym_init(prog, image + trampoline_start,
> + image_off - trampoline_start,
> + ksym++);
> }
>
> if (st_ops->validate) {
> @@ -790,6 +829,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> unlock:
> kfree(tlinks);
> mutex_unlock(&st_map->lock);
> + if (!err)
> + bpf_struct_ops_map_ksyms_add(st_map);
> return err;
> }
>
> static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
> {
> const struct bpf_struct_ops_desc *st_ops_desc;
> @@ -905,6 +963,8 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
> struct bpf_map *map;
> struct btf *btf;
> int ret;
> + size_t ksyms_offset;
> + u32 ksyms_cnt;
>
> if (attr->map_flags & BPF_F_VTYPE_BTF_OBJ_FD) {
> /* The map holds btf for its whole life time. */
> @@ -951,6 +1011,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
> */
> (vt->size - sizeof(struct bpf_struct_ops_value));
>
> + st_map_size = round_up(st_map_size, sizeof(struct bpf_ksym));
> + ksyms_offset = st_map_size;
> + ksyms_cnt = count_func_ptrs(btf, t);
> + st_map_size += ksyms_cnt * sizeof(struct bpf_ksym);
> +
> st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
> if (!st_map) {
> ret = -ENOMEM;
> @@ -958,6 +1023,8 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
> }
>
> st_map->st_ops_desc = st_ops_desc;
> + st_map->ksyms = (void *)st_map + ksyms_offset;
nit. The st_map->ksyms is very similar to the existing st_map->links. Can we do
the allocation similar to the st_map->links and use another bpf_map_area_alloc()
instead of doing the round_up() and then figuring out the ksyms_offset.
> + st_map->ksyms_cnt = ksyms_cnt;
The same goes for ksyms_cnt. ksyms_cnt is almost the same as the
st_map->links_cnt. st_map->links_cnt unnecessarily includes the non func ptr
(i.e. a waste). The st_map->links[i] must be NULL if the i-th member of a struct
is not a func ptr.
If this patch adds the count_func_ptrs(), I think at least just have one
variable to mean funcs_cnt instead of adding another new ksyms_cnt. Both the
existing st_map->links and the new st_map->ksyms can use the same funcs_cnt. An
adjustment is needed for link in update_elem (probably use link++ similar to
your ksym++ idea). bpf_struct_ops_map_put_progs() should work as is.
Also, the actual bpf_link is currently allocated during update_elem() only when
there is a bpf prog for an ops. The new st_map->ksyms pre-allocated everything
during map_alloc() regardless if there will be a bpf prog (e.g.
tcp_congestion_ops has 5 optional ops). I don't have a strong opinion on
pre-allocate everything in map_alloc() or allocate on-demand in update_elem().
However, considering bpf_ksym has a "char name[KSYM_NAME_LEN]", the on-demand
allocation on bpf_link becomes not very useful. If the next respin stays with
the pre-allocate everything way, it is useful to followup later to stay with one
way and do the same for bpf_link.
Powered by blists - more mailing lists