[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <b0f07f8d-841c-4d61-b5ae-108d26315297@huaweicloud.com>
Date: Tue, 12 Nov 2024 20:06:32 +0800
From: Xu Kuohai <xukuohai@...weicloud.com>
To: Martin KaFai Lau <martin.lau@...ux.dev>
Cc: bpf@...r.kernel.org, netdev@...r.kernel.org,
Alexei Starovoitov <ast@...nel.org>, Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>, Eduard Zingerman <eddyz87@...il.com>,
Yonghong Song <yonghong.song@...ux.dev>, Kui-Feng Lee <thinker.li@...il.com>
Subject: Re: [PATCH bpf-next v3 2/2] bpf: Add kernel symbol for struct_ops
trampoline
On 11/12/2024 7:04 AM, Martin KaFai Lau wrote:
> On 11/11/24 4:16 AM, Xu Kuohai wrote:
>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>> index e99fce81e916..d6dd56fc80d8 100644
>> --- a/kernel/bpf/bpf_struct_ops.c
>> +++ b/kernel/bpf/bpf_struct_ops.c
>> @@ -23,7 +23,6 @@ struct bpf_struct_ops_value {
>> struct bpf_struct_ops_map {
>> struct bpf_map map;
>> - struct rcu_head rcu;
>
> Since it needs a respin (more on it later), it will be useful to separate this cleanup as a separate patch in the same patch series.
>
OK
>> const struct bpf_struct_ops_desc *st_ops_desc;
>> /* protect map_update */
>> struct mutex lock;
>> @@ -32,6 +31,8 @@ struct bpf_struct_ops_map {
>> * (in kvalue.data).
>> */
>> struct bpf_link **links;
>> + /* ksyms for bpf trampolines */
>> + struct bpf_ksym **ksyms;
>> u32 funcs_cnt;
>> u32 image_pages_cnt;
>> /* image_pages is an array of pages that has all the trampolines
>> @@ -586,6 +587,49 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
>> return 0;
>> }
>> +static void bpf_struct_ops_ksym_init(const char *tname, const char *mname,
>> + void *image, unsigned int size,
>> + struct bpf_ksym *ksym)
>> +{
>> + snprintf(ksym->name, KSYM_NAME_LEN, "bpf__%s_%s", tname, mname);
>> + INIT_LIST_HEAD_RCU(&ksym->lnode);
>> + bpf_image_ksym_init(image, size, ksym);
>> +}
>> +
>> +static void bpf_struct_ops_map_ksyms_add(struct bpf_struct_ops_map *st_map)
>> +{
>> + u32 i;
>> +
>> + for (i = 0; i < st_map->funcs_cnt; i++) {
>> + if (!st_map->ksyms[i])
>> + break;
>> + bpf_image_ksym_add(st_map->ksyms[i]);
>> + }
>> +}
>> +
>> +static void bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map *st_map)
>> +{
>> + u32 i;
>> +
>> + for (i = 0; i < st_map->funcs_cnt; i++) {
>> + if (!st_map->ksyms[i])
>> + break;
>> + bpf_image_ksym_del(st_map->ksyms[i]);
>> + }
>> +}
>> +
>> +static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
>> +{
>> + u32 i;
>> +
>> + for (i = 0; i < st_map->funcs_cnt; i++) {
>> + if (!st_map->ksyms[i])
>> + break;
>> + kfree(st_map->ksyms[i]);
>> + st_map->links[i] = NULL;
>
> s/links/ksyms/
>
> pw-bot: cr
>
Oops, good catch
>> + }
>> +}
>> +
>> static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> void *value, u64 flags)
>> {
>> @@ -602,6 +646,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> u32 i, trampoline_start, image_off = 0;
>> void *cur_image = NULL, *image = NULL;
>> struct bpf_link **plink;
>> + struct bpf_ksym **pksym;
>> + const char *tname, *mname;
>> if (flags)
>> return -EINVAL;
>> @@ -641,14 +687,18 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> kdata = &kvalue->data;
>> plink = st_map->links;
>> + pksym = st_map->ksyms;
>> + tname = btf_name_by_offset(st_map->btf, t->name_off);
>> module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
>> for_each_member(i, t, member) {
>> const struct btf_type *mtype, *ptype;
>> struct bpf_prog *prog;
>> struct bpf_tramp_link *link;
>> + struct bpf_ksym *ksym;
>> u32 moff;
>> moff = __btf_member_bit_offset(t, member) / 8;
>> + mname = btf_name_by_offset(st_map->btf, member->name_off);
>> ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
>> if (ptype == module_type) {
>> if (*(void **)(udata + moff))
>> @@ -718,6 +768,14 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> &bpf_struct_ops_link_lops, prog);
>> *plink++ = &link->link;
>> + ksym = kzalloc(sizeof(*ksym), GFP_USER);
>
> link is also using kzalloc but probably both link and ksym allocation should use bpf_map_kzalloc instead. This switch can be done for both together later as a follow up patch.
>
OK, will do.
>> + if (!ksym) {
>> + bpf_prog_put(prog);
>
> afaik, this bpf_prog_put is not needed. The bpf_link_init above took the prog ownership and the bpf_struct_ops_map_put_progs() at the error path will take care of it.
>
Right, good catch
>> + err = -ENOMEM;
>> + goto reset_unlock;
>> + }
>> + *pksym = ksym;
>
> nit. Follow the *plink++ style above and does the same *pksym++ here.
>
OK
>> +
>> trampoline_start = image_off;
>> err = bpf_struct_ops_prepare_trampoline(tlinks, link,
>> &st_ops->func_models[i],
>> @@ -737,6 +795,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> /* put prog_id to udata */
>> *(unsigned long *)(udata + moff) = prog->aux->id;
>> +
>> + /* init ksym for this trampoline */
>> + bpf_struct_ops_ksym_init(tname, mname,
>> + image + trampoline_start,
>> + image_off - trampoline_start,
>> + *pksym++);
>
> then uses "ksym" here.
>
>> }
>> if (st_ops->validate) {
>> @@ -785,6 +849,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> */
>> reset_unlock:
>> + bpf_struct_ops_map_free_ksyms(st_map);
>> bpf_struct_ops_map_free_image(st_map);
>> bpf_struct_ops_map_put_progs(st_map);
>> memset(uvalue, 0, map->value_size);
>> @@ -792,6 +857,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> unlock:
>> kfree(tlinks);
>> mutex_unlock(&st_map->lock);
>> + if (!err)
>> + bpf_struct_ops_map_ksyms_add(st_map);
>> return err;
>> }
>> @@ -851,7 +918,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
>> if (st_map->links)
>> bpf_struct_ops_map_put_progs(st_map);
>> + if (st_map->ksyms)
>> + bpf_struct_ops_map_free_ksyms(st_map);
>> bpf_map_area_free(st_map->links);
>> + bpf_map_area_free(st_map->ksyms);
>> bpf_struct_ops_map_free_image(st_map);
>> bpf_map_area_free(st_map->uvalue);
>> bpf_map_area_free(st_map);
>> @@ -868,6 +938,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
>> if (btf_is_module(st_map->btf))
>> module_put(st_map->st_ops_desc->st_ops->owner);
>> + if (st_map->ksyms)
>
> This null test should not be needed.
>
Agree, will remove it.
>> + bpf_struct_ops_map_del_ksyms(st_map);
>> +
>> /* The struct_ops's function may switch to another struct_ops.
>> *
>> * For example, bpf_tcp_cc_x->init() may switch to
>> @@ -980,7 +1053,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>> st_map->links =
>> bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_links *),
>> NUMA_NO_NODE);
>> - if (!st_map->uvalue || !st_map->links) {
>> +
>> + st_map->ksyms =
>> + bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_ksyms *),
>> + NUMA_NO_NODE);
>
> .map_mem_usage at least needs to include the st_map->ksyms[] pointer array. func_cnts should be used instead of btf_type_vlen(vt) for link also in .map_mem_usage.
>
Right, agree
>> + if (!st_map->uvalue || !st_map->links || !st_map->ksyms) {
>> ret = -ENOMEM;
>> goto errout_free;
>> }
Powered by blists - more mailing lists