[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <174642a334760af39a5e7bacdd8b977b392a82c7.camel@gmail.com>
Date: Tue, 21 Oct 2025 11:59:02 -0700
From: Eduard Zingerman <eddyz87@...il.com>
To: Donglin Peng <dolinux.peng@...il.com>, ast@...nel.org
Cc: linux-kernel@...r.kernel.org, bpf@...r.kernel.org, Andrii Nakryiko
<andrii.nakryiko@...il.com>, Alan Maguire <alan.maguire@...cle.com>, Song
Liu <song@...nel.org>, pengdonglin <pengdonglin@...omi.com>
Subject: Re: [RFC PATCH v2 2/5] btf: sort BTF types by kind and name to
enable binary search
On Mon, 2025-10-20 at 17:39 +0800, Donglin Peng wrote:
> This patch implements sorting of BTF types by their kind and name,
> enabling the use of binary search for type lookups.
>
> To share logic between kernel and libbpf, a new btf_sort.c file is
> introduced containing common sorting functionality.
>
> The sorting is performed during btf__dedup() when the new
> sort_by_kind_name option in btf_dedup_opts is enabled.
Do we really need this option? Dedup is free to rearrange btf types
anyway, so why not sort always? Is execution time a concern?
> For vmlinux and kernel module BTF, btf_check_sorted() verifies
> whether the types are sorted and binary search can be used.
[...]
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index c414cf37e1bd..11b05f4eb07d 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -259,6 +259,7 @@ struct btf {
> void *nohdr_data;
> struct btf_header hdr;
> u32 nr_types; /* includes VOID for base BTF */
> + u32 nr_sorted_types;
> u32 types_size;
> u32 data_size;
> refcount_t refcnt;
> @@ -544,33 +545,29 @@ u32 btf_nr_types(const struct btf *btf)
> return total;
> }
>
> -u32 btf_type_cnt(const struct btf *btf)
> +u32 btf_start_id(const struct btf *btf)
> {
> - return btf->start_id + btf->nr_types;
> + return btf->start_id;
> }
>
> -s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
> +u32 btf_nr_sorted_types(const struct btf *btf)
> {
> - const struct btf_type *t;
> - const char *tname;
> - u32 i, total;
> -
> - do {
> - total = btf_type_cnt(btf);
> - for (i = btf->start_id; i < total; i++) {
> - t = btf_type_by_id(btf, i);
> - if (BTF_INFO_KIND(t->info) != kind)
> - continue;
> + return btf->nr_sorted_types;
> +}
>
> - tname = btf_name_by_offset(btf, t->name_off);
> - if (!strcmp(tname, name))
> - return i;
> - }
> +void btf_set_nr_sorted_types(struct btf *btf, u32 nr)
> +{
> + btf->nr_sorted_types = nr;
> +}
>
> - btf = btf->base_btf;
> - } while (btf);
> +u32 btf_type_cnt(const struct btf *btf)
> +{
> + return btf->start_id + btf->nr_types;
> +}
>
> - return -ENOENT;
> +s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
> +{
> + return find_btf_by_name_kind(btf, 1, name, kind);
^^^
nit: this will make it impossible to find "void" w/o a special case
in the find_btf_by_name_kind(), why not start from 0?
> }
>
> s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p)
[...]
> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index 18907f0fcf9f..87e47f0b78ba 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c
[...]
> +/*
> + * Compact and sort BTF types.
> + *
> + * Similar to btf_dedup_compact_types, but additionally sorts the btf_types.
> + */
> +static int btf__dedup_compact_and_sort_types(struct btf_dedup *d)
> +{
And this function will become btf__dedup_compact_types(),
if BTF will be always sorted. Thus removing some code duplication.
[...]
> diff --git a/tools/lib/bpf/btf_sort.c b/tools/lib/bpf/btf_sort.c
> new file mode 100644
> index 000000000000..2ad4a56f1c08
> --- /dev/null
> +++ b/tools/lib/bpf/btf_sort.c
[...]
> +/*
> + * Sort BTF types by kind and name in ascending order, placing named types
> + * before anonymous ones.
> + */
> +int btf_compare_type_kinds_names(const void *a, const void *b, void *priv)
> +{
> + struct btf *btf = (struct btf *)priv;
> + struct btf_type *ta = btf_type_by_id(btf, *(__u32 *)a);
> + struct btf_type *tb = btf_type_by_id(btf, *(__u32 *)b);
> + const char *na, *nb;
> + int ka, kb;
> +
> + /* ta w/o name is greater than tb */
> + if (!ta->name_off && tb->name_off)
> + return 1;
> + /* tb w/o name is smaller than ta */
> + if (ta->name_off && !tb->name_off)
> + return -1;
> +
> + ka = btf_kind(ta);
> + kb = btf_kind(tb);
> + na = btf__str_by_offset(btf, ta->name_off);
> + nb = btf__str_by_offset(btf, tb->name_off);
> +
> + return cmp_btf_kind_name(ka, na, kb, nb);
If both types are anonymous and have the same kind, this will lead to
strcmp(NULL, NULL). On kernel side that would lead to null pointer
dereference.
> +}
> +
> +__s32 find_btf_by_name_kind(const struct btf *btf, int start_id,
> + const char *type_name, __u32 kind)
Nit: having functions with names btf_find_by_name_kind and
find_btf_by_name_kind
is very confusing.
Usually we use names like __<func> for auxiliary functions
like this.
> +{
> + const struct btf_type *t;
> + const char *tname;
> + __u32 i, total;
> +
> + if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
> + return 0;
> +
> + do {
> + if (btf__nr_sorted_types(btf)) {
> + /* binary search */
> + __s32 start, end, mid, found = -1;
> + int ret;
> +
> + start = btf__start_id(btf);
> + end = start + btf__nr_sorted_types(btf) - 1;
> + /* found the leftmost btf_type that matches */
> + while(start <= end) {
> + mid = start + (end - start) / 2;
> + t = btf_type_by_id(btf, mid);
> + tname = btf__name_by_offset(btf, t->name_off);
> + ret = cmp_btf_kind_name(BTF_INFO_KIND(t->info), tname,
> + kind, type_name);
> + if (ret == 0)
> + found = mid;
> + if (ret < 0)
> + start = mid + 1;
> + else if (ret >= 0)
> + end = mid - 1;
> + }
> +
> + if (found != -1)
> + return found;
> + } else {
> + /* linear search */
> + total = btf__type_cnt(btf);
> + for (i = btf__start_id(btf); i < total; i++) {
> + t = btf_type_by_id(btf, i);
> + if (btf_kind(t) != kind)
> + continue;
> +
> + tname = btf__name_by_offset(btf, t->name_off);
> + if (tname && !strcmp(tname, type_name))
> + return i;
> + }
> + }
> +
> + btf = btf__base_btf(btf);
> + } while (btf && btf__start_id(btf) >= start_id);
> +
> + return libbpf_err(-ENOENT);
> +}
> +
> +void btf_check_sorted(struct btf *btf, int start_id)
> +{
> + const struct btf_type *t;
> + int i, n, nr_sorted_types;
> +
> + n = btf__type_cnt(btf);
> + if ((n - start_id) < BTF_CHECK_SORT_THRESHOLD)
> + return;
Are there any measurable performance benefits from having this special case?
> +
> + n--;
> + nr_sorted_types = 0;
> + for (i = start_id; i < n; i++) {
> + int k = i + 1;
> +
> + t = btf_type_by_id(btf, i);
> + if (!btf__str_by_offset(btf, t->name_off))
> + return;
I am confused.
This effectively bans BTFs with anonymous types,
as btf__set_nr_sorted_types() wont be called if such types are found.
Anonymous types are very common, e.g. all FUNC_PROTO are anonymous.
> +
> + t = btf_type_by_id(btf, k);
> + if (!btf__str_by_offset(btf, t->name_off))
> + return;
> +
> + if (btf_compare_type_kinds_names(&i, &k, btf) > 0)
> + return;
> +
> + if (t->name_off)
> + nr_sorted_types++;
> + }
> +
> + t = btf_type_by_id(btf, start_id);
> + if (t->name_off)
> + nr_sorted_types++;
> + if (nr_sorted_types >= BTF_CHECK_SORT_THRESHOLD)
> + btf__set_nr_sorted_types(btf, nr_sorted_types);
> +}
> +
[...]
Powered by blists - more mailing lists