linux-kernel - Re: [RFC PATCH v3 1/3] btf: implement BTF type sorting for accelerated lookups

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEf4BzZXVARn5_-3eMmPupU-gun7p3VX-VuCVOuHBC0o0L-Pjg@mail.gmail.com>
Date: Tue, 28 Oct 2025 11:38:31 -0700
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Donglin Peng <dolinux.peng@...il.com>
Cc: ast@...nel.org, linux-kernel@...r.kernel.org, bpf@...r.kernel.org, 
	Eduard Zingerman <eddyz87@...il.com>, Alan Maguire <alan.maguire@...cle.com>, Song Liu <song@...nel.org>, 
	pengdonglin <pengdonglin@...omi.com>
Subject: Re: [RFC PATCH v3 1/3] btf: implement BTF type sorting for
 accelerated lookups

On Mon, Oct 27, 2025 at 6:54 AM Donglin Peng <dolinux.peng@...il.com> wrote:
>
> This patch introduces a new libbpf interface btf__permute() to reorganize
> BTF types according to a provided mapping. The BTF lookup mechanism is
> enhanced with binary search capability, significantly improving lookup
> performance for large type sets.
>
> The pahole tool can invoke this interface with a sorted type ID array,
> enabling binary search in both user space and kernel. To share core logic
> between kernel and libbpf, common sorting functionality is implemented
> in a new btf_sort.c source file.
>
> Cc: Eduard Zingerman <eddyz87@...il.com>
> Cc: Alexei Starovoitov <ast@...nel.org>
> Cc: Andrii Nakryiko <andrii.nakryiko@...il.com>
> Cc: Alan Maguire <alan.maguire@...cle.com>
> Cc: Song Liu <song@...nel.org>
> Co-developed-by: Eduard Zingerman <eddyz87@...il.com>
> Signed-off-by: pengdonglin <pengdonglin@...omi.com>
> Signed-off-by: Donglin Peng <dolinux.peng@...il.com>
> ---
> v2->v3:
> - Remove sorting logic from libbpf and provide a generic btf__permute() interface
> - Remove the search direction patch since sorted lookup provides sufficient performance
>   and changing search order could cause conflicts between BTF and base BTF
> - Include btf_sort.c directly in btf.c to reduce function call overhead
> ---
>  tools/lib/bpf/btf.c            | 262 ++++++++++++++++++++++++++++++---
>  tools/lib/bpf/btf.h            |  17 +++
>  tools/lib/bpf/btf_sort.c       | 174 ++++++++++++++++++++++
>  tools/lib/bpf/btf_sort.h       |  11 ++
>  tools/lib/bpf/libbpf.map       |   6 +
>  tools/lib/bpf/libbpf_version.h |   2 +-
>  6 files changed, 447 insertions(+), 25 deletions(-)
>  create mode 100644 tools/lib/bpf/btf_sort.c
>  create mode 100644 tools/lib/bpf/btf_sort.h
>

This looks a bit over-engineered, let's try to simplify and have more
succinct implementation

pw-bot: cr

> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index 18907f0fcf9f..d20bf81a21ce 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c
> @@ -23,6 +23,7 @@
>  #include "libbpf_internal.h"
>  #include "hashmap.h"
>  #include "strset.h"
> +#include "btf_sort.h"
>
>  #define BTF_MAX_NR_TYPES 0x7fffffffU
>  #define BTF_MAX_STR_OFFSET 0x7fffffffU
> @@ -92,6 +93,12 @@ struct btf {
>          *   - for split BTF counts number of types added on top of base BTF.
>          */
>         __u32 nr_types;
> +       /* number of sorted and named types in this BTF instance:
> +        *   - doesn't include special [0] void type;
> +        *   - for split BTF counts number of sorted and named types added on
> +        *     top of base BTF.
> +        */
> +       __u32 nr_sorted_types;
>         /* if not NULL, points to the base BTF on top of which the current
>          * split BTF is based
>          */
> @@ -624,6 +631,11 @@ const struct btf *btf__base_btf(const struct btf *btf)
>         return btf->base_btf;
>  }
>
> +__u32 btf__start_id(const struct btf *btf)
> +{
> +       return btf->start_id;
> +}

this can already be determined using btf__base_btf() (and then getting
its btf__type_cnt()), but I guess I don't mind having a small
dedicated API for this. But please add it as a separate patch

> +
>  /* internal helper returning non-const pointer to a type */
>  struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id)
>  {
> @@ -915,38 +927,16 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
>         return libbpf_err(-ENOENT);
>  }
>
> -static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
> -                                  const char *type_name, __u32 kind)
> -{
> -       __u32 i, nr_types = btf__type_cnt(btf);
> -
> -       if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
> -               return 0;
> -
> -       for (i = start_id; i < nr_types; i++) {
> -               const struct btf_type *t = btf__type_by_id(btf, i);
> -               const char *name;
> -
> -               if (btf_kind(t) != kind)
> -                       continue;
> -               name = btf__name_by_offset(btf, t->name_off);
> -               if (name && !strcmp(type_name, name))
> -                       return i;
> -       }
> -
> -       return libbpf_err(-ENOENT);
> -}
> -
>  __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
>                                  __u32 kind)
>  {
> -       return btf_find_by_name_kind(btf, btf->start_id, type_name, kind);
> +       return _btf_find_by_name_kind(btf, btf->start_id, type_name, kind);
>  }
>
>  __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
>                              __u32 kind)
>  {
> -       return btf_find_by_name_kind(btf, 1, type_name, kind);
> +       return _btf_find_by_name_kind(btf, 1, type_name, kind);

nit: please avoid using underscore-prefixed names

>  }
>
>  static bool btf_is_modifiable(const struct btf *btf)
> @@ -1091,6 +1081,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b
>         err = err ?: btf_sanity_check(btf);
>         if (err)
>                 goto done;
> +       btf_check_sorted(btf, btf->start_id);

let's do this lazily when we actually need to search by name, that
will also work better with invalidation (currently you don't recheck
sortedness after invalidation)

[...]

> +/*
> + * Permute BTF types in-place using the ID mapping from btf_permute_opts->ids.
> + * After permutation, all type ID references are updated to reflect the new
> + * ordering. If a struct btf_ext (representing '.BTF.ext' section) is provided,
> + * type ID references within the BTF extension data are also updated.
> + */

See how we provide doc comment for public APIs and do the same with btf__permute

> +int btf__permute(struct btf *btf, const struct btf_permute_opts *opts)

id map array is mandatory parameter which will always be specified,
make it a fixed argument. We use opts for something that's optional
and/or infrequently used. btf_ext being part of opts makes total
sense, though.

> +{
> +       struct btf_permute *p;
> +       int err = 0;
> +
> +       if (!OPTS_VALID(opts, btf_permute_opts))
> +               return libbpf_err(-EINVAL);
> +
> +       p = btf_permute_new(btf, opts);
> +       if (!p) {
> +               pr_debug("btf_permute_new failed: %ld\n", PTR_ERR(p));
> +               return libbpf_err(-EINVAL);
> +       }
> +
> +       if (btf_ensure_modifiable(btf)) {
> +               err = -ENOMEM;
> +               goto done;
> +       }
> +
> +       err = btf_permute_shuffle_types(p);
> +       if (err < 0) {
> +               pr_debug("btf_permute_shuffle_types failed: %s\n", errstr(err));
> +               goto done;
> +       }
> +       err = btf_permute_remap_types(p);

can't we remap IDs as we shuffle and move types around? I'm not sure
we need entire struct btf_permute to keep track of all of this, this
can be a local state in a single function

> +       if (err) {
> +               pr_debug("btf_permute_remap_types failed: %s\n", errstr(err));
> +               goto done;
> +       }
> +
> +done:
> +       btf_permute_free(p);
> +       return libbpf_err(err);
> +}
> +

[...]

> +/*
> + * Shuffle BTF types.
> + *
> + * Rearranges types according to the permutation map in p->ids. The p->map
> + * array stores the mapping from original type IDs to new shuffled IDs,
> + * which is used in the next phase to update type references.
> + */
> +static int btf_permute_shuffle_types(struct btf_permute *p)
> +{
> +       struct btf *btf = p->btf;
> +       const struct btf_type *t;
> +       __u32 *new_offs = NULL;
> +       void *l, *new_types = NULL;
> +       int i, id, len, err;
> +
> +       new_offs = calloc(btf->nr_types, sizeof(*new_offs));
> +       new_types = calloc(btf->hdr->type_len, 1);
> +       if (!new_types || !new_offs) {
> +               err = -ENOMEM;
> +               goto out_err;
> +       }
> +
> +       l = new_types;

What does "l" refer to in this name? It rings no bells for me...

> +       for (i = 0; i < btf->nr_types; i++) {

this won't work with split BTF, no?

> +               id = p->ids[i];
> +               t = btf__type_by_id(btf, id);
> +               len = btf_type_size(t);
> +               memcpy(l, t, len);
> +               new_offs[i] = l - new_types;
> +               p->map[id - btf->start_id] = btf->start_id + i;
> +               l += len;
> +       }
> +
> +       free(btf->types_data);
> +       free(btf->type_offs);
> +       btf->types_data = new_types;
> +       btf->type_offs = new_offs;
> +       return 0;
> +

[...]

> diff --git a/tools/lib/bpf/btf_sort.c b/tools/lib/bpf/btf_sort.c
> new file mode 100644
> index 000000000000..553c5f5e61bd
> --- /dev/null
> +++ b/tools/lib/bpf/btf_sort.c

why does this have to be a separate file? can't it be part of btf.c?

> @@ -0,0 +1,174 @@
> +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
> +/* Copyright (c) 2025 Xiaomi */
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +
> +#ifdef __KERNEL__
> +
> +#define btf_type_by_id                         (struct btf_type *)btf_type_by_id
> +#define btf__str_by_offset                     btf_str_by_offset
> +#define btf__type_cnt                          btf_nr_types
> +#define btf__start_id                          btf_start_id
> +#define libbpf_err(x)                          x
> +
> +#else
> +
> +#define notrace
> +
> +#endif /* __KERNEL__ */
> +
> +/*
> + * Skip the sorted check if the number of BTF types is below this threshold.
> + * The value 4 is chosen based on the theoretical break-even point where
> + * linear search (N/2) and binary search (LOG2(N)) require approximately
> + * the same number of comparisons.
> + */
> +#define BTF_CHECK_SORT_THRESHOLD  4

I agree with Eduard, it seems like an overkill. For small BTFs this
all doesn't matter anyways.

> +
> +struct btf;
> +
> +static int cmp_btf_kind_name(int ka, const char *na, int kb, const char *nb)
> +{
> +       return (ka - kb) ?: strcmp(na, nb);
> +}
> +
> +/*
> + * Sort BTF types by kind and name in ascending order, placing named types
> + * before anonymous ones.
> + */
> +static int btf_compare_type_kinds_names(const void *a, const void *b, void *priv)
> +{
> +       struct btf *btf = (struct btf *)priv;
> +       struct btf_type *ta = btf_type_by_id(btf, *(__u32 *)a);
> +       struct btf_type *tb = btf_type_by_id(btf, *(__u32 *)b);
> +       const char *na, *nb;
> +       bool anon_a, anon_b;
> +       int ka, kb;
> +
> +       na = btf__str_by_offset(btf, ta->name_off);
> +       nb = btf__str_by_offset(btf, tb->name_off);
> +       anon_a = str_is_empty(na);
> +       anon_b = str_is_empty(nb);
> +
> +       /* ta w/o name is greater than tb */
> +       if (anon_a && !anon_b)
> +               return 1;
> +       /* tb w/o name is smaller than ta */
> +       if (!anon_a && anon_b)
> +               return -1;
> +
> +       ka = btf_kind(ta);
> +       kb = btf_kind(tb);
> +
> +       if (anon_a && anon_b)
> +               return ka - kb;
> +
> +       return cmp_btf_kind_name(ka, na, kb, nb);
> +}

I think we should keep it simple and only use sorted-by-name property.
Within the same name, we can just search linearly for necessary kind.

> +
> +static __s32 notrace __btf_find_by_name_kind(const struct btf *btf, int start_id,
> +                                  const char *type_name, __u32 kind)
> +{
> +       const struct btf_type *t;
> +       const char *tname;
> +       int err = -ENOENT;
> +
> +       if (!btf)
> +               goto out;
> +
> +       if (start_id < btf__start_id(btf)) {
> +               err = __btf_find_by_name_kind(btf->base_btf, start_id, type_name, kind);
> +               if (err == -ENOENT)
> +                       start_id = btf__start_id(btf);
> +       }
> +
> +       if (err == -ENOENT) {
> +               if (btf->nr_sorted_types) {
> +                       /* binary search */
> +                       __s32 start, end, mid, found = -1;
> +                       int ret;
> +
> +                       start = start_id;
> +                       end = start + btf->nr_sorted_types - 1;
> +                       /* found the leftmost btf_type that matches */
> +                       while(start <= end) {
> +                               mid = start + (end - start) / 2;

nit: binary search is, IMO, where succinct names like "l", "r", "m"
are good and actually help keeping algorithm code more succinct
without making it more obscure

> +                               t = btf_type_by_id(btf, mid);
> +                               tname = btf__str_by_offset(btf, t->name_off);
> +                               ret = cmp_btf_kind_name(BTF_INFO_KIND(t->info), tname,
> +                                                       kind, type_name);
> +                               if (ret < 0)
> +                                       start = mid + 1;
> +                               else {
> +                                       if (ret == 0)
> +                                               found = mid;
> +                                       end = mid - 1;
> +                               }
> +                       }
> +
> +                       if (found != -1)
> +                               return found;

please check find_linfo() in kernel/bpf/log.c for a very succinct
implementation of binary search where we look not for exact match, but
rather leftmost or rightmost element satisfying some condition.
find_linfo() is actually looking for leftmost element (despite what
comment says :) ), so I think can be followed very closely.

> +               } else {
> +                       /* linear search */
> +                       __u32 i, total;
> +
> +                       total = btf__type_cnt(btf);
> +                       for (i = start_id; i < total; i++) {
> +                               t = btf_type_by_id(btf, i);
> +                               if (btf_kind(t) != kind)
> +                                       continue;
> +
> +                               tname = btf__str_by_offset(btf, t->name_off);
> +                               if (tname && !strcmp(tname, type_name))
> +                                       return i;
> +                       }
> +               }
> +       }
> +

[...]