[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAErzpms3cyc0urFkZ3dPYR9UwL=3393qLh0PTZvcGhF2GXSrww@mail.gmail.com>
Date: Wed, 29 Oct 2025 13:04:20 +0800
From: Donglin Peng <dolinux.peng@...il.com>
To: Andrii Nakryiko <andrii.nakryiko@...il.com>
Cc: ast@...nel.org, linux-kernel@...r.kernel.org, bpf@...r.kernel.org, 
	Eduard Zingerman <eddyz87@...il.com>, Alan Maguire <alan.maguire@...cle.com>, Song Liu <song@...nel.org>, 
	pengdonglin <pengdonglin@...omi.com>
Subject: Re: [RFC PATCH v3 1/3] btf: implement BTF type sorting for
 accelerated lookups
On Wed, Oct 29, 2025 at 2:38 AM Andrii Nakryiko
<andrii.nakryiko@...il.com> wrote:
>
> On Mon, Oct 27, 2025 at 6:54 AM Donglin Peng <dolinux.peng@...il.com> wrote:
> >
> > This patch introduces a new libbpf interface btf__permute() to reorganize
> > BTF types according to a provided mapping. The BTF lookup mechanism is
> > enhanced with binary search capability, significantly improving lookup
> > performance for large type sets.
> >
> > The pahole tool can invoke this interface with a sorted type ID array,
> > enabling binary search in both user space and kernel. To share core logic
> > between kernel and libbpf, common sorting functionality is implemented
> > in a new btf_sort.c source file.
> >
> > Cc: Eduard Zingerman <eddyz87@...il.com>
> > Cc: Alexei Starovoitov <ast@...nel.org>
> > Cc: Andrii Nakryiko <andrii.nakryiko@...il.com>
> > Cc: Alan Maguire <alan.maguire@...cle.com>
> > Cc: Song Liu <song@...nel.org>
> > Co-developed-by: Eduard Zingerman <eddyz87@...il.com>
> > Signed-off-by: pengdonglin <pengdonglin@...omi.com>
> > Signed-off-by: Donglin Peng <dolinux.peng@...il.com>
> > ---
> > v2->v3:
> > - Remove sorting logic from libbpf and provide a generic btf__permute() interface
> > - Remove the search direction patch since sorted lookup provides sufficient performance
> >   and changing search order could cause conflicts between BTF and base BTF
> > - Include btf_sort.c directly in btf.c to reduce function call overhead
> > ---
> >  tools/lib/bpf/btf.c            | 262 ++++++++++++++++++++++++++++++---
> >  tools/lib/bpf/btf.h            |  17 +++
> >  tools/lib/bpf/btf_sort.c       | 174 ++++++++++++++++++++++
> >  tools/lib/bpf/btf_sort.h       |  11 ++
> >  tools/lib/bpf/libbpf.map       |   6 +
> >  tools/lib/bpf/libbpf_version.h |   2 +-
> >  6 files changed, 447 insertions(+), 25 deletions(-)
> >  create mode 100644 tools/lib/bpf/btf_sort.c
> >  create mode 100644 tools/lib/bpf/btf_sort.h
> >
>
> This looks a bit over-engineered, let's try to simplify and have more
> succinct implementation
Thanks, I will simplify it.
>
> pw-bot: cr
>
> > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> > index 18907f0fcf9f..d20bf81a21ce 100644
> > --- a/tools/lib/bpf/btf.c
> > +++ b/tools/lib/bpf/btf.c
> > @@ -23,6 +23,7 @@
> >  #include "libbpf_internal.h"
> >  #include "hashmap.h"
> >  #include "strset.h"
> > +#include "btf_sort.h"
> >
> >  #define BTF_MAX_NR_TYPES 0x7fffffffU
> >  #define BTF_MAX_STR_OFFSET 0x7fffffffU
> > @@ -92,6 +93,12 @@ struct btf {
> >          *   - for split BTF counts number of types added on top of base BTF.
> >          */
> >         __u32 nr_types;
> > +       /* number of sorted and named types in this BTF instance:
> > +        *   - doesn't include special [0] void type;
> > +        *   - for split BTF counts number of sorted and named types added on
> > +        *     top of base BTF.
> > +        */
> > +       __u32 nr_sorted_types;
> >         /* if not NULL, points to the base BTF on top of which the current
> >          * split BTF is based
> >          */
> > @@ -624,6 +631,11 @@ const struct btf *btf__base_btf(const struct btf *btf)
> >         return btf->base_btf;
> >  }
> >
> > +__u32 btf__start_id(const struct btf *btf)
> > +{
> > +       return btf->start_id;
> > +}
>
> this can already be determined using btf__base_btf() (and then getting
> its btf__type_cnt()), but I guess I don't mind having a small
Yes, you're right. The calculation is sufficient:
- If base_btf is NULL, start_id = 1
- If base_btf is not NULL, start_id = btf__type_cnt(base_btf)
> dedicated API for this. But please add it as a separate patch
>
> > +
> >  /* internal helper returning non-const pointer to a type */
> >  struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id)
> >  {
> > @@ -915,38 +927,16 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
> >         return libbpf_err(-ENOENT);
> >  }
> >
> > -static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
> > -                                  const char *type_name, __u32 kind)
> > -{
> > -       __u32 i, nr_types = btf__type_cnt(btf);
> > -
> > -       if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
> > -               return 0;
> > -
> > -       for (i = start_id; i < nr_types; i++) {
> > -               const struct btf_type *t = btf__type_by_id(btf, i);
> > -               const char *name;
> > -
> > -               if (btf_kind(t) != kind)
> > -                       continue;
> > -               name = btf__name_by_offset(btf, t->name_off);
> > -               if (name && !strcmp(type_name, name))
> > -                       return i;
> > -       }
> > -
> > -       return libbpf_err(-ENOENT);
> > -}
> > -
> >  __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
> >                                  __u32 kind)
> >  {
> > -       return btf_find_by_name_kind(btf, btf->start_id, type_name, kind);
> > +       return _btf_find_by_name_kind(btf, btf->start_id, type_name, kind);
> >  }
> >
> >  __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
> >                              __u32 kind)
> >  {
> > -       return btf_find_by_name_kind(btf, 1, type_name, kind);
> > +       return _btf_find_by_name_kind(btf, 1, type_name, kind);
>
> nit: please avoid using underscore-prefixed names
Thanks. I will fix it in the next version.
>
> >  }
> >
> >  static bool btf_is_modifiable(const struct btf *btf)
> > @@ -1091,6 +1081,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b
> >         err = err ?: btf_sanity_check(btf);
> >         if (err)
> >                 goto done;
> > +       btf_check_sorted(btf, btf->start_id);
>
> let's do this lazily when we actually need to search by name, that
> will also work better with invalidation (currently you don't recheck
> sortedness after invalidation)
Thanks. I'll defer the call to btf_check_sorted until the first invocation of
btf__find_by_name_kind.
>
> [...]
>
> > +/*
> > + * Permute BTF types in-place using the ID mapping from btf_permute_opts->ids.
> > + * After permutation, all type ID references are updated to reflect the new
> > + * ordering. If a struct btf_ext (representing '.BTF.ext' section) is provided,
> > + * type ID references within the BTF extension data are also updated.
> > + */
>
> See how we provide doc comment for public APIs and do the same with btf__permute
Thanks, I will fix it in the next version.
>
> > +int btf__permute(struct btf *btf, const struct btf_permute_opts *opts)
>
> id map array is mandatory parameter which will always be specified,
> make it a fixed argument. We use opts for something that's optional
> and/or infrequently used. btf_ext being part of opts makes total
> sense, though.
Thanks, I will fix it in the next version, like:
int btf__permute(struct btf *btf, __u32 *id_map, const struct
btf_permute_opts *opts)
>
> > +{
> > +       struct btf_permute *p;
> > +       int err = 0;
> > +
> > +       if (!OPTS_VALID(opts, btf_permute_opts))
> > +               return libbpf_err(-EINVAL);
> > +
> > +       p = btf_permute_new(btf, opts);
> > +       if (!p) {
> > +               pr_debug("btf_permute_new failed: %ld\n", PTR_ERR(p));
> > +               return libbpf_err(-EINVAL);
> > +       }
> > +
> > +       if (btf_ensure_modifiable(btf)) {
> > +               err = -ENOMEM;
> > +               goto done;
> > +       }
> > +
> > +       err = btf_permute_shuffle_types(p);
> > +       if (err < 0) {
> > +               pr_debug("btf_permute_shuffle_types failed: %s\n", errstr(err));
> > +               goto done;
> > +       }
> > +       err = btf_permute_remap_types(p);
>
> can't we remap IDs as we shuffle and move types around? I'm not sure
This approach appears infeasible, as it necessitates first generating a
complete type ID mapping (similar to btf_dedup's hypot_map) to translate
original IDs to new IDs for all referenced types, followed by executing the
remapping phase.
> we need entire struct btf_permute to keep track of all of this, this
> can be a local state in a single function
Thank you. I think that a btf_permute function may be necessary. As Eduard
suggested, we can generalize btf_dedup_remap_types into a reusable function,
which would require a dedicated structure to encapsulate all necessary
parameters.
>
> > +       if (err) {
> > +               pr_debug("btf_permute_remap_types failed: %s\n", errstr(err));
> > +               goto done;
> > +       }
> > +
> > +done:
> > +       btf_permute_free(p);
> > +       return libbpf_err(err);
> > +}
> > +
>
> [...]
>
> > +/*
> > + * Shuffle BTF types.
> > + *
> > + * Rearranges types according to the permutation map in p->ids. The p->map
> > + * array stores the mapping from original type IDs to new shuffled IDs,
> > + * which is used in the next phase to update type references.
> > + */
> > +static int btf_permute_shuffle_types(struct btf_permute *p)
> > +{
> > +       struct btf *btf = p->btf;
> > +       const struct btf_type *t;
> > +       __u32 *new_offs = NULL;
> > +       void *l, *new_types = NULL;
> > +       int i, id, len, err;
> > +
> > +       new_offs = calloc(btf->nr_types, sizeof(*new_offs));
> > +       new_types = calloc(btf->hdr->type_len, 1);
> > +       if (!new_types || !new_offs) {
> > +               err = -ENOMEM;
> > +               goto out_err;
> > +       }
> > +
> > +       l = new_types;
>
> What does "l" refer to in this name? It rings no bells for me...
Would the name "nt" be acceptable?
>
> > +       for (i = 0; i < btf->nr_types; i++) {
>
> this won't work with split BTF, no?
The `ids` array in `btf_permute` stores type IDs for split BTF.
Local testing confirms that kernel module BTF is properly
pre-sorted during build using a patched pahole version.
Example usage in pahole:
static int btf_encoder__sort(struct btf *btf)
{
       LIBBPF_OPTS(btf_permute_opts, opts);
       int start_id = btf__start_id(btf);
       int nr_types = btf__type_cnt(btf) - start_id;
       __u32 *permute_ids;
       int i, id, err = 0;
       if (nr_types < 2)
               return 0;
       permute_ids = calloc(nr_types, sizeof(*permute_ids));
       if (!permute_ids) {
               err = -ENOMEM;
               goto out_free;
       }
       for (i = 0, id = start_id; i < nr_types; i++, id++)
               permute_ids[i] = id;
       qsort_r(permute_ids, nr_types, sizeof(*permute_ids),
               cmp_types_kinds_names, btf);
       opts.ids = permute_ids;
       err = btf__permute(btf, &opts);
       if (err)
               goto out_free;
out_free:
       if (permute_ids)
               free(permute_ids);
       return err;
}
>
> > +               id = p->ids[i];
> > +               t = btf__type_by_id(btf, id);
> > +               len = btf_type_size(t);
> > +               memcpy(l, t, len);
> > +               new_offs[i] = l - new_types;
> > +               p->map[id - btf->start_id] = btf->start_id + i;
> > +               l += len;
> > +       }
> > +
> > +       free(btf->types_data);
> > +       free(btf->type_offs);
> > +       btf->types_data = new_types;
> > +       btf->type_offs = new_offs;
> > +       return 0;
> > +
>
> [...]
>
> > diff --git a/tools/lib/bpf/btf_sort.c b/tools/lib/bpf/btf_sort.c
> > new file mode 100644
> > index 000000000000..553c5f5e61bd
> > --- /dev/null
> > +++ b/tools/lib/bpf/btf_sort.c
>
> why does this have to be a separate file? can't it be part of btf.c?
Thanks. The kernel can leverage existing common infrastructure, similar to
the approach used in bpf_relocate.c. If this shared approach isn't acceptable,
I'm prepared to implement separate versions for both libbpf and the kernel.
https://lore.kernel.org/all/34a168e2-204d-47e2-9923-82d8ad645273@oracle.com/#t
https://lore.kernel.org/all/7f770a27-6ca6-463f-9145-5c795e0b3f40@oracle.com/
>
> > @@ -0,0 +1,174 @@
> > +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
> > +/* Copyright (c) 2025 Xiaomi */
> > +
> > +#ifndef _GNU_SOURCE
> > +#define _GNU_SOURCE
> > +#endif
> > +
> > +#ifdef __KERNEL__
> > +
> > +#define btf_type_by_id                         (struct btf_type *)btf_type_by_id
> > +#define btf__str_by_offset                     btf_str_by_offset
> > +#define btf__type_cnt                          btf_nr_types
> > +#define btf__start_id                          btf_start_id
> > +#define libbpf_err(x)                          x
> > +
> > +#else
> > +
> > +#define notrace
> > +
> > +#endif /* __KERNEL__ */
> > +
> > +/*
> > + * Skip the sorted check if the number of BTF types is below this threshold.
> > + * The value 4 is chosen based on the theoretical break-even point where
> > + * linear search (N/2) and binary search (LOG2(N)) require approximately
> > + * the same number of comparisons.
> > + */
> > +#define BTF_CHECK_SORT_THRESHOLD  4
>
> I agree with Eduard, it seems like an overkill. For small BTFs this
> all doesn't matter anyways.
Thanks, I will remove it in the next version.
>
> > +
> > +struct btf;
> > +
> > +static int cmp_btf_kind_name(int ka, const char *na, int kb, const char *nb)
> > +{
> > +       return (ka - kb) ?: strcmp(na, nb);
> > +}
> > +
> > +/*
> > + * Sort BTF types by kind and name in ascending order, placing named types
> > + * before anonymous ones.
> > + */
> > +static int btf_compare_type_kinds_names(const void *a, const void *b, void *priv)
> > +{
> > +       struct btf *btf = (struct btf *)priv;
> > +       struct btf_type *ta = btf_type_by_id(btf, *(__u32 *)a);
> > +       struct btf_type *tb = btf_type_by_id(btf, *(__u32 *)b);
> > +       const char *na, *nb;
> > +       bool anon_a, anon_b;
> > +       int ka, kb;
> > +
> > +       na = btf__str_by_offset(btf, ta->name_off);
> > +       nb = btf__str_by_offset(btf, tb->name_off);
> > +       anon_a = str_is_empty(na);
> > +       anon_b = str_is_empty(nb);
> > +
> > +       /* ta w/o name is greater than tb */
> > +       if (anon_a && !anon_b)
> > +               return 1;
> > +       /* tb w/o name is smaller than ta */
> > +       if (!anon_a && anon_b)
> > +               return -1;
> > +
> > +       ka = btf_kind(ta);
> > +       kb = btf_kind(tb);
> > +
> > +       if (anon_a && anon_b)
> > +               return ka - kb;
> > +
> > +       return cmp_btf_kind_name(ka, na, kb, nb);
> > +}
>
> I think we should keep it simple and only use sorted-by-name property.
> Within the same name, we can just search linearly for necessary kind.
This approach is feasible, though it may introduce some overhead in the search
function. I previously implemented a hybrid method that first sorts
types by name
and then combines binary search with linear search for handling collisions.
https://lore.kernel.org/all/20240608140835.965949-1-dolinux.peng@gmail.com/
id = btf_find_by_name_bsearch(btf, name, &start, &end);
if (id > 0) {
    while (start <= end) {
        t = btf_type_by_id(btf, start);
        if (BTF_INFO_KIND(t->info) == kind)
            return start;
        start++;
        }
}
Could this be acceptable?
>
> > +
> > +static __s32 notrace __btf_find_by_name_kind(const struct btf *btf, int start_id,
> > +                                  const char *type_name, __u32 kind)
> > +{
> > +       const struct btf_type *t;
> > +       const char *tname;
> > +       int err = -ENOENT;
> > +
> > +       if (!btf)
> > +               goto out;
> > +
> > +       if (start_id < btf__start_id(btf)) {
> > +               err = __btf_find_by_name_kind(btf->base_btf, start_id, type_name, kind);
> > +               if (err == -ENOENT)
> > +                       start_id = btf__start_id(btf);
> > +       }
> > +
> > +       if (err == -ENOENT) {
> > +               if (btf->nr_sorted_types) {
> > +                       /* binary search */
> > +                       __s32 start, end, mid, found = -1;
> > +                       int ret;
> > +
> > +                       start = start_id;
> > +                       end = start + btf->nr_sorted_types - 1;
> > +                       /* found the leftmost btf_type that matches */
> > +                       while(start <= end) {
> > +                               mid = start + (end - start) / 2;
>
> nit: binary search is, IMO, where succinct names like "l", "r", "m"
> are good and actually help keeping algorithm code more succinct
> without making it more obscure
Thanks, I will fix it in the next version.
>
> > +                               t = btf_type_by_id(btf, mid);
> > +                               tname = btf__str_by_offset(btf, t->name_off);
> > +                               ret = cmp_btf_kind_name(BTF_INFO_KIND(t->info), tname,
> > +                                                       kind, type_name);
> > +                               if (ret < 0)
> > +                                       start = mid + 1;
> > +                               else {
> > +                                       if (ret == 0)
> > +                                               found = mid;
> > +                                       end = mid - 1;
> > +                               }
> > +                       }
> > +
> > +                       if (found != -1)
> > +                               return found;
>
> please check find_linfo() in kernel/bpf/log.c for a very succinct
> implementation of binary search where we look not for exact match, but
> rather leftmost or rightmost element satisfying some condition.
> find_linfo() is actually looking for leftmost element (despite what
> comment says :) ), so I think can be followed very closely.
Thank you for the suggestion. If we sort types solely by name as proposed,
we would need to first identify the leftmost and rightmost bounds of the
matching name range (similar to find_linfo's approach), then perform a
linear search within that range to find the first type with a matching kind.
>
> > +               } else {
> > +                       /* linear search */
> > +                       __u32 i, total;
> > +
> > +                       total = btf__type_cnt(btf);
> > +                       for (i = start_id; i < total; i++) {
> > +                               t = btf_type_by_id(btf, i);
> > +                               if (btf_kind(t) != kind)
> > +                                       continue;
> > +
> > +                               tname = btf__str_by_offset(btf, t->name_off);
> > +                               if (tname && !strcmp(tname, type_name))
> > +                                       return i;
> > +                       }
> > +               }
> > +       }
> > +
>
> [...]
Powered by blists - more mailing lists
 
