linux-kernel - Re: [RFC PATCH v2 2/5] btf: sort BTF types by kind and name to enable binary search

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <174642a334760af39a5e7bacdd8b977b392a82c7.camel@gmail.com>
Date: Tue, 21 Oct 2025 11:59:02 -0700
From: Eduard Zingerman <eddyz87@...il.com>
To: Donglin Peng <dolinux.peng@...il.com>, ast@...nel.org
Cc: linux-kernel@...r.kernel.org, bpf@...r.kernel.org, Andrii Nakryiko	
 <andrii.nakryiko@...il.com>, Alan Maguire <alan.maguire@...cle.com>, Song
 Liu	 <song@...nel.org>, pengdonglin <pengdonglin@...omi.com>
Subject: Re: [RFC PATCH v2 2/5] btf: sort BTF types by kind and name to
 enable binary search

On Mon, 2025-10-20 at 17:39 +0800, Donglin Peng wrote:
> This patch implements sorting of BTF types by their kind and name,
> enabling the use of binary search for type lookups.
> 
> To share logic between kernel and libbpf, a new btf_sort.c file is
> introduced containing common sorting functionality.
> 
> The sorting is performed during btf__dedup() when the new
> sort_by_kind_name option in btf_dedup_opts is enabled.

Do we really need this option?  Dedup is free to rearrange btf types
anyway, so why not sort always?  Is execution time a concern?

> For vmlinux and kernel module BTF, btf_check_sorted() verifies
> whether the types are sorted and binary search can be used.

[...]

> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index c414cf37e1bd..11b05f4eb07d 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -259,6 +259,7 @@ struct btf {
>  	void *nohdr_data;
>  	struct btf_header hdr;
>  	u32 nr_types; /* includes VOID for base BTF */
> +	u32 nr_sorted_types;
>  	u32 types_size;
>  	u32 data_size;
>  	refcount_t refcnt;
> @@ -544,33 +545,29 @@ u32 btf_nr_types(const struct btf *btf)
>  	return total;
>  }
>  
> -u32 btf_type_cnt(const struct btf *btf)
> +u32 btf_start_id(const struct btf *btf)
>  {
> -	return btf->start_id + btf->nr_types;
> +	return btf->start_id;
>  }
>  
> -s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
> +u32 btf_nr_sorted_types(const struct btf *btf)
>  {
> -	const struct btf_type *t;
> -	const char *tname;
> -	u32 i, total;
> -
> -	do {
> -		total = btf_type_cnt(btf);
> -		for (i = btf->start_id; i < total; i++) {
> -			t = btf_type_by_id(btf, i);
> -			if (BTF_INFO_KIND(t->info) != kind)
> -				continue;
> +	return btf->nr_sorted_types;
> +}
>  
> -			tname = btf_name_by_offset(btf, t->name_off);
> -			if (!strcmp(tname, name))
> -				return i;
> -		}
> +void btf_set_nr_sorted_types(struct btf *btf, u32 nr)
> +{
> +	btf->nr_sorted_types = nr;
> +}
>  
> -		btf = btf->base_btf;
> -	} while (btf);
> +u32 btf_type_cnt(const struct btf *btf)
> +{
> +	return btf->start_id + btf->nr_types;
> +}
>  
> -	return -ENOENT;
> +s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
> +{
> +	return find_btf_by_name_kind(btf, 1, name, kind);
                                         ^^^
		nit: this will make it impossible to find "void" w/o a special case
		     in the find_btf_by_name_kind(), why not start from 0?
>  }
>  
>  s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p)

[...]

> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index 18907f0fcf9f..87e47f0b78ba 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c

[...]

> +/*
> + * Compact and sort BTF types.
> + *
> + * Similar to btf_dedup_compact_types, but additionally sorts the btf_types.
> + */
> +static int btf__dedup_compact_and_sort_types(struct btf_dedup *d)
> +{

And this function will become btf__dedup_compact_types(),
if BTF will be always sorted. Thus removing some code duplication.

[...]

> diff --git a/tools/lib/bpf/btf_sort.c b/tools/lib/bpf/btf_sort.c
> new file mode 100644
> index 000000000000..2ad4a56f1c08
> --- /dev/null
> +++ b/tools/lib/bpf/btf_sort.c

[...]

> +/*
> + * Sort BTF types by kind and name in ascending order, placing named types
> + * before anonymous ones.
> + */
> +int btf_compare_type_kinds_names(const void *a, const void *b, void *priv)
> +{
> +	struct btf *btf = (struct btf *)priv;
> +	struct btf_type *ta = btf_type_by_id(btf, *(__u32 *)a);
> +	struct btf_type *tb = btf_type_by_id(btf, *(__u32 *)b);
> +	const char *na, *nb;
> +	int ka, kb;
> +
> +	/* ta w/o name is greater than tb */
> +	if (!ta->name_off && tb->name_off)
> +		return 1;
> +	/* tb w/o name is smaller than ta */
> +	if (ta->name_off && !tb->name_off)
> +		return -1;
> +
> +	ka = btf_kind(ta);
> +	kb = btf_kind(tb);
> +	na = btf__str_by_offset(btf, ta->name_off);
> +	nb = btf__str_by_offset(btf, tb->name_off);
> +
> +	return cmp_btf_kind_name(ka, na, kb, nb);

If both types are anonymous and have the same kind, this will lead to
strcmp(NULL, NULL). On kernel side that would lead to null pointer
dereference.

> +}
> +
> +__s32 find_btf_by_name_kind(const struct btf *btf, int start_id,
> +				   const char *type_name, __u32 kind)

Nit: having functions with names btf_find_by_name_kind and
     	    	      	   	 find_btf_by_name_kind
     is very confusing.
     Usually we use names like __<func> for auxiliary functions
     like this.

> +{
> +	const struct btf_type *t;
> +	const char *tname;
> +	__u32 i, total;
> +
> +	if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
> +		return 0;
> +
> +	do {
> +		if (btf__nr_sorted_types(btf)) {
> +			/* binary search */
> +			__s32 start, end, mid, found = -1;
> +			int ret;
> +
> +			start = btf__start_id(btf);
> +			end = start + btf__nr_sorted_types(btf) - 1;
> +			/* found the leftmost btf_type that matches */
> +			while(start <= end) {
> +				mid = start + (end - start) / 2;
> +				t = btf_type_by_id(btf, mid);
> +				tname = btf__name_by_offset(btf, t->name_off);
> +				ret = cmp_btf_kind_name(BTF_INFO_KIND(t->info), tname,
> +							kind, type_name);
> +				if (ret == 0)
> +					found = mid;
> +				if (ret < 0)
> +					start = mid + 1;
> +				else if (ret >= 0)
> +					end = mid - 1;
> +			}
> +
> +			if (found != -1)
> +				return found;
> +		} else {
> +			/* linear search */
> +			total = btf__type_cnt(btf);
> +			for (i = btf__start_id(btf); i < total; i++) {
> +				t = btf_type_by_id(btf, i);
> +				if (btf_kind(t) != kind)
> +					continue;
> +
> +				tname = btf__name_by_offset(btf, t->name_off);
> +				if (tname && !strcmp(tname, type_name))
> +					return i;
> +			}
> +		}
> +
> +		btf = btf__base_btf(btf);
> +	} while (btf && btf__start_id(btf) >= start_id);
> +
> +	return libbpf_err(-ENOENT);
> +}
> +
> +void btf_check_sorted(struct btf *btf, int start_id)
> +{
> +	const struct btf_type *t;
> +	int i, n, nr_sorted_types;
> +
> +	n = btf__type_cnt(btf);
> +	if ((n - start_id) < BTF_CHECK_SORT_THRESHOLD)
> +		return;

Are there any measurable performance benefits from having this special case?

> +
> +	n--;
> +	nr_sorted_types = 0;
> +	for (i = start_id; i < n; i++) {
> +		int k = i + 1;
> +
> +		t = btf_type_by_id(btf, i);
> +		if (!btf__str_by_offset(btf, t->name_off))
> +			return;

I am confused.
This effectively bans BTFs with anonymous types,
as btf__set_nr_sorted_types() wont be called if such types are found.
Anonymous types are very common, e.g. all FUNC_PROTO are anonymous.

> +
> +		t = btf_type_by_id(btf, k);
> +		if (!btf__str_by_offset(btf, t->name_off))
> +			return;
> +
> +		if (btf_compare_type_kinds_names(&i, &k, btf) > 0)
> +			return;
> +
> +		if (t->name_off)
> +			nr_sorted_types++;
> +	}
> +
> +	t = btf_type_by_id(btf, start_id);
> +	if (t->name_off)
> +		nr_sorted_types++;
> +	if (nr_sorted_types >= BTF_CHECK_SORT_THRESHOLD)
> +		btf__set_nr_sorted_types(btf, nr_sorted_types);
> +}
> +

[...]