Message-ID: <20211231022307.3cwff3suzemuiiqk@ast-mbp.dhcp.thefacebook.com>
Date: Thu, 30 Dec 2021 18:23:07 -0800
From: Alexei Starovoitov <alexei.starovoitov@...il.com>
To: Kumar Kartikeya Dwivedi <memxor@...il.com>
Cc: bpf@...r.kernel.org, netdev@...r.kernel.org,
netfilter-devel@...r.kernel.org,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <kafai@...com>,
Song Liu <songliubraving@...com>, Yonghong Song <yhs@...com>,
John Fastabend <john.fastabend@...il.com>,
Maxim Mikityanskiy <maximmi@...dia.com>,
Pablo Neira Ayuso <pablo@...filter.org>,
Florian Westphal <fw@...len.de>,
Jesper Dangaard Brouer <brouer@...hat.com>,
Toke Høiland-Jørgensen <toke@...hat.com>
Subject: Re: [PATCH bpf-next v5 2/9] bpf: Prepare kfunc BTF ID sets when
parsing kernel BTF
On Thu, Dec 30, 2021 at 08:06:58AM +0530, Kumar Kartikeya Dwivedi wrote:
>
> The 'hook' is one of the many program types, e.g. XDP, TC/SCHED_CLS, or
> STRUCT_OPS, and the 'types' are check (allowed or not), acquire, release,
> and ret_null (with a PTR_TO_BTF_ID_OR_NULL return type).
>
> A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
> set for a given hook and type. Sets are allocated on demand, and are
> otherwise NULL.
>
> A new btf_kfunc_id_set_contains function is exposed for use in the
> verifier. This new method is faster than the existing list-searching
> method and is also automatic; it also lets other code not care whether
> the set is unallocated or not.
>
> The next commit will update the kernel users to make use of this
> infrastructure.
>
> Finally, add a __maybe_unused annotation to the BTF ID macros for the
> !CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings at
> build time.
>
> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@...il.com>
>
> fixup maybe_unused
> ---
> include/linux/btf.h | 25 ++++
> include/linux/btf_ids.h | 20 ++-
> kernel/bpf/btf.c | 275 +++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 312 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/btf.h b/include/linux/btf.h
> index 0c74348cbc9d..48ac2dc437a2 100644
> --- a/include/linux/btf.h
> +++ b/include/linux/btf.h
> @@ -300,6 +300,21 @@ static inline const struct btf_var_secinfo *btf_type_var_secinfo(
> return (const struct btf_var_secinfo *)(t + 1);
> }
>
> +enum btf_kfunc_hook {
> + BTF_KFUNC_HOOK_XDP,
> + BTF_KFUNC_HOOK_TC,
> + BTF_KFUNC_HOOK_STRUCT_OPS,
> + _BTF_KFUNC_HOOK_MAX,
> +};
> +
> +enum btf_kfunc_type {
> + BTF_KFUNC_TYPE_CHECK,
> + BTF_KFUNC_TYPE_ACQUIRE,
> + BTF_KFUNC_TYPE_RELEASE,
> + BTF_KFUNC_TYPE_RET_NULL,
> + _BTF_KFUNC_TYPE_MAX,
> +};
> +
> #ifdef CONFIG_BPF_SYSCALL
> struct bpf_prog;
>
> @@ -307,6 +322,9 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
> const char *btf_name_by_offset(const struct btf *btf, u32 offset);
> struct btf *btf_parse_vmlinux(void);
> struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
> +bool btf_kfunc_id_set_contains(const struct btf *btf,
> + enum bpf_prog_type prog_type,
> + enum btf_kfunc_type type, u32 kfunc_btf_id);
> #else
> static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
> u32 type_id)
> @@ -318,6 +336,13 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
> {
> return NULL;
> }
> +static inline bool btf_kfunc_id_set_contains(const struct btf *btf,
> + enum bpf_prog_type prog_type,
> + enum btf_kfunc_type type,
> + u32 kfunc_btf_id)
> +{
> + return false;
> +}
> #endif
>
> struct kfunc_btf_id_set {
> diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
> index 919c0fde1c51..835fbf626ef1 100644
> --- a/include/linux/btf_ids.h
> +++ b/include/linux/btf_ids.h
> @@ -11,6 +11,7 @@ struct btf_id_set {
> #ifdef CONFIG_DEBUG_INFO_BTF
>
> #include <linux/compiler.h> /* for __PASTE */
> +#include <linux/compiler_attributes.h> /* for __maybe_unused */
>
> /*
> * Following macros help to define lists of BTF IDs placed
> @@ -144,17 +145,24 @@ asm( \
> ".popsection; \n"); \
> extern struct btf_id_set name;
>
> +#define BTF_KFUNC_SET_START(hook, type, name) \
> + BTF_SET_START(btf_kfunc_set_##hook##_##type##_##name)
> +#define BTF_KFUNC_SET_END(hook, type, name) \
> + BTF_SET_END(btf_kfunc_set_##hook##_##type##_##name)
> +
> #else
>
> -#define BTF_ID_LIST(name) static u32 name[5];
> +#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
> #define BTF_ID(prefix, name)
> #define BTF_ID_UNUSED
> -#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n];
> -#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
> -#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1];
> -#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
> -#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
> +#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n];
> +#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1];
> +#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1];
> +#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 };
> +#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 };
> #define BTF_SET_END(name)
> +#define BTF_KFUNC_SET_START(hook, type, name) BTF_SET_START(name)
> +#define BTF_KFUNC_SET_END(hook, type, name) BTF_SET_END(name)
>
> #endif /* CONFIG_DEBUG_INFO_BTF */
>
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index 33bb8ae4a804..c03c7b5a417c 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -1,6 +1,8 @@
> /* SPDX-License-Identifier: GPL-2.0 */
> /* Copyright (c) 2018 Facebook */
>
> +#include <linux/kallsyms.h>
> +#include <linux/module.h>
> #include <uapi/linux/btf.h>
> #include <uapi/linux/bpf.h>
> #include <uapi/linux/bpf_perf_event.h>
> @@ -198,6 +200,8 @@
> DEFINE_IDR(btf_idr);
> DEFINE_SPINLOCK(btf_idr_lock);
>
> +struct btf_kfunc_set_tab;
> +
> struct btf {
> void *data;
> struct btf_type **types;
> @@ -212,6 +216,7 @@ struct btf {
> refcount_t refcnt;
> u32 id;
> struct rcu_head rcu;
> + struct btf_kfunc_set_tab *kfunc_set_tab;
>
> /* split BTF support */
> struct btf *base_btf;
> @@ -221,6 +226,31 @@ struct btf {
> bool kernel_btf;
> };
>
> +#define BTF_KFUNC_SET_PREFIX "btf_kfunc_set_"
> +
> +BTF_ID_LIST_SINGLE(btf_id_set_id, struct, btf_id_set)
> +
> +static const char *kfunc_hook_str[_BTF_KFUNC_HOOK_MAX] = {
> + [BTF_KFUNC_HOOK_XDP] = "xdp_",
> + [BTF_KFUNC_HOOK_TC] = "tc_",
> + [BTF_KFUNC_HOOK_STRUCT_OPS] = "struct_ops_",
> +};
> +
> +static const char *kfunc_type_str[_BTF_KFUNC_TYPE_MAX] = {
> + [BTF_KFUNC_TYPE_CHECK] = "check_",
> + [BTF_KFUNC_TYPE_ACQUIRE] = "acquire_",
> + [BTF_KFUNC_TYPE_RELEASE] = "release_",
> + [BTF_KFUNC_TYPE_RET_NULL] = "ret_null_",
> +};
> +
> +enum {
> + BTF_KFUNC_SET_MAX_CNT = 32,
> +};
> +
> +struct btf_kfunc_set_tab {
> + struct btf_id_set *sets[_BTF_KFUNC_HOOK_MAX][_BTF_KFUNC_TYPE_MAX];
> +};
> +
> enum verifier_phase {
> CHECK_META,
> CHECK_TYPE,
> @@ -1531,8 +1561,21 @@ static void btf_free_id(struct btf *btf)
> spin_unlock_irqrestore(&btf_idr_lock, flags);
> }
>
> +static void btf_free_kfunc_set_tab(struct btf_kfunc_set_tab *tab)
> +{
> + int hook, type;
> +
> + if (!tab)
> + return;
> + for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) {
> + for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++)
> + kfree(tab->sets[hook][type]);
> + }
> +}
> +
> static void btf_free(struct btf *btf)
> {
> + btf_free_kfunc_set_tab(btf->kfunc_set_tab);
> kvfree(btf->types);
> kvfree(btf->resolved_sizes);
> kvfree(btf->resolved_ids);
> @@ -4675,6 +4718,223 @@ static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
> BTF_ID_LIST(bpf_ctx_convert_btf_id)
> BTF_ID(struct, bpf_ctx_convert)
>
> +struct btf_parse_kfunc_data {
> + struct btf *btf;
> + struct bpf_verifier_log *log;
> +};
> +
> +static int btf_populate_kfunc_sets(struct btf *btf,
> + struct bpf_verifier_log *log,
> + enum btf_kfunc_hook hook,
> + enum btf_kfunc_type type,
> + struct btf_id_set *add_set)
> +{
> + struct btf_id_set *set, *tmp_set;
> + struct btf_kfunc_set_tab *tab;
> + u32 set_cnt;
> + int ret;
> +
> + if (WARN_ON_ONCE(hook >= _BTF_KFUNC_HOOK_MAX || type >= _BTF_KFUNC_TYPE_MAX))
> + return -EINVAL;
> + if (!add_set->cnt)
> + return 0;
> +
> + tab = btf->kfunc_set_tab;
> + if (!tab) {
> + tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
> + if (!tab)
> + return -ENOMEM;
> + btf->kfunc_set_tab = tab;
> + }
> +
> + set = tab->sets[hook][type];
> + set_cnt = set ? set->cnt : 0;
> +
> + if (set_cnt > U32_MAX - add_set->cnt) {
> + ret = -EOVERFLOW;
> + goto end;
> + }
> +
> + if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
> + bpf_log(log, "max kfunc (%d) for hook '%s' type '%s' exceeded\n",
> + BTF_KFUNC_SET_MAX_CNT, kfunc_hook_str[hook], kfunc_type_str[type]);
> + ret = -E2BIG;
> + goto end;
> + }
> +
> + /* Grow set */
> + tmp_set = krealloc(set, offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]),
> + GFP_KERNEL | __GFP_NOWARN);
> + if (!tmp_set) {
> + ret = -ENOMEM;
> + goto end;
> + }
> +
> + /* For newly allocated set, initialize set->cnt to 0 */
> + if (!set)
> + tmp_set->cnt = 0;
> +
> + tab->sets[hook][type] = tmp_set;
> + set = tmp_set;
> +
> + /* Concatenate the two sets */
> + memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0]));
> + set->cnt += add_set->cnt;
> +
> + return 0;
> +end:
> + btf_free_kfunc_set_tab(tab);
> + btf->kfunc_set_tab = NULL;
> + return ret;
> +}
> +
> +static int btf_kfunc_ids_cmp(const void *a, const void *b)
> +{
> + const u32 *id1 = a;
> + const u32 *id2 = b;
> +
> + if (*id1 < *id2)
> + return -1;
> + if (*id1 > *id2)
> + return 1;
> + return 0;
> +}
> +
> +static int btf_parse_kfunc_sets_cb(void *data, const char *symbol_name,
> + struct module *mod,
> + unsigned long symbol_value)
> +{
> + int pfx_size = sizeof(BTF_KFUNC_SET_PREFIX) - 1;
> + struct btf_id_set *set = (void *)symbol_value;
> + struct btf_parse_kfunc_data *bdata = data;
> + const char *orig_name = symbol_name;
> + int i, hook, type;
> +
> + BUILD_BUG_ON(ARRAY_SIZE(kfunc_hook_str) != _BTF_KFUNC_HOOK_MAX);
> + BUILD_BUG_ON(ARRAY_SIZE(kfunc_type_str) != _BTF_KFUNC_TYPE_MAX);
> +
> + if (strncmp(symbol_name, BTF_KFUNC_SET_PREFIX, pfx_size))
> + return 0;
> +
> + /* Identify hook */
> + symbol_name += pfx_size;
> + if (!*symbol_name) {
> + bpf_log(bdata->log, "incomplete kfunc btf_id_set specification: %s\n", orig_name);
> + return -EINVAL;
> + }
> + for (i = 0; i < ARRAY_SIZE(kfunc_hook_str); i++) {
> + pfx_size = strlen(kfunc_hook_str[i]);
> + if (strncmp(symbol_name, kfunc_hook_str[i], pfx_size))
> + continue;
> + break;
> + }
> + if (i == ARRAY_SIZE(kfunc_hook_str)) {
> + bpf_log(bdata->log, "invalid hook '%s' for kfunc_btf_id_set %s\n", symbol_name,
> + orig_name);
> + return -EINVAL;
> + }
> + hook = i;
> +
> + /* Identify type */
> + symbol_name += pfx_size;
> + if (!*symbol_name) {
> + bpf_log(bdata->log, "incomplete kfunc btf_id_set specification: %s\n", orig_name);
> + return -EINVAL;
> + }
> + for (i = 0; i < ARRAY_SIZE(kfunc_type_str); i++) {
> + pfx_size = strlen(kfunc_type_str[i]);
> + if (strncmp(symbol_name, kfunc_type_str[i], pfx_size))
> + continue;
> + break;
> + }
> + if (i == ARRAY_SIZE(kfunc_type_str)) {
> + bpf_log(bdata->log, "invalid type '%s' for kfunc_btf_id_set %s\n", symbol_name,
> + orig_name);
> + return -EINVAL;
> + }
> + type = i;
> +
> + return btf_populate_kfunc_sets(bdata->btf, bdata->log, hook, type, set);
I really like the direction taken by patches 2 and 3.
I think we can save the module_kallsyms_on_each_symbol loop though.
The registration mechanism, like:
register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
doesn't have to be completely removed.
It can be replaced with a sequence of calls:
btf_populate_kfunc_sets(btf, hook, type, set);
from __init of the module.
The module knows its 'hook' and 'type' and set==&bpf_testmod_kfunc_btf_set.
The bpf_testmod_init() can call btf_populate_kfunc_sets() multiple
times to populate sets for different hooks and types.
There is no need to 'unregister' any more.
And patch 1 will no longer be necessary, since we don't need to iterate
over every symbol of the module with module_kallsyms_on_each_symbol().
No need to standardize on the prefix and kfunc_[hook|type]_str,
though it's probably a good idea anyway across module BTF sets.
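(For reference, with the naming scheme in this patch a module set like
the following, kfunc and set names made up:

	BTF_KFUNC_SET_START(tc, check, nf_ct)
	BTF_ID(func, bpf_ct_lookup)
	BTF_KFUNC_SET_END(tc, check, nf_ct)

ends up as a symbol named btf_kfunc_set_tc_check_nf_ct, which is what
the prefix matching in the callback above keys on.)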
The main disadvantage is that we lose 'log' in btf_populate_kfunc_sets(),
since the module's __init cannot have a verifier log at that point.
But it can stay as 'ret = -E2BIG;' without bpf_log(), and module
registration will fail in such a case, or we just warn inside __init in
the rare case that btf_populate_kfunc_sets() fails.
wdyt?
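Something like this on the module side (just a sketch; how the module
gets a hold of its own struct btf is handwaved here, and the log
argument to btf_populate_kfunc_sets() is dropped):

	static int __init bpf_testmod_init(void)
	{
		int ret;

		/* one call per hook/type combination the module provides */
		ret = btf_populate_kfunc_sets(btf, BTF_KFUNC_HOOK_TC,
					      BTF_KFUNC_TYPE_CHECK,
					      &bpf_testmod_kfunc_btf_set);
		if (ret)
			pr_warn("bpf_testmod: populating kfunc set failed: %d\n",
				ret);
		return ret;
	}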
> +}
> +
> +static int btf_parse_kfunc_sets(struct btf *btf, struct module *mod,
> + struct bpf_verifier_log *log)
> +{
> + struct btf_parse_kfunc_data data = { .btf = btf, .log = log, };
> + struct btf_kfunc_set_tab *tab;
> + int hook, type, ret;
> +
> + if (!btf_is_kernel(btf))
> + return -EINVAL;
> + if (WARN_ON_ONCE(btf_is_module(btf) && !mod)) {
> + bpf_log(log, "btf internal error: no module for module BTF %s\n", btf->name);
> + return -EFAULT;
> + }
> + if (mod)
> + ret = module_kallsyms_on_each_symbol(mod, btf_parse_kfunc_sets_cb, &data);
> + else
> + ret = kallsyms_on_each_symbol(btf_parse_kfunc_sets_cb, &data);
> +
> + tab = btf->kfunc_set_tab;
> + if (!ret && tab) {
> + /* Sort all populated sets */
> + for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) {
> + for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++) {
> + struct btf_id_set *set = tab->sets[hook][type];
> +
> + /* Not all sets may be populated */
> + if (!set)
> + continue;
> + sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_kfunc_ids_cmp,
> + NULL);
Didn't resolve_btfids already store the ids sorted?
Why is another sort needed?
Because btf_populate_kfunc_sets() can concatenate the sets (and two
individually sorted sets concatenated aren't sorted as a whole)?
But if we let __init call it directly, the module shouldn't use
the same hook/type combination multiple times with different sets.
So no secondary sorting will be necessary?
> This commit prepares the BTF parsing functions for vmlinux and module
> BTFs to find all kfunc BTF ID sets from the vmlinux and module symbols
> and concatenate all sets into a single unified set which is sorted and
> keyed by the 'hook' it is meant for, and the 'type' of set.
'sorted by hook' ?
btf_id_set_contains() needs to search it by 'id', so it's sorted by 'id'.
Is it because you're adding mod's IDs to vmlinux IDs and re-sorting them?
I think that's not worth optimizing. Patch 5 is doing:
btf_kfunc_id_set_contains(btf, prog_type, BTF_KFUNC_TYPE_RELEASE|ACQUIRE|RET_NULL, id)
but btf_kfunc_id_set_contains doesn't have to do it in a single bsearch.
The struct btf of the module has base_btf.
So btf_kfunc_id_set_contains() can do a bsearch twice: once in the mod's
btf sets[hook][type] and once in the vmlinux btf sets[hook][type],
and no secondary sorting will be necessary.
wdyt?
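Roughly (a sketch; prog_type_to_kfunc_hook() is a made-up mapping
helper, and btf_id_set_contains() is the bsearch-based helper already
in btf.c):

	bool btf_kfunc_id_set_contains(const struct btf *btf,
				       enum bpf_prog_type prog_type,
				       enum btf_kfunc_type type,
				       u32 kfunc_btf_id)
	{
		enum btf_kfunc_hook hook = prog_type_to_kfunc_hook(prog_type);
		struct btf_id_set *set;

		do {
			if (btf->kfunc_set_tab) {
				set = btf->kfunc_set_tab->sets[hook][type];
				if (set && btf_id_set_contains(set, kfunc_btf_id))
					return true;
			}
			/* for module BTF, base_btf walks up to vmlinux BTF */
			btf = btf->base_btf;
		} while (btf);

		return false;
	}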