[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAPhsuW4-tmiBAji-=zx5gRRweioXTQM__EqaJGTPQ63SLphoUA@mail.gmail.com>
Date: Sat, 15 Jun 2019 15:00:59 -0700
From: Song Liu <liu.song.a23@...il.com>
To: Andrii Nakryiko <andriin@...com>
Cc: Andrii Nakryiko <andrii.nakryiko@...il.com>,
bpf <bpf@...r.kernel.org>, Networking <netdev@...r.kernel.org>,
Alexei Starovoitov <ast@...com>,
Daniel Borkmann <daniel@...earbox.net>,
Kernel Team <kernel-team@...com>
Subject: Re: [PATCH bpf-next 6/8] libbpf: allow specifying map definitions
using BTF
On Mon, Jun 10, 2019 at 9:49 PM Andrii Nakryiko <andriin@...com> wrote:
>
> This patch adds support for a new way to define BPF maps. It relies on
> BTF to describe mandatory and optional attributes of a map, as well as
> captures type information of key and value naturally. This eliminates
> the need for BPF_ANNOTATE_KV_PAIR hack and ensures key/value sizes are
> always in sync with the key/value type.
>
> Relying on BTF, this approach allows for both forward and backward
> compatibility w.r.t. extending supported map definition features. By
> default, any unrecognized attributes are treated as an error, but it's
> possible relax this using MAPS_RELAX_COMPAT flag. New attributes, added
> in the future will need to be optional.
>
> The outline of the new map definition (short, BTF-defined maps) is as follows:
> 1. All the maps should be defined in .maps ELF section. It's possible to
> have both "legacy" map definitions in `maps` sections and BTF-defined
> maps in .maps sections. Everything will still work transparently.
> 2. The map declaration and initialization is done through
> a global/static variable of a struct type with few mandatory and
> extra optional fields:
> - type field is mandatory and specified type of BPF map;
> - key/value fields are mandatory and capture key/value type/size information;
> - max_entries attribute is optional; if max_entries is not specified or
> initialized, it has to be provided in runtime through libbpf API
> before loading bpf_object;
> - map_flags is optional and if not defined, will be assumed to be 0.
> 3. Key/value fields should be **a pointer** to a type describing
> key/value. The pointee type is assumed (and will be recorded as such
> and used for size determination) to be a type describing key/value of
> the map. This is done to save excessive amounts of space allocated in
> corresponding ELF sections for key/value of big size.
> 4. As some maps disallow having BTF type ID associated with key/value,
> it's possible to specify key/value size explicitly without
> associating BTF type ID with it. Use key_size and value_size fields
> to do that (see example below).
>
> Here's an example of simple ARRAY map defintion:
>
> struct my_value { int x, y, z; };
>
> struct {
> int type;
> int max_entries;
> int *key;
> struct my_value *value;
> } btf_map SEC(".maps") = {
> .type = BPF_MAP_TYPE_ARRAY,
> .max_entries = 16,
> };
>
> This will define BPF ARRAY map 'btf_map' with 16 elements. The key will
> be of type int and thus key size will be 4 bytes. The value is struct
> my_value of size 12 bytes. This map can be used from C code exactly the
> same as with existing maps defined through struct bpf_map_def.
>
> Here's an example of STACKMAP definition (which currently disallows BTF type
> IDs for key/value):
>
> struct {
> __u32 type;
> __u32 max_entries;
> __u32 map_flags;
> __u32 key_size;
> __u32 value_size;
> } stackmap SEC(".maps") = {
> .type = BPF_MAP_TYPE_STACK_TRACE,
> .max_entries = 128,
> .map_flags = BPF_F_STACK_BUILD_ID,
> .key_size = sizeof(__u32),
> .value_size = PERF_MAX_STACK_DEPTH * sizeof(struct bpf_stack_build_id),
> };
>
> This approach is naturally extended to support map-in-map, by making a value
> field to be another struct that describes inner map. This feature is not
> implemented yet. It's also possible to incrementally add features like pinning
> with full backwards and forward compatibility. Support for static
> initialization of BPF_MAP_TYPE_PROG_ARRAY using pointers to BPF programs
> is also on the roadmap.
>
> Signed-off-by: Andrii Nakryiko <andriin@...com>
> ---
> tools/lib/bpf/btf.h | 1 +
> tools/lib/bpf/libbpf.c | 338 +++++++++++++++++++++++++++++++++++++++--
> 2 files changed, 330 insertions(+), 9 deletions(-)
>
> diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> index ba4ffa831aa4..88a52ae56fc6 100644
> --- a/tools/lib/bpf/btf.h
> +++ b/tools/lib/bpf/btf.h
> @@ -17,6 +17,7 @@ extern "C" {
>
> #define BTF_ELF_SEC ".BTF"
> #define BTF_EXT_ELF_SEC ".BTF.ext"
> +#define MAPS_ELF_SEC ".maps"
How about .BTF.maps? Or maybe this doesn't realy matter?
>
> struct btf;
> struct btf_ext;
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 79a8143240d7..60713bcc2279 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -262,6 +262,7 @@ struct bpf_object {
> } *reloc;
> int nr_reloc;
> int maps_shndx;
> + int btf_maps_shndx;
> int text_shndx;
> int data_shndx;
> int rodata_shndx;
> @@ -514,6 +515,7 @@ static struct bpf_object *bpf_object__new(const char *path,
> obj->efile.obj_buf = obj_buf;
> obj->efile.obj_buf_sz = obj_buf_sz;
> obj->efile.maps_shndx = -1;
> + obj->efile.btf_maps_shndx = -1;
> obj->efile.data_shndx = -1;
> obj->efile.rodata_shndx = -1;
> obj->efile.bss_shndx = -1;
> @@ -1012,6 +1014,297 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
> return 0;
> }
>
> +static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
> + __u32 id)
> +{
> + const struct btf_type *t = btf__type_by_id(btf, id);
> +
> + while (true) {
> + switch (BTF_INFO_KIND(t->info)) {
> + case BTF_KIND_VOLATILE:
> + case BTF_KIND_CONST:
> + case BTF_KIND_RESTRICT:
> + case BTF_KIND_TYPEDEF:
> + t = btf__type_by_id(btf, t->type);
> + break;
> + default:
> + return t;
> + }
> + }
> +}
> +
> +static bool get_map_attr_int(const char *map_name,
> + const struct btf *btf,
> + const struct btf_type *def,
> + const struct btf_member *m,
> + const void *data, __u32 *res) {
Can we avoid output pointer by return 0 for invalid types?
> + const struct btf_type *t = skip_mods_and_typedefs(btf, m->type);
> + const char *name = btf__name_by_offset(btf, m->name_off);
> + __u32 int_info = *(const __u32 *)(const void *)(t + 1);
> +
> + if (BTF_INFO_KIND(t->info) != BTF_KIND_INT) {
> + pr_warning("map '%s': attr '%s': expected INT, got %u.\n",
> + map_name, name, BTF_INFO_KIND(t->info));
> + return false;
> + }
> + if (t->size != 4 || BTF_INT_BITS(int_info) != 32 ||
> + BTF_INT_OFFSET(int_info)) {
> + pr_warning("map '%s': attr '%s': expected 32-bit non-bitfield integer, "
> + "got %u-byte (%d-bit) one with bit offset %d.\n",
> + map_name, name, t->size, BTF_INT_BITS(int_info),
> + BTF_INT_OFFSET(int_info));
> + return false;
> + }
> + if (BTF_INFO_KFLAG(def->info) && BTF_MEMBER_BITFIELD_SIZE(m->offset)) {
> + pr_warning("map '%s': attr '%s': bitfield is not supported.\n",
> + map_name, name);
> + return false;
> + }
> + if (m->offset % 32) {
Shall we just do "m->offset > 0" here?
> + pr_warning("map '%s': attr '%s': unaligned fields are not supported.\n",
> + map_name, name);
> + return false;
> + }
> +
> + *res = *(const __u32 *)(data + m->offset / 8);
> + return true;
> +}
> +
> +static int bpf_object__init_user_btf_map(struct bpf_object *obj,
> + const struct btf_type *sec,
> + int var_idx, int sec_idx,
> + const Elf_Data *data, bool strict)
> +{
> + const struct btf_type *var, *def, *t;
> + const struct btf_var_secinfo *vi;
> + const struct btf_var *var_extra;
> + const struct btf_member *m;
> + const void *def_data;
> + const char *map_name;
> + struct bpf_map *map;
> + int vlen, i;
> +
> + vi = (const struct btf_var_secinfo *)(const void *)(sec + 1) + var_idx;
> + var = btf__type_by_id(obj->btf, vi->type);
> + var_extra = (const void *)(var + 1);
> + map_name = btf__name_by_offset(obj->btf, var->name_off);
> + vlen = BTF_INFO_VLEN(var->info);
> +
> + if (map_name == NULL || map_name[0] == '\0') {
> + pr_warning("map #%d: empty name.\n", var_idx);
> + return -EINVAL;
> + }
> + if ((__u64)vi->offset + vi->size > data->d_size) {
> + pr_warning("map '%s' BTF data is corrupted.\n", map_name);
> + return -EINVAL;
> + }
> + if (BTF_INFO_KIND(var->info) != BTF_KIND_VAR) {
> + pr_warning("map '%s': unexpected var kind %u.\n",
> + map_name, BTF_INFO_KIND(var->info));
> + return -EINVAL;
> + }
> + if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
> + var_extra->linkage != BTF_VAR_STATIC) {
> + pr_warning("map '%s': unsupported var linkage %u.\n",
> + map_name, var_extra->linkage);
> + return -EOPNOTSUPP;
> + }
> +
> + def = skip_mods_and_typedefs(obj->btf, var->type);
> + if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) {
> + pr_warning("map '%s': unexpected def kind %u.\n",
> + map_name, BTF_INFO_KIND(var->info));
> + return -EINVAL;
> + }
> + if (def->size > vi->size) {
> + pr_warning("map '%s': invalid def size.\n", map_name);
> + return -EINVAL;
> + }
> +
> + map = bpf_object__add_map(obj);
> + if (IS_ERR(map))
> + return PTR_ERR(map);
> + map->name = strdup(map_name);
> + if (!map->name) {
> + pr_warning("map '%s': failed to alloc map name.\n", map_name);
> + return -ENOMEM;
> + }
> + map->libbpf_type = LIBBPF_MAP_UNSPEC;
> + map->def.type = BPF_MAP_TYPE_UNSPEC;
> + map->sec_idx = sec_idx;
> + map->sec_offset = vi->offset;
> + pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
> + map_name, map->sec_idx, map->sec_offset);
> +
> + def_data = data->d_buf + vi->offset;
> + vlen = BTF_INFO_VLEN(def->info);
> + m = (const void *)(def + 1);
> + for (i = 0; i < vlen; i++, m++) {
> + const char *name = btf__name_by_offset(obj->btf, m->name_off);
> +
I guess we need to check name == NULL here?
> + if (strcmp(name, "type") == 0) {
> + if (!get_map_attr_int(map_name, obj->btf, def, m,
> + def_data, &map->def.type))
> + return -EINVAL;
> + pr_debug("map '%s': found type = %u.\n",
> + map_name, map->def.type);
> + } else if (strcmp(name, "max_entries") == 0) {
> + if (!get_map_attr_int(map_name, obj->btf, def, m,
> + def_data, &map->def.max_entries))
> + return -EINVAL;
> + pr_debug("map '%s': found max_entries = %u.\n",
> + map_name, map->def.max_entries);
> + } else if (strcmp(name, "map_flags") == 0) {
> + if (!get_map_attr_int(map_name, obj->btf, def, m,
> + def_data, &map->def.map_flags))
> + return -EINVAL;
> + pr_debug("map '%s': found map_flags = %u.\n",
> + map_name, map->def.map_flags);
> + } else if (strcmp(name, "key_size") == 0) {
> + __u32 sz;
> +
> + if (!get_map_attr_int(map_name, obj->btf, def, m,
> + def_data, &sz))
> + return -EINVAL;
> + pr_debug("map '%s': found key_size = %u.\n",
> + map_name, sz);
> + if (map->def.key_size && map->def.key_size != sz) {
> + pr_warning("map '%s': conflictling key size %u != %u.\n",
> + map_name, map->def.key_size, sz);
> + return -EINVAL;
> + }
> + map->def.key_size = sz;
> + } else if (strcmp(name, "key") == 0) {
> + __s64 sz;
> +
> + t = btf__type_by_id(obj->btf, m->type);
> + if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
check t != NULL?
> + pr_warning("map '%s': key spec is not PTR: %u.\n",
> + map_name, BTF_INFO_KIND(t->info));
> + return -EINVAL;
> + }
> + sz = btf__resolve_size(obj->btf, t->type);
> + if (sz < 0) {
> + pr_warning("map '%s': can't determine key size for type [%u]: %lld.\n",
> + map_name, t->type, sz);
> + return sz;
> + }
> + pr_debug("map '%s': found key [%u], sz = %lld.\n",
> + map_name, t->type, sz);
> + if (map->def.key_size && map->def.key_size != sz) {
> + pr_warning("map '%s': conflictling key size %u != %lld.\n",
> + map_name, map->def.key_size, sz);
> + return -EINVAL;
> + }
> + map->def.key_size = sz;
> + map->btf_key_type_id = t->type;
> + } else if (strcmp(name, "value_size") == 0) {
> + __u32 sz;
> +
> + if (!get_map_attr_int(map_name, obj->btf, def, m,
> + def_data, &sz))
> + return -EINVAL;
> + pr_debug("map '%s': found value_size = %u.\n",
> + map_name, sz);
> + if (map->def.value_size && map->def.value_size != sz) {
> + pr_warning("map '%s': conflictling value size %u != %u.\n",
> + map_name, map->def.value_size, sz);
> + return -EINVAL;
> + }
> + map->def.value_size = sz;
> + } else if (strcmp(name, "value") == 0) {
> + __s64 sz;
> +
> + t = btf__type_by_id(obj->btf, m->type);
ditto.
> + if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
> + pr_warning("map '%s': value spec is not PTR: %u.\n",
> + map_name, BTF_INFO_KIND(t->info));
> + return -EINVAL;
> + }
> + sz = btf__resolve_size(obj->btf, t->type);
> + if (sz < 0) {
> + pr_warning("map '%s': can't determine value size for type [%u]: %lld.\n",
> + map_name, t->type, sz);
> + return sz;
> + }
> + pr_debug("map '%s': found value [%u], sz = %lld.\n",
> + map_name, t->type, sz);
> + if (map->def.value_size && map->def.value_size != sz) {
> + pr_warning("map '%s': conflictling value size %u != %lld.\n",
> + map_name, map->def.value_size, sz);
> + return -EINVAL;
> + }
> + map->def.value_size = sz;
> + map->btf_value_type_id = t->type;
> + } else {
> + if (strict) {
> + pr_warning("map '%s': unknown attribute '%s'.\n",
> + map_name, name);
> + return -ENOTSUP;
> + }
> + pr_debug("map '%s': ignoring unknown attribute '%s'.\n",
> + map_name, name);
> + }
> + }
> +
> + if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
> + pr_warning("map '%s': map type isn't specified.\n", map_name);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
> +{
> + const struct btf_type *sec = NULL;
> + int nr_types, i, vlen, err;
> + const struct btf_type *t;
> + const char *name;
> + Elf_Data *data;
> + Elf_Scn *scn;
> +
> + if (obj->efile.btf_maps_shndx < 0)
> + return 0;
> +
> + scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx);
> + if (scn)
> + data = elf_getdata(scn, NULL);
> + if (!scn || !data) {
> + pr_warning("failed to get Elf_Data from map section %d (%s)\n",
> + obj->efile.maps_shndx, MAPS_ELF_SEC);
> + return -EINVAL;
> + }
> +
> + nr_types = btf__get_nr_types(obj->btf);
> + for (i = 1; i <= nr_types; i++) {
> + t = btf__type_by_id(obj->btf, i);
> + if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
> + continue;
> + name = btf__name_by_offset(obj->btf, t->name_off);
> + if (strcmp(name, MAPS_ELF_SEC) == 0) {
> + sec = t;
> + break;
> + }
> + }
> +
> + if (!sec) {
> + pr_warning("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
> + return -ENOENT;
> + }
> +
> + vlen = BTF_INFO_VLEN(sec->info);
> + for (i = 0; i < vlen; i++) {
> + err = bpf_object__init_user_btf_map(obj, sec, i,
> + obj->efile.btf_maps_shndx,
> + data, strict);
> + if (err)
> + return err;
> + }
> +
> + return 0;
> +}
> +
> static int bpf_object__init_maps(struct bpf_object *obj, int flags)
> {
> bool strict = !(flags & MAPS_RELAX_COMPAT);
> @@ -1021,6 +1314,10 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)
> if (err)
> return err;
>
> + err = bpf_object__init_user_btf_maps(obj, strict);
> + if (err)
> + return err;
> +
> err = bpf_object__init_global_data_maps(obj);
> if (err)
> return err;
> @@ -1118,10 +1415,16 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
> }
> }
>
> +static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
> +{
> + return obj->efile.btf_maps_shndx >= 0;
> +}
> +
> static int bpf_object__init_btf(struct bpf_object *obj,
> Elf_Data *btf_data,
> Elf_Data *btf_ext_data)
> {
> + bool btf_required = bpf_object__is_btf_mandatory(obj);
> int err = 0;
>
> if (btf_data) {
> @@ -1155,10 +1458,18 @@ static int bpf_object__init_btf(struct bpf_object *obj,
> }
> out:
> if (err || IS_ERR(obj->btf)) {
> + if (btf_required)
> + err = err ? : PTR_ERR(obj->btf);
> + else
> + err = 0;
> if (!IS_ERR_OR_NULL(obj->btf))
> btf__free(obj->btf);
> obj->btf = NULL;
> }
> + if (btf_required && !obj->btf) {
> + pr_warning("BTF is required, but is missing or corrupted.\n");
> + return err == 0 ? -ENOENT : err;
> + }
> return 0;
> }
>
> @@ -1178,6 +1489,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
> BTF_ELF_SEC, err);
> btf__free(obj->btf);
> obj->btf = NULL;
> + if (bpf_object__is_btf_mandatory(obj))
> + return err;
> }
> return 0;
> }
> @@ -1241,6 +1554,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
> return err;
> } else if (strcmp(name, "maps") == 0) {
> obj->efile.maps_shndx = idx;
> + } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
> + obj->efile.btf_maps_shndx = idx;
> } else if (strcmp(name, BTF_ELF_SEC) == 0) {
> btf_data = data;
> } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
> @@ -1360,7 +1675,8 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
> static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
> int shndx)
> {
> - return shndx == obj->efile.maps_shndx;
> + return shndx == obj->efile.maps_shndx ||
> + shndx == obj->efile.btf_maps_shndx;
> }
>
> static bool bpf_object__relo_in_known_section(const struct bpf_object *obj,
> @@ -1404,14 +1720,14 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
> prog->nr_reloc = nrels;
>
> for (i = 0; i < nrels; i++) {
> - GElf_Sym sym;
> - GElf_Rel rel;
> - unsigned int insn_idx;
> - unsigned int shdr_idx;
> struct bpf_insn *insns = prog->insns;
> enum libbpf_map_type type;
> + unsigned int insn_idx;
> + unsigned int shdr_idx;
> const char *name;
> size_t map_idx;
> + GElf_Sym sym;
> + GElf_Rel rel;
>
> if (!gelf_getrel(data, i, &rel)) {
> pr_warning("relocation: failed to get %d reloc\n", i);
> @@ -1505,14 +1821,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
> return 0;
> }
>
> -static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
> +static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
> {
> struct bpf_map_def *def = &map->def;
> __u32 key_type_id = 0, value_type_id = 0;
> int ret;
>
> + /* if it's BTF-defined map, we don't need to search for type IDs */
> + if (map->sec_idx == obj->efile.btf_maps_shndx)
> + return 0;
> +
> if (!bpf_map__is_internal(map)) {
> - ret = btf__get_map_kv_tids(btf, map->name, def->key_size,
> + ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
> def->value_size, &key_type_id,
> &value_type_id);
> } else {
> @@ -1520,7 +1840,7 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
> * LLVM annotates global data differently in BTF, that is,
> * only as '.data', '.bss' or '.rodata'.
> */
> - ret = btf__find_by_name(btf,
> + ret = btf__find_by_name(obj->btf,
> libbpf_type_to_btf_name[map->libbpf_type]);
> }
> if (ret < 0)
> @@ -1810,7 +2130,7 @@ bpf_object__create_maps(struct bpf_object *obj)
> map->inner_map_fd >= 0)
> create_attr.inner_map_fd = map->inner_map_fd;
>
> - if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
> + if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
> create_attr.btf_fd = btf__fd(obj->btf);
> create_attr.btf_key_type_id = map->btf_key_type_id;
> create_attr.btf_value_type_id = map->btf_value_type_id;
> --
> 2.17.1
>
Powered by blists - more mailing lists