[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAHap4zuWAmBBhTpUef44z42RjQtqmhOMFjB-yBr8AuqGyYnKCw@mail.gmail.com>
Date: Thu, 3 Feb 2022 11:09:57 -0500
From: Mauricio Vásquez Bernal <mauricio@...volk.io>
To: Andrii Nakryiko <andrii.nakryiko@...il.com>
Cc: Networking <netdev@...r.kernel.org>, bpf <bpf@...r.kernel.org>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Quentin Monnet <quentin@...valent.com>,
Rafael David Tinoco <rafaeldtinoco@...il.com>,
Lorenzo Fontana <lorenzo.fontana@...stic.co>,
Leonardo Di Donato <leonardo.didonato@...stic.co>
Subject: Re: [PATCH bpf-next v5 5/9] bpftool: Implement btfgen()
On Wed, Feb 2, 2022 at 2:15 PM Andrii Nakryiko
<andrii.nakryiko@...il.com> wrote:
>
> On Fri, Jan 28, 2022 at 2:33 PM Mauricio Vásquez <mauricio@...volk.io> wrote:
> >
> > btfgen() receives the path of a source and destination BTF files and a
> > list of BPF objects. This function records the relocations for all
> > objects and then generates the BTF file by calling btfgen_get_btf()
> > (implemented in the following commits).
> >
> > btfgen_record_obj() loads the BTF and BTF.ext sections of the BPF
> > objects and loops through all CO-RE relocations. It uses
> > bpf_core_calc_relo_insn() from libbpf and passes the target spec to
> > btfgen_record_reloc() that saves the types involved in such relocation.
> >
> > Signed-off-by: Mauricio Vásquez <mauricio@...volk.io>
> > Signed-off-by: Rafael David Tinoco <rafael.tinoco@...asec.com>
> > Signed-off-by: Lorenzo Fontana <lorenzo.fontana@...stic.co>
> > Signed-off-by: Leonardo Di Donato <leonardo.didonato@...stic.co>
> > ---
> > tools/bpf/bpftool/Makefile | 8 +-
> > tools/bpf/bpftool/gen.c | 221 ++++++++++++++++++++++++++++++++++++-
> > 2 files changed, 223 insertions(+), 6 deletions(-)
> >
> > diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
> > index 83369f55df61..97d447135536 100644
> > --- a/tools/bpf/bpftool/Makefile
> > +++ b/tools/bpf/bpftool/Makefile
> > @@ -34,10 +34,10 @@ LIBBPF_BOOTSTRAP_INCLUDE := $(LIBBPF_BOOTSTRAP_DESTDIR)/include
> > LIBBPF_BOOTSTRAP_HDRS_DIR := $(LIBBPF_BOOTSTRAP_INCLUDE)/bpf
> > LIBBPF_BOOTSTRAP := $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a
> >
> > -# We need to copy hashmap.h and nlattr.h which is not otherwise exported by
> > -# libbpf, but still required by bpftool.
> > -LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h)
> > -LIBBPF_BOOTSTRAP_INTERNAL_HDRS := $(addprefix $(LIBBPF_BOOTSTRAP_HDRS_DIR)/,hashmap.h)
> > +# We need to copy hashmap.h, nlattr.h, relo_core.h and libbpf_internal.h
> > +# which are not otherwise exported by libbpf, but still required by bpftool.
> > +LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h relo_core.h libbpf_internal.h)
> > +LIBBPF_BOOTSTRAP_INTERNAL_HDRS := $(addprefix $(LIBBPF_BOOTSTRAP_HDRS_DIR)/,hashmap.h relo_core.h libbpf_internal.h)
> >
> > ifeq ($(BPFTOOL_VERSION),)
> > BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
> > diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
> > index 68bb88e86b27..bb9c56401ee5 100644
> > --- a/tools/bpf/bpftool/gen.c
> > +++ b/tools/bpf/bpftool/gen.c
> > @@ -15,6 +15,7 @@
> > #include <unistd.h>
> > #include <bpf/bpf.h>
> > #include <bpf/libbpf.h>
> > +#include <bpf/libbpf_internal.h>
> > #include <sys/types.h>
> > #include <sys/stat.h>
> > #include <sys/mman.h>
> > @@ -1143,6 +1144,11 @@ static void *uint_as_hash_key(int x)
> > return (void *)(uintptr_t)x;
> > }
> >
> > +static void *u32_as_hash_key(__u32 x)
> > +{
> > + return (void *)(uintptr_t)x;
> > +}
> > +
> > static void btfgen_free_type(struct btfgen_type *type)
> > {
> > free(type);
> > @@ -1193,12 +1199,223 @@ btfgen_new_info(const char *targ_btf_path)
> > return info;
> > }
> >
> > -/* Create BTF file for a set of BPF objects */
> > -static int btfgen(const char *src_btf, const char *dst_btf, const char *objspaths[])
> > +static int btfgen_record_field_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
> > +{
> > + return -EOPNOTSUPP;
> > +}
> > +
> > +static int btfgen_record_type_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
> > +{
> > + return -EOPNOTSUPP;
> > +}
> > +
> > +static int btfgen_record_enumval_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
> > {
> > return -EOPNOTSUPP;
> > }
> >
> > +static int btfgen_record_reloc(struct btfgen_info *info, struct bpf_core_spec *res)
> > +{
> > + switch (res->relo_kind) {
> > + case BPF_CORE_FIELD_BYTE_OFFSET:
> > + case BPF_CORE_FIELD_BYTE_SIZE:
> > + case BPF_CORE_FIELD_EXISTS:
> > + case BPF_CORE_FIELD_SIGNED:
> > + case BPF_CORE_FIELD_LSHIFT_U64:
> > + case BPF_CORE_FIELD_RSHIFT_U64:
> > + return btfgen_record_field_relo(info, res);
> > + case BPF_CORE_TYPE_ID_LOCAL:
> > + case BPF_CORE_TYPE_ID_TARGET:
> > + case BPF_CORE_TYPE_EXISTS:
> > + case BPF_CORE_TYPE_SIZE:
> > + return btfgen_record_type_relo(info, res);
> > + case BPF_CORE_ENUMVAL_EXISTS:
> > + case BPF_CORE_ENUMVAL_VALUE:
> > + return btfgen_record_enumval_relo(info, res);
> > + default:
> > + return -EINVAL;
> > + }
> > +}
> > +
> > +static struct bpf_core_cand_list *
> > +btfgen_find_cands(const struct btf *local_btf, const struct btf *targ_btf, __u32 local_id)
> > +{
> > + const struct btf_type *local_type;
> > + struct bpf_core_cand_list *cands = NULL;
> > + struct bpf_core_cand local_cand = {};
> > + size_t local_essent_len;
> > + const char *local_name;
> > + int err;
> > +
> > + local_cand.btf = local_btf;
> > + local_cand.id = local_id;
> > +
> > + local_type = btf__type_by_id(local_btf, local_id);
> > + if (!local_type) {
> > + err = -EINVAL;
> > + goto err_out;
> > + }
> > +
> > + local_name = btf__name_by_offset(local_btf, local_type->name_off);
> > + if (!local_name) {
> > + err = -EINVAL;
> > + goto err_out;
> > + }
> > + local_essent_len = bpf_core_essential_name_len(local_name);
> > +
> > + cands = calloc(1, sizeof(*cands));
> > + if (!cands)
> > + return NULL;
> > +
> > + err = bpf_core_add_cands(&local_cand, local_essent_len, targ_btf, "vmlinux", 1, cands);
> > + if (err)
> > + goto err_out;
> > +
> > + return cands;
> > +
> > +err_out:
> > + if (cands)
> > + bpf_core_free_cands(cands);
>
> we can also add if (!cands) return into bpf_core_free_cands(), like
> all other public destructor APIs in libbpf do
>
Makes sense.
> > + errno = -err;
> > + return NULL;
> > +}
> > +
> > +/* Record relocation information for a single BPF object*/
> > +static int btfgen_record_obj(struct btfgen_info *info, const char *obj_path)
> > +{
> > + const struct btf_ext_info_sec *sec;
> > + const struct bpf_core_relo *relo;
> > + const struct btf_ext_info *seg;
> > + struct hashmap *cand_cache;
> > + struct btf_ext *btf_ext;
> > + unsigned int relo_idx;
> > + struct btf *btf;
> > + int err;
> > +
> > + btf = btf__parse(obj_path, &btf_ext);
> > + err = libbpf_get_error(btf);
>
> same, libbpf 1.0 mode, no need for libbpf_get_error()
>
> > + if (err) {
> > + p_err("failed to parse bpf object '%s': %s", obj_path, strerror(errno));
>
> nit: "BPF object"? I'm not sure we do this consistently, but BPF
> should be spelled with capitals in logs
>
>
>
> > + return err;
> > + }
> > +
> > + if (btf_ext->core_relo_info.len == 0)
> > + return 0;
> > +
> > + cand_cache = bpf_core_create_cand_cache();
> > + if (IS_ERR(cand_cache))
> > + return PTR_ERR(cand_cache);
> > +
> > + seg = &btf_ext->core_relo_info;
> > + for_each_btf_ext_sec(seg, sec) {
> > + for_each_btf_ext_rec(seg, sec, relo_idx, relo) {
> > + struct bpf_core_spec specs_scratch[3] = {};
> > + struct bpf_core_relo_res targ_res = {};
> > + struct bpf_core_cand_list *cands = NULL;
> > + const void *type_key = u32_as_hash_key(relo->type_id);
> > + const char *sec_name = btf__name_by_offset(btf, sec->sec_name_off);
> > +
> > + if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
> > + !hashmap__find(cand_cache, type_key, (void **)&cands)) {
> > + cands = btfgen_find_cands(btf, info->src_btf, relo->type_id);
> > + if (!cands) {
> > + err = -errno;
> > + goto out;
> > + }
> > +
> > + err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
> > + if (err)
> > + goto out;
> > + }
> > +
> > + err = bpf_core_calc_relo_insn(sec_name, relo, relo_idx, btf, cands,
> > + specs_scratch, &targ_res);
>
> it feels like you forgot to submit some important patch, as I said, I
> can't find bpf_core_calc_relo_insn() anywhere
>
Yes, sorry about that. Will be in the next iteration.
>
> > + if (err)
> > + goto out;
> > +
> > + err = btfgen_record_reloc(info, &specs_scratch[2]);
>
> at least let's leave a comment that specs_scratch[2] is target spec
> (but that's an implementation detail, ugh...)
>
Right.
>
>
> > + if (err)
> > + goto out;
> > + }
> > + }
> > +
> > +out:
> > + bpf_core_free_cand_cache(cand_cache);
> > +
> > + return err;
> > +}
> > +
> > +/* Generate BTF from relocation information previously recorded */
> > +static struct btf *btfgen_get_btf(struct btfgen_info *info)
> > +{
> > + return ERR_PTR(-EOPNOTSUPP);
> > +}
> > +
> > +/* Create BTF file for a set of BPF objects.
> > + *
> > + * The BTFGen algorithm is divided in two main parts: (1) collect the
> > + * BTF types that are involved in relocations and (2) generate the BTF
> > + * object using the collected types.
> > + *
> > + * In order to collect the types involved in the relocations, we parse
> > + * the BTF and BTF.ext sections of the BPF objects and use
> > + * bpf_core_calc_relo_insn() to get the target specification, this
> > + * indicates how the types and fields are used in a relocation.
> > + *
> > + * Types are recorded in different ways according to the kind of the
> > + * relocation. For field-based relocations only the members that are
> > + * actually used are saved in order to reduce the size of the generated
> > + * BTF file. For type-based and enum-based relocations the whole type is
> > + * saved.
> > + *
> > + * The second part of the algorithm generates the BTF object. It creates
> > + * an empty BTF object and fills it with the types recorded in the
> > + * previous step. This function takes care of only adding the structure
> > + * and union members that were marked as used and it also fixes up the
> > + * type IDs on the generated BTF object.
> > + */
> > +static int btfgen(const char *src_btf, const char *dst_btf, const char *objspaths[])
>
> naming nit: btfgen is actually misleading. pahole is "btfgen", but
> this is actually some sort of "BTF minimizer". So something like
> "minimize_btf" would be a bit more descriptive
>
I agree that minimize_btf() is better. I think we can continue to use
btfgen_* for the different types.
> > +{
> > + struct btfgen_info *info;
> > + struct btf *btf_new = NULL;
> > + int err;
> > +
> > + info = btfgen_new_info(src_btf);
> > + if (!info) {
> > + p_err("failed to allocate info structure: %s", strerror(errno));
> > + err = -errno;
> > + goto out;
> > + }
> > +
> > + for (int i = 0; objspaths[i] != NULL; i++) {
> > + p_info("Processing BPF object: %s", objspaths[i]);
> > +
> > + err = btfgen_record_obj(info, objspaths[i]);
> > + if (err)
> > + goto out;
> > + }
> > +
> > + btf_new = btfgen_get_btf(info);
> > + if (!btf_new) {
> > + err = -errno;
> > + p_err("error generating btf: %s", strerror(errno));
> > + goto out;
> > + }
> > +
> > + p_info("Creating BTF file: %s", dst_btf);
>
> normally tools don't advertise each action through logs, unless it's
> some verbose mode. Let's dop one BTF generated per one bpftool
> invocation and drop all this descriptive logging. User will know input
> and output BTFs exactly, because they will specify it as input
> arguments (so no need to parse any output just to know what went
> where, etc).
>
> > + err = btf_save_raw(btf_new, dst_btf);
> > + if (err) {
> > + p_err("error saving btf file: %s", strerror(errno));
> > + goto out;
> > + }
> > +
> > +out:
> > + btf__free(btf_new);
> > + btfgen_free_info(info);
> > +
> > + return err;
> > +}
> > +
> > static int do_min_core_btf(int argc, char **argv)
> > {
> > char src_btf_path[PATH_MAX], dst_btf_path[PATH_MAX];
> > --
> > 2.25.1
> >
Powered by blists - more mailing lists