netdev - Re: [PATCH bpf-next v5 5/9] bpftool: Implement btfgen()

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAHap4zuWAmBBhTpUef44z42RjQtqmhOMFjB-yBr8AuqGyYnKCw@mail.gmail.com>
Date:   Thu, 3 Feb 2022 11:09:57 -0500
From:   Mauricio Vásquez Bernal <mauricio@...volk.io>
To:     Andrii Nakryiko <andrii.nakryiko@...il.com>
Cc:     Networking <netdev@...r.kernel.org>, bpf <bpf@...r.kernel.org>,
        Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Andrii Nakryiko <andrii@...nel.org>,
        Quentin Monnet <quentin@...valent.com>,
        Rafael David Tinoco <rafaeldtinoco@...il.com>,
        Lorenzo Fontana <lorenzo.fontana@...stic.co>,
        Leonardo Di Donato <leonardo.didonato@...stic.co>
Subject: Re: [PATCH bpf-next v5 5/9] bpftool: Implement btfgen()

On Wed, Feb 2, 2022 at 2:15 PM Andrii Nakryiko
<andrii.nakryiko@...il.com> wrote:
>
> On Fri, Jan 28, 2022 at 2:33 PM Mauricio Vásquez <mauricio@...volk.io> wrote:
> >
> > btfgen() receives the path of a source and destination BTF files and a
> > list of BPF objects. This function records the relocations for all
> > objects and then generates the BTF file by calling btfgen_get_btf()
> > (implemented in the following commits).
> >
> > btfgen_record_obj() loads the BTF and BTF.ext sections of the BPF
> > objects and loops through all CO-RE relocations. It uses
> > bpf_core_calc_relo_insn() from libbpf and passes the target spec to
> > btfgen_record_reloc() that saves the types involved in such relocation.
> >
> > Signed-off-by: Mauricio Vásquez <mauricio@...volk.io>
> > Signed-off-by: Rafael David Tinoco <rafael.tinoco@...asec.com>
> > Signed-off-by: Lorenzo Fontana <lorenzo.fontana@...stic.co>
> > Signed-off-by: Leonardo Di Donato <leonardo.didonato@...stic.co>
> > ---
> >  tools/bpf/bpftool/Makefile |   8 +-
> >  tools/bpf/bpftool/gen.c    | 221 ++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 223 insertions(+), 6 deletions(-)
> >
> > diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
> > index 83369f55df61..97d447135536 100644
> > --- a/tools/bpf/bpftool/Makefile
> > +++ b/tools/bpf/bpftool/Makefile
> > @@ -34,10 +34,10 @@ LIBBPF_BOOTSTRAP_INCLUDE := $(LIBBPF_BOOTSTRAP_DESTDIR)/include
> >  LIBBPF_BOOTSTRAP_HDRS_DIR := $(LIBBPF_BOOTSTRAP_INCLUDE)/bpf
> >  LIBBPF_BOOTSTRAP := $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a
> >
> > -# We need to copy hashmap.h and nlattr.h which is not otherwise exported by
> > -# libbpf, but still required by bpftool.
> > -LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h)
> > -LIBBPF_BOOTSTRAP_INTERNAL_HDRS := $(addprefix $(LIBBPF_BOOTSTRAP_HDRS_DIR)/,hashmap.h)
> > +# We need to copy hashmap.h, nlattr.h, relo_core.h and libbpf_internal.h
> > +# which are not otherwise exported by libbpf, but still required by bpftool.
> > +LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h relo_core.h libbpf_internal.h)
> > +LIBBPF_BOOTSTRAP_INTERNAL_HDRS := $(addprefix $(LIBBPF_BOOTSTRAP_HDRS_DIR)/,hashmap.h relo_core.h libbpf_internal.h)
> >
> >  ifeq ($(BPFTOOL_VERSION),)
> >  BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
> > diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
> > index 68bb88e86b27..bb9c56401ee5 100644
> > --- a/tools/bpf/bpftool/gen.c
> > +++ b/tools/bpf/bpftool/gen.c
> > @@ -15,6 +15,7 @@
> >  #include <unistd.h>
> >  #include <bpf/bpf.h>
> >  #include <bpf/libbpf.h>
> > +#include <bpf/libbpf_internal.h>
> >  #include <sys/types.h>
> >  #include <sys/stat.h>
> >  #include <sys/mman.h>
> > @@ -1143,6 +1144,11 @@ static void *uint_as_hash_key(int x)
> >         return (void *)(uintptr_t)x;
> >  }
> >
> > +static void *u32_as_hash_key(__u32 x)
> > +{
> > +       return (void *)(uintptr_t)x;
> > +}
> > +
> >  static void btfgen_free_type(struct btfgen_type *type)
> >  {
> >         free(type);
> > @@ -1193,12 +1199,223 @@ btfgen_new_info(const char *targ_btf_path)
> >         return info;
> >  }
> >
> > -/* Create BTF file for a set of BPF objects */
> > -static int btfgen(const char *src_btf, const char *dst_btf, const char *objspaths[])
> > +static int btfgen_record_field_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
> > +{
> > +       return -EOPNOTSUPP;
> > +}
> > +
> > +static int btfgen_record_type_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
> > +{
> > +       return -EOPNOTSUPP;
> > +}
> > +
> > +static int btfgen_record_enumval_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
> >  {
> >         return -EOPNOTSUPP;
> >  }
> >
> > +static int btfgen_record_reloc(struct btfgen_info *info, struct bpf_core_spec *res)
> > +{
> > +       switch (res->relo_kind) {
> > +       case BPF_CORE_FIELD_BYTE_OFFSET:
> > +       case BPF_CORE_FIELD_BYTE_SIZE:
> > +       case BPF_CORE_FIELD_EXISTS:
> > +       case BPF_CORE_FIELD_SIGNED:
> > +       case BPF_CORE_FIELD_LSHIFT_U64:
> > +       case BPF_CORE_FIELD_RSHIFT_U64:
> > +               return btfgen_record_field_relo(info, res);
> > +       case BPF_CORE_TYPE_ID_LOCAL:
> > +       case BPF_CORE_TYPE_ID_TARGET:
> > +       case BPF_CORE_TYPE_EXISTS:
> > +       case BPF_CORE_TYPE_SIZE:
> > +               return btfgen_record_type_relo(info, res);
> > +       case BPF_CORE_ENUMVAL_EXISTS:
> > +       case BPF_CORE_ENUMVAL_VALUE:
> > +               return btfgen_record_enumval_relo(info, res);
> > +       default:
> > +               return -EINVAL;
> > +       }
> > +}
> > +
> > +static struct bpf_core_cand_list *
> > +btfgen_find_cands(const struct btf *local_btf, const struct btf *targ_btf, __u32 local_id)
> > +{
> > +       const struct btf_type *local_type;
> > +       struct bpf_core_cand_list *cands = NULL;
> > +       struct bpf_core_cand local_cand = {};
> > +       size_t local_essent_len;
> > +       const char *local_name;
> > +       int err;
> > +
> > +       local_cand.btf = local_btf;
> > +       local_cand.id = local_id;
> > +
> > +       local_type = btf__type_by_id(local_btf, local_id);
> > +       if (!local_type) {
> > +               err = -EINVAL;
> > +               goto err_out;
> > +       }
> > +
> > +       local_name = btf__name_by_offset(local_btf, local_type->name_off);
> > +       if (!local_name) {
> > +               err = -EINVAL;
> > +               goto err_out;
> > +       }
> > +       local_essent_len = bpf_core_essential_name_len(local_name);
> > +
> > +       cands = calloc(1, sizeof(*cands));
> > +       if (!cands)
> > +               return NULL;
> > +
> > +       err = bpf_core_add_cands(&local_cand, local_essent_len, targ_btf, "vmlinux", 1, cands);
> > +       if (err)
> > +               goto err_out;
> > +
> > +       return cands;
> > +
> > +err_out:
> > +       if (cands)
> > +               bpf_core_free_cands(cands);
>
> we can also add if (!cands) return into bpf_core_free_cands(), like
> all other public destructor APIs in libbpf do
>

Makes sense.

> > +       errno = -err;
> > +       return NULL;
> > +}
> > +
> > +/* Record relocation information for a single BPF object*/
> > +static int btfgen_record_obj(struct btfgen_info *info, const char *obj_path)
> > +{
> > +       const struct btf_ext_info_sec *sec;
> > +       const struct bpf_core_relo *relo;
> > +       const struct btf_ext_info *seg;
> > +       struct hashmap *cand_cache;
> > +       struct btf_ext *btf_ext;
> > +       unsigned int relo_idx;
> > +       struct btf *btf;
> > +       int err;
> > +
> > +       btf = btf__parse(obj_path, &btf_ext);
> > +       err = libbpf_get_error(btf);
>
> same, libbpf 1.0 mode, no need for libbpf_get_error()
>
> > +       if (err) {
> > +               p_err("failed to parse bpf object '%s': %s", obj_path, strerror(errno));
>
> nit: "BPF object"? I'm not sure we do this consistently, but BPF
> should be spelled with capitals in logs
>
>
>
> > +               return err;
> > +       }
> > +
> > +       if (btf_ext->core_relo_info.len == 0)
> > +               return 0;
> > +
> > +       cand_cache = bpf_core_create_cand_cache();
> > +       if (IS_ERR(cand_cache))
> > +               return PTR_ERR(cand_cache);
> > +
> > +       seg = &btf_ext->core_relo_info;
> > +       for_each_btf_ext_sec(seg, sec) {
> > +               for_each_btf_ext_rec(seg, sec, relo_idx, relo) {
> > +                       struct bpf_core_spec specs_scratch[3] = {};
> > +                       struct bpf_core_relo_res targ_res = {};
> > +                       struct bpf_core_cand_list *cands = NULL;
> > +                       const void *type_key = u32_as_hash_key(relo->type_id);
> > +                       const char *sec_name = btf__name_by_offset(btf, sec->sec_name_off);
> > +
> > +                       if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
> > +                           !hashmap__find(cand_cache, type_key, (void **)&cands)) {
> > +                               cands = btfgen_find_cands(btf, info->src_btf, relo->type_id);
> > +                               if (!cands) {
> > +                                       err = -errno;
> > +                                       goto out;
> > +                               }
> > +
> > +                               err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
> > +                               if (err)
> > +                                       goto out;
> > +                       }
> > +
> > +                       err = bpf_core_calc_relo_insn(sec_name, relo, relo_idx, btf, cands,
> > +                                                     specs_scratch, &targ_res);
>
> it feels like you forgot to submit some important patch, as I said, I
> can't find bpf_core_calc_relo_insn() anywhere
>

Yes, sorry about that. Will be in the next iteration.

>
> > +                       if (err)
> > +                               goto out;
> > +
> > +                       err = btfgen_record_reloc(info, &specs_scratch[2]);
>
> at least let's leave a comment that specs_scratch[2] is target spec
> (but that's an implementation detail, ugh...)
>

Right.

>
>
> > +                       if (err)
> > +                               goto out;
> > +               }
> > +       }
> > +
> > +out:
> > +       bpf_core_free_cand_cache(cand_cache);
> > +
> > +       return err;
> > +}
> > +
> > +/* Generate BTF from relocation information previously recorded */
> > +static struct btf *btfgen_get_btf(struct btfgen_info *info)
> > +{
> > +       return ERR_PTR(-EOPNOTSUPP);
> > +}
> > +
> > +/* Create BTF file for a set of BPF objects.
> > + *
> > + * The BTFGen algorithm is divided in two main parts: (1) collect the
> > + * BTF types that are involved in relocations and (2) generate the BTF
> > + * object using the collected types.
> > + *
> > + * In order to collect the types involved in the relocations, we parse
> > + * the BTF and BTF.ext sections of the BPF objects and use
> > + * bpf_core_calc_relo_insn() to get the target specification, this
> > + * indicates how the types and fields are used in a relocation.
> > + *
> > + * Types are recorded in different ways according to the kind of the
> > + * relocation. For field-based relocations only the members that are
> > + * actually used are saved in order to reduce the size of the generated
> > + * BTF file. For type-based and enum-based relocations the whole type is
> > + * saved.
> > + *
> > + * The second part of the algorithm generates the BTF object. It creates
> > + * an empty BTF object and fills it with the types recorded in the
> > + * previous step. This function takes care of only adding the structure
> > + * and union members that were marked as used and it also fixes up the
> > + * type IDs on the generated BTF object.
> > + */
> > +static int btfgen(const char *src_btf, const char *dst_btf, const char *objspaths[])
>
> naming nit: btfgen is actually misleading. pahole is "btfgen", but
> this is actually some sort of "BTF minimizer". So something like
> "minimize_btf" would be a bit more descriptive
>

I agree that minimize_btf() is better. I think we can continue to use
btfgen_* for the different types.


> > +{
> > +       struct btfgen_info *info;
> > +       struct btf *btf_new = NULL;
> > +       int err;
> > +
> > +       info = btfgen_new_info(src_btf);
> > +       if (!info) {
> > +               p_err("failed to allocate info structure: %s", strerror(errno));
> > +               err = -errno;
> > +               goto out;
> > +       }
> > +
> > +       for (int i = 0; objspaths[i] != NULL; i++) {
> > +               p_info("Processing BPF object: %s", objspaths[i]);
> > +
> > +               err = btfgen_record_obj(info, objspaths[i]);
> > +               if (err)
> > +                       goto out;
> > +       }
> > +
> > +       btf_new = btfgen_get_btf(info);
> > +       if (!btf_new) {
> > +               err = -errno;
> > +               p_err("error generating btf: %s", strerror(errno));
> > +               goto out;
> > +       }
> > +
> > +       p_info("Creating BTF file: %s", dst_btf);
>
> normally tools don't advertise each action through logs, unless it's
> some verbose mode. Let's dop one BTF generated per one bpftool
> invocation and drop all this descriptive logging. User will know input
> and output BTFs exactly, because they will specify it as input
> arguments (so no need to parse any output just to know what went
> where, etc).
>

> > +       err = btf_save_raw(btf_new, dst_btf);
> > +       if (err) {
> > +               p_err("error saving btf file: %s", strerror(errno));
> > +               goto out;
> > +       }
> > +
> > +out:
> > +       btf__free(btf_new);
> > +       btfgen_free_info(info);
> > +
> > +       return err;
> > +}
> > +
> >  static int do_min_core_btf(int argc, char **argv)
> >  {
> >         char src_btf_path[PATH_MAX], dst_btf_path[PATH_MAX];
> > --
> > 2.25.1
> >