lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 2 Feb 2022 11:14:57 -0800
From:   Andrii Nakryiko <andrii.nakryiko@...il.com>
To:     Mauricio Vásquez <mauricio@...volk.io>
Cc:     Networking <netdev@...r.kernel.org>, bpf <bpf@...r.kernel.org>,
        Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Andrii Nakryiko <andrii@...nel.org>,
        Quentin Monnet <quentin@...valent.com>,
        Rafael David Tinoco <rafaeldtinoco@...il.com>,
        Lorenzo Fontana <lorenzo.fontana@...stic.co>,
        Leonardo Di Donato <leonardo.didonato@...stic.co>
Subject: Re: [PATCH bpf-next v5 5/9] bpftool: Implement btfgen()

On Fri, Jan 28, 2022 at 2:33 PM Mauricio Vásquez <mauricio@...volk.io> wrote:
>
> btfgen() receives the path of a source and destination BTF files and a
> list of BPF objects. This function records the relocations for all
> objects and then generates the BTF file by calling btfgen_get_btf()
> (implemented in the following commits).
>
> btfgen_record_obj() loads the BTF and BTF.ext sections of the BPF
> objects and loops through all CO-RE relocations. It uses
> bpf_core_calc_relo_insn() from libbpf and passes the target spec to
> btfgen_record_reloc() that saves the types involved in such relocation.
>
> Signed-off-by: Mauricio Vásquez <mauricio@...volk.io>
> Signed-off-by: Rafael David Tinoco <rafael.tinoco@...asec.com>
> Signed-off-by: Lorenzo Fontana <lorenzo.fontana@...stic.co>
> Signed-off-by: Leonardo Di Donato <leonardo.didonato@...stic.co>
> ---
>  tools/bpf/bpftool/Makefile |   8 +-
>  tools/bpf/bpftool/gen.c    | 221 ++++++++++++++++++++++++++++++++++++-
>  2 files changed, 223 insertions(+), 6 deletions(-)
>
> diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
> index 83369f55df61..97d447135536 100644
> --- a/tools/bpf/bpftool/Makefile
> +++ b/tools/bpf/bpftool/Makefile
> @@ -34,10 +34,10 @@ LIBBPF_BOOTSTRAP_INCLUDE := $(LIBBPF_BOOTSTRAP_DESTDIR)/include
>  LIBBPF_BOOTSTRAP_HDRS_DIR := $(LIBBPF_BOOTSTRAP_INCLUDE)/bpf
>  LIBBPF_BOOTSTRAP := $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a
>
> -# We need to copy hashmap.h and nlattr.h which is not otherwise exported by
> -# libbpf, but still required by bpftool.
> -LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h)
> -LIBBPF_BOOTSTRAP_INTERNAL_HDRS := $(addprefix $(LIBBPF_BOOTSTRAP_HDRS_DIR)/,hashmap.h)
> +# We need to copy hashmap.h, nlattr.h, relo_core.h and libbpf_internal.h
> +# which are not otherwise exported by libbpf, but still required by bpftool.
> +LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h relo_core.h libbpf_internal.h)
> +LIBBPF_BOOTSTRAP_INTERNAL_HDRS := $(addprefix $(LIBBPF_BOOTSTRAP_HDRS_DIR)/,hashmap.h relo_core.h libbpf_internal.h)
>
>  ifeq ($(BPFTOOL_VERSION),)
>  BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
> diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
> index 68bb88e86b27..bb9c56401ee5 100644
> --- a/tools/bpf/bpftool/gen.c
> +++ b/tools/bpf/bpftool/gen.c
> @@ -15,6 +15,7 @@
>  #include <unistd.h>
>  #include <bpf/bpf.h>
>  #include <bpf/libbpf.h>
> +#include <bpf/libbpf_internal.h>
>  #include <sys/types.h>
>  #include <sys/stat.h>
>  #include <sys/mman.h>
> @@ -1143,6 +1144,11 @@ static void *uint_as_hash_key(int x)
>         return (void *)(uintptr_t)x;
>  }
>
> +static void *u32_as_hash_key(__u32 x)
> +{
> +       return (void *)(uintptr_t)x;
> +}
> +
>  static void btfgen_free_type(struct btfgen_type *type)
>  {
>         free(type);
> @@ -1193,12 +1199,223 @@ btfgen_new_info(const char *targ_btf_path)
>         return info;
>  }
>
> -/* Create BTF file for a set of BPF objects */
> -static int btfgen(const char *src_btf, const char *dst_btf, const char *objspaths[])
> +static int btfgen_record_field_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
> +{
> +       return -EOPNOTSUPP;
> +}
> +
> +static int btfgen_record_type_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
> +{
> +       return -EOPNOTSUPP;
> +}
> +
> +static int btfgen_record_enumval_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
>  {
>         return -EOPNOTSUPP;
>  }
>
> +static int btfgen_record_reloc(struct btfgen_info *info, struct bpf_core_spec *res)
> +{
> +       switch (res->relo_kind) {
> +       case BPF_CORE_FIELD_BYTE_OFFSET:
> +       case BPF_CORE_FIELD_BYTE_SIZE:
> +       case BPF_CORE_FIELD_EXISTS:
> +       case BPF_CORE_FIELD_SIGNED:
> +       case BPF_CORE_FIELD_LSHIFT_U64:
> +       case BPF_CORE_FIELD_RSHIFT_U64:
> +               return btfgen_record_field_relo(info, res);
> +       case BPF_CORE_TYPE_ID_LOCAL:
> +       case BPF_CORE_TYPE_ID_TARGET:
> +       case BPF_CORE_TYPE_EXISTS:
> +       case BPF_CORE_TYPE_SIZE:
> +               return btfgen_record_type_relo(info, res);
> +       case BPF_CORE_ENUMVAL_EXISTS:
> +       case BPF_CORE_ENUMVAL_VALUE:
> +               return btfgen_record_enumval_relo(info, res);
> +       default:
> +               return -EINVAL;
> +       }
> +}
> +
> +static struct bpf_core_cand_list *
> +btfgen_find_cands(const struct btf *local_btf, const struct btf *targ_btf, __u32 local_id)
> +{
> +       const struct btf_type *local_type;
> +       struct bpf_core_cand_list *cands = NULL;
> +       struct bpf_core_cand local_cand = {};
> +       size_t local_essent_len;
> +       const char *local_name;
> +       int err;
> +
> +       local_cand.btf = local_btf;
> +       local_cand.id = local_id;
> +
> +       local_type = btf__type_by_id(local_btf, local_id);
> +       if (!local_type) {
> +               err = -EINVAL;
> +               goto err_out;
> +       }
> +
> +       local_name = btf__name_by_offset(local_btf, local_type->name_off);
> +       if (!local_name) {
> +               err = -EINVAL;
> +               goto err_out;
> +       }
> +       local_essent_len = bpf_core_essential_name_len(local_name);
> +
> +       cands = calloc(1, sizeof(*cands));
> +       if (!cands)
> +               return NULL;
> +
> +       err = bpf_core_add_cands(&local_cand, local_essent_len, targ_btf, "vmlinux", 1, cands);
> +       if (err)
> +               goto err_out;
> +
> +       return cands;
> +
> +err_out:
> +       if (cands)
> +               bpf_core_free_cands(cands);

we can also add if (!cands) return into bpf_core_free_cands(), like
all other public destructor APIs in libbpf do

> +       errno = -err;
> +       return NULL;
> +}
> +
> +/* Record relocation information for a single BPF object*/
> +static int btfgen_record_obj(struct btfgen_info *info, const char *obj_path)
> +{
> +       const struct btf_ext_info_sec *sec;
> +       const struct bpf_core_relo *relo;
> +       const struct btf_ext_info *seg;
> +       struct hashmap *cand_cache;
> +       struct btf_ext *btf_ext;
> +       unsigned int relo_idx;
> +       struct btf *btf;
> +       int err;
> +
> +       btf = btf__parse(obj_path, &btf_ext);
> +       err = libbpf_get_error(btf);

same, libbpf 1.0 mode, no need for libbpf_get_error()

> +       if (err) {
> +               p_err("failed to parse bpf object '%s': %s", obj_path, strerror(errno));

nit: "BPF object"? I'm not sure we do this consistently, but BPF
should be spelled with capitals in logs



> +               return err;
> +       }
> +
> +       if (btf_ext->core_relo_info.len == 0)
> +               return 0;
> +
> +       cand_cache = bpf_core_create_cand_cache();
> +       if (IS_ERR(cand_cache))
> +               return PTR_ERR(cand_cache);
> +
> +       seg = &btf_ext->core_relo_info;
> +       for_each_btf_ext_sec(seg, sec) {
> +               for_each_btf_ext_rec(seg, sec, relo_idx, relo) {
> +                       struct bpf_core_spec specs_scratch[3] = {};
> +                       struct bpf_core_relo_res targ_res = {};
> +                       struct bpf_core_cand_list *cands = NULL;
> +                       const void *type_key = u32_as_hash_key(relo->type_id);
> +                       const char *sec_name = btf__name_by_offset(btf, sec->sec_name_off);
> +
> +                       if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
> +                           !hashmap__find(cand_cache, type_key, (void **)&cands)) {
> +                               cands = btfgen_find_cands(btf, info->src_btf, relo->type_id);
> +                               if (!cands) {
> +                                       err = -errno;
> +                                       goto out;
> +                               }
> +
> +                               err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
> +                               if (err)
> +                                       goto out;
> +                       }
> +
> +                       err = bpf_core_calc_relo_insn(sec_name, relo, relo_idx, btf, cands,
> +                                                     specs_scratch, &targ_res);

it feels like you forgot to submit some important patch, as I said, I
can't find bpf_core_calc_relo_insn() anywhere


> +                       if (err)
> +                               goto out;
> +
> +                       err = btfgen_record_reloc(info, &specs_scratch[2]);

at least let's leave a comment that specs_scratch[2] is target spec
(but that's an implementation detail, ugh...)



> +                       if (err)
> +                               goto out;
> +               }
> +       }
> +
> +out:
> +       bpf_core_free_cand_cache(cand_cache);
> +
> +       return err;
> +}
> +
> +/* Generate BTF from relocation information previously recorded */
> +static struct btf *btfgen_get_btf(struct btfgen_info *info)
> +{
> +       return ERR_PTR(-EOPNOTSUPP);
> +}
> +
> +/* Create BTF file for a set of BPF objects.
> + *
> + * The BTFGen algorithm is divided in two main parts: (1) collect the
> + * BTF types that are involved in relocations and (2) generate the BTF
> + * object using the collected types.
> + *
> + * In order to collect the types involved in the relocations, we parse
> + * the BTF and BTF.ext sections of the BPF objects and use
> + * bpf_core_calc_relo_insn() to get the target specification, this
> + * indicates how the types and fields are used in a relocation.
> + *
> + * Types are recorded in different ways according to the kind of the
> + * relocation. For field-based relocations only the members that are
> + * actually used are saved in order to reduce the size of the generated
> + * BTF file. For type-based and enum-based relocations the whole type is
> + * saved.
> + *
> + * The second part of the algorithm generates the BTF object. It creates
> + * an empty BTF object and fills it with the types recorded in the
> + * previous step. This function takes care of only adding the structure
> + * and union members that were marked as used and it also fixes up the
> + * type IDs on the generated BTF object.
> + */
> +static int btfgen(const char *src_btf, const char *dst_btf, const char *objspaths[])

naming nit: btfgen is actually misleading. pahole is "btfgen", but
this is actually some sort of "BTF minimizer". So something like
"minimize_btf" would be a bit more descriptive

> +{
> +       struct btfgen_info *info;
> +       struct btf *btf_new = NULL;
> +       int err;
> +
> +       info = btfgen_new_info(src_btf);
> +       if (!info) {
> +               p_err("failed to allocate info structure: %s", strerror(errno));
> +               err = -errno;
> +               goto out;
> +       }
> +
> +       for (int i = 0; objspaths[i] != NULL; i++) {
> +               p_info("Processing BPF object: %s", objspaths[i]);
> +
> +               err = btfgen_record_obj(info, objspaths[i]);
> +               if (err)
> +                       goto out;
> +       }
> +
> +       btf_new = btfgen_get_btf(info);
> +       if (!btf_new) {
> +               err = -errno;
> +               p_err("error generating btf: %s", strerror(errno));
> +               goto out;
> +       }
> +
> +       p_info("Creating BTF file: %s", dst_btf);

normally tools don't advertise each action through logs, unless it's
some verbose mode. Let's dop one BTF generated per one bpftool
invocation and drop all this descriptive logging. User will know input
and output BTFs exactly, because they will specify it as input
arguments (so no need to parse any output just to know what went
where, etc).

> +       err = btf_save_raw(btf_new, dst_btf);
> +       if (err) {
> +               p_err("error saving btf file: %s", strerror(errno));
> +               goto out;
> +       }
> +
> +out:
> +       btf__free(btf_new);
> +       btfgen_free_info(info);
> +
> +       return err;
> +}
> +
>  static int do_min_core_btf(int argc, char **argv)
>  {
>         char src_btf_path[PATH_MAX], dst_btf_path[PATH_MAX];
> --
> 2.25.1
>

Powered by blists - more mailing lists