lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEf4BzapWKWVzPFeZN5Ms6tOMhrtfpd8aiKenHgf4K4_1fhqMg@mail.gmail.com>
Date: Mon, 2 Jun 2025 17:06:18 -0700
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Blake Jones <blakejones@...gle.com>
Cc: Alexei Starovoitov <ast@...nel.org>, Daniel Borkmann <daniel@...earbox.net>, 
	Andrii Nakryiko <andrii@...nel.org>, Martin KaFai Lau <martin.lau@...ux.dev>, 
	Eduard Zingerman <eddyz87@...il.com>, Song Liu <song@...nel.org>, 
	Yonghong Song <yonghong.song@...ux.dev>, John Fastabend <john.fastabend@...il.com>, 
	KP Singh <kpsingh@...nel.org>, Stanislav Fomichev <sdf@...ichev.me>, Hao Luo <haoluo@...gle.com>, 
	Jiri Olsa <jolsa@...nel.org>, Mykola Lysenko <mykolal@...com>, Shuah Khan <shuah@...nel.org>, 
	Ihor Solodrai <ihor.solodrai@...ux.dev>, Namhyung Kim <namhyung@...nel.org>, 
	Ian Rogers <irogers@...gle.com>, bpf@...r.kernel.org, linux-kernel@...r.kernel.org, 
	linux-kselftest@...r.kernel.org
Subject: Re: [PATCH] libbpf: add support for printing BTF character arrays as strings

On Sat, May 31, 2025 at 12:20 AM Blake Jones <blakejones@...gle.com> wrote:
>
> The BTF dumper code currently displays arrays of characters as just that -
> arrays, with each character formatted individually. Sometimes this is what
> makes sense, but it's nice to be able to treat that array as a string.
>
> This change adds a special case to the btf_dump functionality to allow
> arrays of single-byte integer values to be printed as character strings.
> Characters for which isprint() returns false are printed as hex-escaped
> values. This is enabled when the new ".print_strings" is set to 1 in the
> btf_dump_type_data_opts structure.
>
> As an example, here's what it looks like to dump the string "hello" using
> a few different field values for btf_dump_type_data_opts (.compact = 1):
>
> - .print_strings = 0, .skip_names = 0:  (char[6])['h','e','l','l','o',]
> - .print_strings = 0, .skip_names = 1:  ['h','e','l','l','o',]
> - .print_strings = 1, .skip_names = 0:  (char[6])"hello"
> - .print_strings = 1, .skip_names = 1:  "hello"
>
> Here's the string "h\xff", dumped with .compact = 1 and .skip_names = 1:
>
> - .print_strings = 0:  ['h',-1,]
> - .print_strings = 1:  "h\xff"
>
> Signed-off-by: Blake Jones <blakejones@...gle.com>
> ---
>  tools/lib/bpf/btf.h                           |   3 +-
>  tools/lib/bpf/btf_dump.c                      |  51 ++++++++-
>  .../selftests/bpf/prog_tests/btf_dump.c       | 102 ++++++++++++++++++
>  3 files changed, 154 insertions(+), 2 deletions(-)
>
> diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> index 4392451d634b..be8e8e26d245 100644
> --- a/tools/lib/bpf/btf.h
> +++ b/tools/lib/bpf/btf.h
> @@ -326,9 +326,10 @@ struct btf_dump_type_data_opts {
>         bool compact;           /* no newlines/indentation */
>         bool skip_names;        /* skip member/type names */
>         bool emit_zeroes;       /* show 0-valued fields */
> +       bool print_strings;     /* print char arrays as strings */

let's use "emit_strings" naming, so it's consistent with emit_zeroes?

>         size_t :0;
>  };
> -#define btf_dump_type_data_opts__last_field emit_zeroes
> +#define btf_dump_type_data_opts__last_field print_strings
>
>  LIBBPF_API int
>  btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
> diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
> index 460c3e57fadb..a07dd5accdd8 100644
> --- a/tools/lib/bpf/btf_dump.c
> +++ b/tools/lib/bpf/btf_dump.c
> @@ -75,6 +75,7 @@ struct btf_dump_data {
>         bool is_array_member;
>         bool is_array_terminated;
>         bool is_array_char;
> +       bool print_strings;

ditto, emit_strings (and maybe put it next to emit_zeroes then)

>  };
>
>  struct btf_dump {
> @@ -2028,6 +2029,50 @@ static int btf_dump_var_data(struct btf_dump *d,
>         return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
>  }
>
> +static int btf_dump_string_data(struct btf_dump *d,
> +                               const struct btf_type *t,
> +                               __u32 id,
> +                               const void *data)
> +{
> +       const struct btf_array *array = btf_array(t);
> +       __u32 i;
> +
> +       if (!btf_is_int(skip_mods_and_typedefs(d->btf, array->type, NULL)) ||
> +           btf__resolve_size(d->btf, array->type) != 1 ||
> +           !d->typed_dump->print_strings) {
> +               pr_warn("unexpected %s() call for array type %u\n",
> +                       __func__, array->type);
> +               return -EINVAL;
> +       }
> +

IMO, a bit too defensive. You literally checked that we have char[] in
the caller, I think it's fine not to double-check that here, let's
drop this

> +       btf_dump_data_pfx(d);
> +       btf_dump_printf(d, "\"");
> +
> +       for (i = 0; i < array->nelems; i++, data++) {
> +               char c;
> +
> +               if (data >= d->typed_dump->data_end)
> +                       return -E2BIG;
> +
> +               c = *(char *)data;
> +               if (c == '\0') {
> +                       /* When printing character arrays as strings, NUL bytes
> +                        * are always treated as string terminators; they are
> +                        * never printed.
> +                        */
> +                       break;

what if there are non-zero characters after the terminating zero?
should we keep going and if there is any non-zero one, still emit
them? or maybe that should be an extra option?... When capturing some
data and dumping, it might be important to know all the contents (it
might be garbage or not, but you'll still see non-garbage values
before \0, so maybe it's fine to always do it?)

> +               }
> +               if (isprint(c))
> +                       btf_dump_printf(d, "%c", c);
> +               else
> +                       btf_dump_printf(d, "\\x%02x", *(__u8 *)data);
> +       }
> +
> +       btf_dump_printf(d, "\"");
> +
> +       return 0;
> +}
> +

[...]

> +/*
> + * String-like types are generally not named, so they need to be
> + * found this way rather than via btf__find_by_name().
> + */
> +static int find_char_array_type(struct btf *btf, int nelems)
> +{
> +       const int nr_types = btf__type_cnt(btf);
> +       const int char_type = btf__find_by_name(btf, "char");
> +
> +       for (int i = 1; i < nr_types; i++) {
> +               const struct btf_type *t;
> +               const struct btf_array *at;
> +
> +               t = btf__type_by_id(btf, i);
> +               if (btf_kind(t) != BTF_KIND_ARRAY)

btf_is_array()

> +                       continue;
> +
> +               at = btf_array(t);
> +               if (at->nelems == nelems && at->type == char_type)
> +                       return i;
> +       }
> +
> +       return -ENOENT;
> +}
> +
> +static int btf_dump_string_data(struct btf *btf, struct btf_dump *d,
> +                               char *str, struct btf_dump_type_data_opts *opts,
> +                               char *ptr, size_t ptr_sz,
> +                               const char *expected_val)
> +{
> +       char name[64];
> +       size_t type_sz;
> +       int type_id;
> +       int ret = 0;
> +
> +       snprintf(name, sizeof(name), "char[%zu]", ptr_sz);
> +       type_id = find_char_array_type(btf, ptr_sz);

instead of trying to find a suitable type in kernel BTF, just generate
a tiny custom BTF with necessary char[N] types? see btf__add_xxx()
usage for an example.

> +       if (!ASSERT_GE(type_id, 0, "find type id"))
> +               return -ENOENT;
> +       type_sz = btf__resolve_size(btf, type_id);
> +       str[0] = '\0';
> +       ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, opts);
> +       if (type_sz <= ptr_sz) {
> +               if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
> +                       return -EINVAL;
> +       } else {
> +               if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
> +                       return -EINVAL;
> +       }
> +       if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
> +               return -EFAULT;
> +       return 0;
> +}
> +

[...]

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ