linux-kernel - Re: [PATCH bpf-next v3 2/3] bpf: btf: add btf print functionality

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180709205612.5041acc9@cakuba.lan>
Date:   Mon, 9 Jul 2018 20:56:12 -0700
From:   Jakub Kicinski <jakub.kicinski@...ronome.com>
To:     Okash Khawaja <osk@...com>
Cc:     Daniel Borkmann <daniel@...earbox.net>,
        Martin KaFai Lau <kafai@...com>,
        Alexei Starovoitov <ast@...nel.org>,
        Yonghong Song <yhs@...com>,
        Quentin Monnet <quentin.monnet@...ronome.com>,
        "David S. Miller" <davem@...emloft.net>, <netdev@...r.kernel.org>,
        <kernel-team@...com>, <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH bpf-next v3 2/3] bpf: btf: add btf print functionality

On Sun, 8 Jul 2018 13:30:04 -0700, Okash Khawaja wrote:
> This consumes functionality exported in the previous patch. It does the
> main job of printing with BTF data. This is used in the following patch
> to provide a more readable output of a map's dump. It relies on
> json_writer to do json printing. Below is sample output where map keys
> are ints and values are of type struct A:
> 
> typedef int int_type;
> enum E {
>         E0,
>         E1,
> };
> 
> struct B {
>         int x;
>         int y;
> };
> 
> struct A {
>         int m;
>         unsigned long long n;
>         char o;
>         int p[8];
>         int q[4][8];
>         enum E r;
>         void *s;
>         struct B t;
>         const int u;
>         int_type v;
>         unsigned int w1: 3;
>         unsigned int w2: 3;
> };
> 
> $ sudo bpftool map dump id 14
> [{
>         "key": 0,
>         "value": {
>             "m": 1,
>             "n": 2,
>             "o": "c",
>             "p": [15,16,17,18,15,16,17,18
>             ],
>             "q": [[25,26,27,28,25,26,27,28
>                 ],[35,36,37,38,35,36,37,38
>                 ],[45,46,47,48,45,46,47,48
>                 ],[55,56,57,58,55,56,57,58
>                 ]
>             ],
>             "r": 1,
>             "s": 0x7ffd80531cf8,
>             "t": {
>                 "x": 5,
>                 "y": 10
>             },
>             "u": 100,
>             "v": 20,
>             "w1": 0x7,
>             "w2": 0x3
>         }
>     }
> ]
> 
> This patch uses json's {} and [] to imply struct/union and array. More
> explicit information can be added later. For example, a command line
> option can be introduced to print whether a key or value is struct
> or union, name of a struct etc. This will however come at the expense
> of duplicating info when, for example, printing an array of structs.
> enums are printed as ints without their names.
> 
> Signed-off-by: Okash Khawaja <osk@...com>
> Acked-by: Martin KaFai Lau <kafai@...com>
> 
> ---
>  tools/bpf/bpftool/btf_dumper.c |  253 +++++++++++++++++++++++++++++++++++++++++
>  tools/bpf/bpftool/main.h       |   15 ++
>  2 files changed, 268 insertions(+)
> 
> --- /dev/null
> +++ b/tools/bpf/bpftool/btf_dumper.c
> @@ -0,0 +1,253 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2018 Facebook */
> +
> +#include <linux/btf.h>
> +#include <linux/err.h>
> +#include <stdio.h> /* for (FILE *) used by json_writer */
> +#include <linux/bitops.h>
> +#include <string.h>
> +#include <ctype.h>

fwiw: the preferred ordering would have been:

#include <ctype.h>
#include <stdio.h> /* for (FILE *) used by json_writer */
#include <string.h>
#include <linux/bitops.h>
#include <linux/btf.h>
#include <linux/err.h>

> +#include "btf.h"
> +#include "json_writer.h"
> +#include "main.h"
> +
> +#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
> +#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
> +#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
> +#define BITS_ROUNDUP_BYTES(bits) \
> +	(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
> +const int one = 1;
> +#define is_big_endian() ((*(char *)&one) == 0)

Could we try to do this at compilation time?  Without the variable? :(

#include <asm/byteorder.h>

#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN)
	return true;
#else
	return false;
#endif

We could also just include endian.h, but since it's a non-standard
extension perhaps using kernel header is a safer bet.

> +static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
> +			      __u8 bit_offset, const void *data);
> +
> +static void btf_dumper_ptr(const void *data, json_writer_t *jw,
> +			   bool is_plain_text)
> +{
> +	if (is_plain_text)
> +		jsonw_printf(jw, "%p", *((unsigned long *)data));
> +	else
> +		jsonw_printf(jw, "%u", *((unsigned long *)data));

nit: I think you missed these parenthesis

> +}
> +

> +static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset,
> +				const void *data, json_writer_t *jw,
> +				bool is_plain_text)
> +{
> +	int left_shift_bits, right_shift_bits;
> +	int nr_bits = BTF_INT_BITS(int_type);
> +	int total_bits_offset;
> +	int bytes_to_copy;
> +	int bits_to_copy;
> +	__u64 print_num;
> +
> +	total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type);
> +	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
> +	bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
> +	bits_to_copy = bit_offset + nr_bits;
> +	bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy);
> +
> +	print_num = 0;
> +	memcpy(&print_num, data, bytes_to_copy);
> +	if (is_big_endian()) {
> +		left_shift_bits = bit_offset;
> +		right_shift_bits = 64 - nr_bits;
> +	} else {
> +		left_shift_bits = 64 - bits_to_copy;
> +		right_shift_bits = 64 - nr_bits;
> +	}

Or you can just put the #if here, since it's the only use.

> +	print_num <<= left_shift_bits;
> +	print_num >>= right_shift_bits;
> +	if (is_plain_text)
> +		jsonw_printf(jw, "0x%llx", print_num);
> +	else
> +		jsonw_printf(jw, "%llu", print_num);
> +}
> +
> +static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset,
> +			  const void *data, json_writer_t *jw,
> +			  bool is_plain_text)
> +{
> +	__u32 *int_type;
> +	__u32 nr_bits;
> +
> +	int_type = (__u32 *)(t + 1);
> +	nr_bits = BTF_INT_BITS(*int_type);
> +	/* if this is bit field */
> +	if (bit_offset || BTF_INT_OFFSET(*int_type) ||
> +	    BITS_PER_BYTE_MASKED(nr_bits)) {
> +		btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> +				    is_plain_text);
> +		return 0;
> +	}
> +
> +	switch (BTF_INT_ENCODING(*int_type)) {
> +	case 0:
> +		if (BTF_INT_BITS(*int_type) == 64)
> +			jsonw_printf(jw, "%lu", *((__u64 *)data));

nit: more parenthesis here

> +		else if (BTF_INT_BITS(*int_type) == 32)
> +			jsonw_printf(jw, "%u", *((__u32 *)data));
> +		else if (BTF_INT_BITS(*int_type) == 16)
> +			jsonw_printf(jw, "%hu", *((__u16 *)data));
> +		else if (BTF_INT_BITS(*int_type) == 8)
> +			jsonw_printf(jw, "%hhu", *((__u8 *)data));
> +		else
> +			btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> +					    is_plain_text);
> +		break;
> +	case BTF_INT_SIGNED:
> +		if (BTF_INT_BITS(*int_type) == 64)
> +			jsonw_printf(jw, "%ld", *((long long *)data));
> +		else if (BTF_INT_BITS(*int_type) == 32)
> +			jsonw_printf(jw, "%d", *((int *)data));
> +		else if (BTF_INT_BITS(*int_type) == 16)
> +			jsonw_printf(jw, "%hd", *((short *)data));
> +		else if (BTF_INT_BITS(*int_type) == 8)
> +			jsonw_printf(jw, "%hhd", *((char *)data));
> +		else
> +			btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> +					    is_plain_text);
> +		break;
> +	case BTF_INT_CHAR:
> +		if (*((char *)data) == '\0')

nit: here too, etc..

> +			jsonw_null(jw);

I don't think the null is good.  I thought I mentioned that?  Look for
example at Python:

>>> import json
>>> thing = json.loads('{"a": [97, 98, 99, 100]}')
>>> bytearray(thing["str"]).decode('utf-8')
'abcd'
>>> "".join(map(chr, thing["str"]))
'abcd'
>>> thing = json.loads('{"str": [97, 98, 99, 100, null]}')
>>> bytearray(thing["str"]).decode('utf-8')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: an integer is required
>>> "".join(map(chr, thing["str"]))
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: an integer is required (got type NoneType)

If you start putting nulls into the array the conversion to a string
will become more difficult, won't it?  Do you have a use case where
this helps?  Maybe my Python-foo is not strong enough?

> +		else if (isprint(*((char *)data)))
> +			jsonw_printf(jw, "\"%c\"", *((char *)data));
> +		else
> +			if (is_plain_text)
> +				jsonw_printf(jw, "0x%hhx", *((char *)data));
> +			else
> +				jsonw_printf(jw, "\"\\u00%02hhx\"",
> +					     *((char *)data));
> +		break;
> +	case BTF_INT_BOOL:
> +		jsonw_bool(jw, *((int *)data));
> +		break;
> +	default:
> +		/* shouldn't happen */
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id,
> +			     const void *data)
> +{
> +	const struct btf_type *t;
> +	struct btf_member *m;
> +	int ret = 0;
> +	int i, vlen;
> +
> +	t = btf__type_by_id(d->btf, type_id);
> +	if (!t)
> +		return -EINVAL;
> +
> +	vlen = BTF_INFO_VLEN(t->info);
> +	jsonw_start_object(d->jw);
> +	m = (struct btf_member *)(t + 1);
> +
> +	for (i = 0; i < vlen; i++) {
> +		const void *data_off = data +
> +				       BITS_ROUNDDOWN_BYTES(m[i].offset);

nit: empty line between variable declaration and code, perhaps also
don't init inline since it doesn't fit that way?

> +		jsonw_name(d->jw, btf__name_by_offset(d->btf, m[i].name_off));
> +		ret = btf_dumper_do_type(d, m[i].type,
> +					 BITS_PER_BYTE_MASKED(m[i].offset),
> +					 data_off);
> +		if (ret)
> +			break;
> +	}
> +
> +	jsonw_end_object(d->jw);
> +
> +	return ret;
> +}

Thanks for all the changes you've made so far!