linux-kernel - Re: [PATCH bpf-next v2 2/3] bpf: btf: add btf print functionality

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180703214559.GA4448@w1t1fb>
Date:   Tue, 3 Jul 2018 22:46:00 +0100
From:   Okash Khawaja <osk@...com>
To:     Jakub Kicinski <jakub.kicinski@...ronome.com>
CC:     Daniel Borkmann <daniel@...earbox.net>,
        Martin KaFai Lau <kafai@...com>,
        Alexei Starovoitov <ast@...nel.org>,
        Yonghong Song <yhs@...com>,
        "Quentin Monnet" <quentin.monnet@...ronome.com>,
        "David S. Miller" <davem@...emloft.net>, <netdev@...r.kernel.org>,
        <kernel-team@...com>, <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH bpf-next v2 2/3] bpf: btf: add btf print functionality

On Mon, Jul 02, 2018 at 10:06:59PM -0700, Jakub Kicinski wrote:
> On Mon, 2 Jul 2018 11:39:15 -0700, Okash Khawaja wrote:
[...]

> 
> > +#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
> > +#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
> 
> Perhaps it's just me but BIT_OFFSET or BIT_COUNT as a name of this macro
> would make it more obvious to parse in the code below.
I don't mind either. However these macro names are also used inside
kernel for same purpose. For sake of consistency, I'd recommend we keep
them :)

> 
> > +#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
> > +#define BITS_ROUNDUP_BYTES(bits) \
> > +	(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
> > +
> > +static int btf_dumper_do_type(const struct btf_dumper *d, uint32_t type_id,
> > +			      uint8_t bit_offset, const void *data);
> > +
> > +static void btf_dumper_ptr(const void *data, json_writer_t *jw,
> > +			   bool is_plain_text)
> > +{
> > +	if (is_plain_text)
> > +		jsonw_printf(jw, "%p", *((uintptr_t *)data));
> > +	else
> > +		jsonw_printf(jw, "%u", *((uintptr_t *)data));
> > +}
> > +
> > +static int btf_dumper_modifier(const struct btf_dumper *d, uint32_t type_id,
> > +			       const void *data)
> > +{
> > +	int32_t actual_type_id = btf__resolve_type(d->btf, type_id);
> 
> Please prefer kernel types like __u32 wherever possible.
> 
> > +	int ret;
> > +
> > +	if (actual_type_id < 0)
> > +		return actual_type_id;
> > +
> > +	ret = btf_dumper_do_type(d, actual_type_id, 0, data);
> > +
> > +	return ret;
> 
> ret is unnecessary.
> 
> > +}
> > +
> > +static void btf_dumper_enum(const void *data, json_writer_t *jw)
> > +{
> > +	jsonw_printf(jw, "%d", *((int32_t *)data));
> 
> Unnecessary parenthesis.  There is a lot of those, please remove them
> all.
> 
> > +}
> > +
> > +static int btf_dumper_array(const struct btf_dumper *d, uint32_t type_id,
> > +			    const void *data)
> > +{
> > +	const struct btf_type *t = btf__type_by_id(d->btf, type_id);
> > +	struct btf_array *arr = (struct btf_array *)(t + 1);
> > +	int64_t elem_size;
> > +	int ret = 0;
> > +	uint32_t i;
> > +
> > +	elem_size = btf__resolve_size(d->btf, arr->type);
> > +	if (elem_size < 0)
> > +		return elem_size;
> > +
> > +	jsonw_start_array(d->jw);
> > +	for (i = 0; i < arr->nelems; i++) {
> > +		ret = btf_dumper_do_type(d, arr->type, 0,
> > +					 data + (i * elem_size));
> 
> Unnecessary parenthesis.
> 
> > +		if (ret)
> > +			break;
> > +	}
> > +
> > +	jsonw_end_array(d->jw);
> > +	return ret;
> > +}
> > +
> > +static void btf_dumper_int_bits(uint32_t int_type, uint8_t bit_offset,
> > +				const void *data, json_writer_t *jw,
> > +				bool is_plain_text)
> > +{
> > +	uint32_t bits = BTF_INT_BITS(int_type);
> > +	uint16_t total_bits_offset;
> > +	uint16_t bytes_to_copy;
> > +	uint16_t bits_to_copy;
> 
> Please use normal int types for things which don't have to be
> explicitly sized.  Using explicitly sized variables is bad style, 
> and ALU operations other than on word or byte quantities are usually
> slower on modern CPUs.
> 
> > +	uint8_t upper_bits;
> > +	union {
> > +		uint64_t u64_num;
> > +		uint8_t u8_nums[8];
> 
> Are the int types in BTF constrained to 64bit at most?
> 
> > +	} print_num;
> > +
> > +	total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type);
> > +	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
> > +	bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
> > +	bits_to_copy = bits + bit_offset;
> > +	bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy);
> > +
> > +	print_num.u64_num = 0;
> > +	memcpy(&print_num.u64_num, data, bytes_to_copy);
> 
> This scheme is unlikely to work on big endian machines...
Can you give an example how?

> 
> > +	upper_bits = BITS_PER_BYTE_MASKED(bits_to_copy);
> > +	if (upper_bits) {
> > +		uint8_t mask = (1 << upper_bits) - 1;
> > +
> > +		print_num.u8_nums[bytes_to_copy - 1] &= mask;
> > +	}
> > +
> > +	print_num.u64_num >>= bit_offset;
> > +
> > +	if (is_plain_text)
> > +		jsonw_printf(jw, "0x%llx", print_num.u64_num);
> > +	else
> > +		jsonw_printf(jw, "%llu", print_num.u64_num);
> > +}
> > +
> > +static int btf_dumper_int(const struct btf_type *t, uint8_t bit_offset,
> > +			  const void *data, json_writer_t *jw,
> > +			  bool is_plain_text)
> > +{
> > +	uint32_t *int_type = (uint32_t *)(t + 1);
> > +	uint32_t bits = BTF_INT_BITS(*int_type);
> > +	int ret = 0;
> > +
> > +	/* if this is bit field */
> > +	if (bit_offset || BTF_INT_OFFSET(*int_type) ||
> > +	    BITS_PER_BYTE_MASKED(bits)) {
> > +		btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> > +				    is_plain_text);
> > +		return ret;
> > +	}
> > +
> > +	switch (BTF_INT_ENCODING(*int_type)) {
> > +	case 0:
> > +		if (BTF_INT_BITS(*int_type) == 64)
> > +			jsonw_printf(jw, "%lu", *((uint64_t *)data));
> > +		else if (BTF_INT_BITS(*int_type) == 32)
> > +			jsonw_printf(jw, "%u", *((uint32_t *)data));
> > +		else if (BTF_INT_BITS(*int_type) == 16)
> > +			jsonw_printf(jw, "%hu", *((uint16_t *)data));
> > +		else if (BTF_INT_BITS(*int_type) == 8)
> > +			jsonw_printf(jw, "%hhu", *((uint8_t *)data));
> > +		else
> > +			btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> > +					    is_plain_text);
> > +		break;
> > +	case BTF_INT_SIGNED:
> > +		if (BTF_INT_BITS(*int_type) == 64)
> > +			jsonw_printf(jw, "%ld", *((int64_t *)data));
> > +		else if (BTF_INT_BITS(*int_type) == 32)
> > +			jsonw_printf(jw, "%d", *((int32_t *)data));
> > +		else if (BTF_INT_BITS(*int_type) ==  16)
> 
> Please drop the double space.  Both for 16 where it makes no sense and
> for 8 where it's marginally useful but not really.
> 
> > +			jsonw_printf(jw, "%hd", *((int16_t *)data));
> > +		else if (BTF_INT_BITS(*int_type) ==  8)
> > +			jsonw_printf(jw, "%hhd", *((int8_t *)data));
> > +		else
> > +			btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> > +					    is_plain_text);
> > +		break;
> > +	case BTF_INT_CHAR:
> > +		if (*((char *)data) == '\0')
> > +			jsonw_null(jw);
> 
> Mm.. I don't think 0 char is equivalent to null.
Yes, thanks. Will fix.

> 
> > +		else if (isprint(*((char *)data)))
> > +			jsonw_printf(jw, "\"%c\"", *((char *)data));
> 
> This looks very suspicious.  So if I see a "6" for a char field it's
> either a 6 ('\u0006') or a 54 ('6')...
It will always be 54. May be I missed your point. Could you explain why
it would be other than 54?

> 
> > +		else
> > +			if (is_plain_text)
> > +				jsonw_printf(jw, "%hhx", *((char *)data));
> > +			else
> > +				jsonw_printf(jw, "%hhd", *((char *)data));
> 
> ... I think you need to always print a string, and express it as
> \u00%02hhx for non-printable.
Okay that makes sense
> 
> > +		break;
> > +	case BTF_INT_BOOL:
> > +		jsonw_bool(jw, *((int *)data));
> > +		break;
> > +	default:
> > +		/* shouldn't happen */
> > +		ret = -EINVAL;
> 
> You only set ret to something else than 0 here just to break and
> immediately return.  Please remove the ret variable.
> 
> > +		break;
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static int btf_dumper_struct(const struct btf_dumper *d, uint32_t type_id,
> > +			     const void *data)
> > +{
> > +	const struct btf_type *t = btf__type_by_id(d->btf, type_id);
> 
> Please don't call functions which need error checking as initialized
> the if below looks very awkward..
> 
> > +	struct btf_member *m;
> > +	int ret = 0;
> > +
> > +	int i, vlen;
> > +
> > +	if (!t)
> > +		return -EINVAL;
> > +
> > +	vlen = BTF_INFO_VLEN(t->info);
> > +	jsonw_start_object(d->jw);
> > +	m = (struct btf_member *)(t + 1);
> > +
> > +	for (i = 0; i < vlen; i++) {
> > +		jsonw_name(d->jw, btf__name_by_offset(d->btf, m[i].name_off));
> > +		ret = btf_dumper_do_type(d, m[i].type,
> > +					 BITS_PER_BYTE_MASKED(m[i].offset), data
> > +					 + BITS_ROUNDDOWN_BYTES(m[i].offset));
> 
> Please use a temp variable to avoid this awkward multi-line sum.
> 
> > +		if (ret)
> > +			return ret;
> 
> You can't return without jsonw_end_object().
> 
> > +	}
> > +
> > +	jsonw_end_object(d->jw);
> > +
> > +	return 0;
> > +}
> > +
> > +static int btf_dumper_do_type(const struct btf_dumper *d, uint32_t type_id,
> > +			      uint8_t bit_offset, const void *data)
> > +{
> > +	const struct btf_type *t = btf__type_by_id(d->btf, type_id);
> > +	int ret = 0;
> > +
> > +	switch (BTF_INFO_KIND(t->info)) {
> > +	case BTF_KIND_INT:
> > +		ret = btf_dumper_int(t, bit_offset, data, d->jw,
> > +				     d->is_plain_text);
> > +		break;
> > +	case BTF_KIND_STRUCT:
> > +	case BTF_KIND_UNION:
> > +		ret = btf_dumper_struct(d, type_id, data);
> > +		break;
> > +	case BTF_KIND_ARRAY:
> > +		ret = btf_dumper_array(d, type_id, data);
> > +		break;
> > +	case BTF_KIND_ENUM:
> > +		btf_dumper_enum(data, d->jw);
> > +		break;
> > +	case BTF_KIND_PTR:
> > +		btf_dumper_ptr(data, d->jw, d->is_plain_text);
> > +		break;
> > +	case BTF_KIND_UNKN:
> > +		jsonw_printf(d->jw, "(unknown)");
> > +		break;
> > +	case BTF_KIND_FWD:
> > +		/* map key or value can't be forward */
> 
> Right, but you have to print _something_, otherwise we would have a
> name without a value, which would break JSON, no?
> 
> > +		ret = -EINVAL;
> > +		break;
> > +	case BTF_KIND_TYPEDEF:
> > +	case BTF_KIND_VOLATILE:
> > +	case BTF_KIND_CONST:
> > +	case BTF_KIND_RESTRICT:
> > +		ret = btf_dumper_modifier(d, type_id, data);
> > +		break;
> > +	default:
> > +		jsonw_printf(d->jw, "(unsupported-kind");
> > +		ret = -EINVAL;
> > +		break;
> > +	}
> > +
> > +	return ret;
> 
> Why return ret; at all, there is no code after the switch just return
> directly from cases and save 9 LOC.
> 
> > +}
> > +
> > +int32_t btf_dumper_type(const struct btf_dumper *d, uint32_t type_id,
> > +			const void *data)
> > +{
> > +	if (!d)
> > +		return -EINVAL;
> 
> No need for defensive programming.
> 
> > +	return btf_dumper_do_type(d, type_id, 0, data);
> > +}
> > --- /dev/null
> > +++ b/tools/bpf/bpftool/btf_dumper.h
> > @@ -0,0 +1,23 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/* Copyright (c) 2018 Facebook */
> > +
> > +#ifndef BTF_DUMPER_H
> > +#define BTF_DUMPER_H
> > +
> > +struct btf_dumper {
> > +	const struct btf *btf;
> > +	json_writer_t *jw;
> > +	bool is_plain_text;
> > +};
> > +
> > +/* btf_dumper_type - print data along with type information
> > + * @d: an instance containing context for dumping types
> > + * @type_id: index in btf->types array. this points to the type to be dumped
> > + * @data: pointer the actual data, i.e. the values to be printed
> > + *
> > + * Returns zero on success and negative error code otherwise
> > + */
> > +int32_t btf_dumper_type(const struct btf_dumper *d, uint32_t type_id,
> > +			const void *data);
> > +
> > +#endif
> 
> Please don't add header files for a single struct and single function.
> Just put this in main.h.

Thanks for your feedback. I'll reply with v3.

Okash