lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <89a6e282-0250-4264-128d-469be99073e9@fb.com>
Date:   Thu, 8 Aug 2019 04:24:25 +0000
From:   Yonghong Song <yhs@...com>
To:     Andrii Nakryiko <andriin@...com>,
        "bpf@...r.kernel.org" <bpf@...r.kernel.org>,
        "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
        Alexei Starovoitov <ast@...com>,
        "daniel@...earbox.net" <daniel@...earbox.net>
CC:     "andrii.nakryiko@...il.com" <andrii.nakryiko@...il.com>,
        Kernel Team <Kernel-team@...com>,
        Masahiro Yamada <yamada.masahiro@...ionext.com>,
        Arnaldo Carvalho de Melo <acme@...hat.com>,
        Jiri Olsa <jolsa@...nel.org>, "Sam Ravnborg" <sam@...nborg.org>
Subject: Re: [PATCH v2 bpf-next] btf: expose BTF info through sysfs



On 8/7/19 5:32 PM, Andrii Nakryiko wrote:
> Make .BTF section allocated and expose its contents through sysfs.
> 
> /sys/kernel/btf directory is created to contain all the BTFs present
> inside kernel. Currently there is only kernel's main BTF, represented as
> /sys/kernel/btf/kernel file. Once kernel modules' BTFs are supported,
> each module will expose its BTF as /sys/kernel/btf/<module-name> file.
> 
> Current approach relies on a few pieces coming together:
> 1. pahole is used to take almost final vmlinux image (modulo .BTF and
>     kallsyms) and generate .BTF section by converting DWARF info into
>     BTF. This section is not allocated and not mapped to any segment,
>     though, so is not yet accessible from inside kernel at runtime.
> 2. objcopy dumps .BTF contents into binary file and subsequently
>     converts binary file into linkable object file with automatically
>     generated symbols _binary__btf_kernel_bin_start and
>     _binary__btf_kernel_bin_end, pointing to start and end, respectively,
>     of BTF raw data.
> 3. final vmlinux image is generated by linking this object file (and
>     kallsyms, if necessary). sysfs_btf.c then creates
>     /sys/kernel/btf/kernel file and exposes embedded BTF contents through
>     it. This allows, e.g., libbpf and bpftool to access BTF info at
>     well-known location, without resorting to searching for vmlinux image
>     on disk (location of which is not standardized and vmlinux image
>     might not even be available in some scenarios, e.g., inside qemu
>     during testing).
> 
> Alternative approach using .incbin assembler directive to embed BTF
> contents directly was attempted but didn't work, because sysfs_btf.o is
> not re-compiled during link-vmlinux.sh stage. This is required, though,
> to update embedded BTF data (initially empty data is embedded, then
> pahole generates BTF info and we need to regenerate sysfs_btf.o with
> updated contents, but it's too late at that point).
> 
> If BTF couldn't be generated due to missing or too old pahole,
> sysfs_btf.c handles that gracefully by detecting that
> _binary__btf_kernel_bin_start (weak symbol) is 0 and not creating
> /sys/kernel/btf at all.
> 
> v1->v2:
> - allow kallsyms stage to re-use vmlinux generated by gen_btf();
> 
> Cc: Masahiro Yamada <yamada.masahiro@...ionext.com>
> Cc: Arnaldo Carvalho de Melo <acme@...hat.com>
> Cc: Jiri Olsa <jolsa@...nel.org>
> Cc: Sam Ravnborg <sam@...nborg.org>
> Signed-off-by: Andrii Nakryiko <andriin@...com>
> ---
>   kernel/bpf/Makefile     |  3 +++
>   kernel/bpf/sysfs_btf.c  | 52 ++++++++++++++++++++++++++++++++++++++
>   scripts/link-vmlinux.sh | 55 +++++++++++++++++++++++++++--------------
>   3 files changed, 91 insertions(+), 19 deletions(-)
>   create mode 100644 kernel/bpf/sysfs_btf.c
> 
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index 29d781061cd5..e1d9adb212f9 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile
> @@ -22,3 +22,6 @@ obj-$(CONFIG_CGROUP_BPF) += cgroup.o
>   ifeq ($(CONFIG_INET),y)
>   obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o
>   endif
> +ifeq ($(CONFIG_SYSFS),y)
> +obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o
> +endif
> diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
> new file mode 100644
> index 000000000000..ac06ce1d62e8
> --- /dev/null
> +++ b/kernel/bpf/sysfs_btf.c
> @@ -0,0 +1,52 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Provide kernel BTF information for introspection and use by eBPF tools.
> + */
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/kobject.h>
> +#include <linux/init.h>
> +
> +/* See scripts/link-vmlinux.sh, gen_btf() func for details */
> +extern char __weak _binary__btf_kernel_bin_start[];
> +extern char __weak _binary__btf_kernel_bin_end[];
> +
> +static ssize_t
> +btf_kernel_read(struct file *file, struct kobject *kobj,
> +		struct bin_attribute *bin_attr,
> +		char *buf, loff_t off, size_t len)
> +{
> +	memcpy(buf, _binary__btf_kernel_bin_start + off, len);
> +	return len;
> +}
> +
> +static struct bin_attribute btf_kernel_attr __ro_after_init = {
> +	.attr = {
> +		.name = "kernel",
> +		.mode = 0444,
> +	},
> +	.read = btf_kernel_read,
> +};
> +
> +static struct bin_attribute *btf_attrs[] __ro_after_init = {
> +	&btf_kernel_attr,
> +	NULL,
> +};
> +
> +static struct attribute_group btf_group_attr __ro_after_init = {
> +	.name = "btf",
> +	.bin_attrs = btf_attrs,
> +};
> +
> +static int __init btf_kernel_init(void)
> +{
> +	if (!_binary__btf_kernel_bin_start)
> +		return 0;
> +
> +	btf_kernel_attr.size = _binary__btf_kernel_bin_end -
> +			       _binary__btf_kernel_bin_start;
> +
> +	return sysfs_create_group(kernel_kobj, &btf_group_attr);
> +}
> +
> +subsys_initcall(btf_kernel_init);
> diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
> index a7124f895b24..e05abe19b11f 100755
> --- a/scripts/link-vmlinux.sh
> +++ b/scripts/link-vmlinux.sh
> @@ -56,8 +56,8 @@ modpost_link()
>   }
>   
>   # Link of vmlinux
> -# ${1} - optional extra .o files
> -# ${2} - output file
> +# ${1} - output file
> +# ${@:2} - optional extra .o files
>   vmlinux_link()
>   {
>   	local lds="${objtree}/${KBUILD_LDS}"
> @@ -70,9 +70,9 @@ vmlinux_link()
>   			--start-group				\
>   			${KBUILD_VMLINUX_LIBS}			\
>   			--end-group				\
> -			${1}"
> +			${@:2}"
>   
> -		${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2}	\
> +		${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${1}	\
>   			-T ${lds} ${objects}
>   	else
>   		objects="-Wl,--whole-archive			\
> @@ -81,9 +81,9 @@ vmlinux_link()
>   			-Wl,--start-group			\
>   			${KBUILD_VMLINUX_LIBS}			\
>   			-Wl,--end-group				\
> -			${1}"
> +			${@:2}"
>   
> -		${CC} ${CFLAGS_vmlinux} -o ${2}			\
> +		${CC} ${CFLAGS_vmlinux} -o ${1}			\
>   			-Wl,-T,${lds}				\
>   			${objects}				\
>   			-lutil -lrt -lpthread
> @@ -92,23 +92,34 @@ vmlinux_link()
>   }
>   
>   # generate .BTF typeinfo from DWARF debuginfo
> +# ${1} - vmlinux image
> +# ${2} - file to dump raw BTF data into
>   gen_btf()
>   {
> -	local pahole_ver;
> +	local pahole_ver
> +	local bin_arch
>   
>   	if ! [ -x "$(command -v ${PAHOLE})" ]; then
>   		info "BTF" "${1}: pahole (${PAHOLE}) is not available"
> -		return 0
> +		return 1
>   	fi
>   
>   	pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/')
>   	if [ "${pahole_ver}" -lt "113" ]; then
>   		info "BTF" "${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13"
> -		return 0
> +		return 1
>   	fi
>   
> -	info "BTF" ${1}
> +	info "BTF" ${2}
> +	vmlinux_link ${1}
>   	LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1}
> +
> +	# dump .BTF section into raw binary file to link with final vmlinux
> +	bin_arch=$(${OBJDUMP} -f ${1} | grep architecture | \
> +		cut -d, -f1 | cut -d' ' -f2)
> +	${OBJCOPY} --dump-section .BTF=.btf.kernel.bin ${1} 2>/dev/null
> +	${OBJCOPY} -I binary -O ${CONFIG_OUTPUT_FORMAT} -B ${bin_arch} \
> +		--rename-section .data=.BTF .btf.kernel.bin ${2}

Currently, the binary size on my config is about 2.6MB. Do you think
we could, or need to, compress it to make it smaller? I tried gzip,
and the compressed size is 0.9MB.

>   }
>   
>   # Create ${2} .o file with all symbols from the ${1} object file
> @@ -153,6 +164,7 @@ sortextable()
>   # Delete output files in case of error
>   cleanup()
>   {
> +	rm -f .btf.*
>   	rm -f .tmp_System.map
>   	rm -f .tmp_kallsyms*
>   	rm -f .tmp_vmlinux*
> @@ -215,6 +227,13 @@ ${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
>   info MODINFO modules.builtin.modinfo
>   ${OBJCOPY} -j .modinfo -O binary vmlinux.o modules.builtin.modinfo
>   
> +btf_kernel_bin_o=""
> +if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then
> +	if gen_btf .tmp_vmlinux1 .btf.kernel.bin.o ; then
> +		btf_kernel_bin_o=.btf.kernel.bin.o
> +	fi
> +fi
> +
>   kallsymso=""
>   kallsyms_vmlinux=""
>   if [ -n "${CONFIG_KALLSYMS}" ]; then
> @@ -246,11 +265,14 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then
>   	kallsyms_vmlinux=.tmp_vmlinux2
>   
>   	# step 1
> -	vmlinux_link "" .tmp_vmlinux1
> +	# skip building .tmp_vmlinux1 if gen_btf() already did that
> +	if [ -z "${btf_kernel_bin_o}" ]; then
> +		vmlinux_link .tmp_vmlinux1
> +	fi
>   	kallsyms .tmp_vmlinux1 .tmp_kallsyms1.o
>   
>   	# step 2
> -	vmlinux_link .tmp_kallsyms1.o .tmp_vmlinux2
> +	vmlinux_link .tmp_vmlinux2 .tmp_kallsyms1.o ${btf_kernel_bin_o}
>   	kallsyms .tmp_vmlinux2 .tmp_kallsyms2.o
>   
>   	# step 3
> @@ -261,18 +283,13 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then
>   		kallsymso=.tmp_kallsyms3.o
>   		kallsyms_vmlinux=.tmp_vmlinux3
>   
> -		vmlinux_link .tmp_kallsyms2.o .tmp_vmlinux3
> -
> +		vmlinux_link .tmp_vmlinux3 .tmp_kallsyms2.o ${btf_kernel_bin_o}
>   		kallsyms .tmp_vmlinux3 .tmp_kallsyms3.o
>   	fi
>   fi
>   
>   info LD vmlinux
> -vmlinux_link "${kallsymso}" vmlinux
> -
> -if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then
> -	gen_btf vmlinux
> -fi
> +vmlinux_link vmlinux "${kallsymso}" "${btf_kernel_bin_o}"
>   
>   if [ -n "${CONFIG_BUILDTIME_EXTABLE_SORT}" ]; then
>   	info SORTEX vmlinux
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ