[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <871019a0-71f8-c26d-0ae8-c7fd8c8867fc@fb.com>
Date: Mon, 22 Jan 2018 10:28:11 -0800
From: Yonghong Song <yhs@...com>
To: Arnaldo Carvalho de Melo <acme@...nel.org>,
Gianluca Borello <g.borello@...il.com>
CC: Daniel Borkmann <daniel@...earbox.net>,
Alexei Starovoitov <ast@...nel.org>,
David Miller <davem@...emloft.net>,
Linux Networking Development Mailing List
<netdev@...r.kernel.org>
Subject: Re: len = bpf_probe_read_str(); bpf_perf_event_output(... len) ==
FAIL
On 1/22/18 7:06 AM, Arnaldo Carvalho de Melo wrote:
> Em Wed, Nov 22, 2017 at 10:42:22AM -0800, Gianluca Borello escreveu:
>> On Tue, Nov 21, 2017 at 2:31 PM, Alexei Starovoitov
>> <alexei.starovoitov@...il.com> wrote:
>>>
>>> yeah sorry about this hack. Gianluca reported this issue as well.
>>> Yonghong fixed it for bpf_probe_read only. We will extend
>>> the fix to bpf_probe_read_str() and bpf_perf_event_output() asap.
>>> The above workaround gets too much into llvm and verifier details
>>> we should strive to make bpf program writing as easy as possible.
>>>
>>
>> Hi Arnaldo
>>
>> With the help of Alexei, Daniel and Yonghong I just submitted a new
>> series ("bpf: fix semantics issues with helpers receiving NULL
>> arguments") that includes a fix in bpf_perf_event_output. This should
>> simplify the way you write your bpf programs, so you shouldn't be
>> required to write those convoluted branches anymore (there are a few
>> usage examples in the commit log).
>>
>> In my case it made writing the code much easier, after applying it I
>> haven't been surprised by the compiler output in a while, and I hope
>> your experience will be improved as well.
>
> Trying to work with this again, and I still need to trick clang into not
> doing some optimizations that end up getting the resulting eBPF object
> rejected by the kernel verifier:
>
> [root@...et bpf]# uname -a
> Linux jouet 4.15.0-rc8+ #1 SMP Wed Jan 17 11:01:34 -03 2018 x86_64 x86_64 x86_64 GNU/Linux
> [root@...et bpf]# grep -i bpf /lib/modules/`uname -r`/build/.config
> CONFIG_CGROUP_BPF=y
> CONFIG_BPF=y
> CONFIG_BPF_SYSCALL=y
> CONFIG_BPF_JIT_ALWAYS_ON=y
> # CONFIG_NETFILTER_XT_MATCH_BPF is not set
> # CONFIG_NET_CLS_BPF is not set
> # CONFIG_NET_ACT_BPF is not set
> CONFIG_BPF_JIT=y
> CONFIG_BPF_STREAM_PARSER=y
> CONFIG_LWTUNNEL_BPF=y
> CONFIG_HAVE_EBPF_JIT=y
> CONFIG_BPF_EVENTS=y
> # CONFIG_TEST_BPF is not set
> [root@...et bpf]# cat sys_enter_open.c
> #include "bpf.h"
>
> SEC("syscalls:sys_enter_open")
> int func(void *ctx)
> {
> struct {
> char *ptr;
> char path[256];
> } filename = {
> /*
> * /sys/kernel/debug/tracing/events/syscalls/sys_enter_open/format:
> *
> * ...
> * field:const char * filename; offset:16; size:8; signed:0;
> * ...
> * ctx + 16 selects 'filename'
> */
> .ptr = *((char **)(ctx + 16)),
> };
> int len = bpf_probe_read_str(filename.path, sizeof(filename.path), filename.ptr);
> if (len > 0 && len < 256)
> perf_event_output(ctx, &__bpf_stdout__, BPF_F_CURRENT_CPU, &filename, len + sizeof(filename.ptr));
> return 0;
> }
> [root@...et bpf]#
> [root@...et bpf]# perf trace -v -e open,sys_enter_open.c
> bpf: builtin compilation failed: -95, try external compiler
> Kernel build dir is set to /lib/modules/4.15.0-rc8+/build
> set env: KBUILD_DIR=/lib/modules/4.15.0-rc8+/build
> unset env: KBUILD_OPTS
> include option is set to -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h
> set env: NR_CPUS=4
> set env: LINUX_VERSION_CODE=0x40f00
> set env: CLANG_EXEC=/usr/local/bin/clang
> unset env: CLANG_OPTIONS
> set env: KERNEL_INC_OPTIONS= -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h
> set env: WORKING_DIR=/lib/modules/4.15.0-rc8+/build
> set env: CLANG_SOURCE=/home/acme/bpf/sys_enter_open.c
> llvm compiling command template: $CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS -DLINUX_VERSION_CODE=$LINUX_VERSION_CODE $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o -
> libbpf: loading object 'sys_enter_open.c' from buffer
> libbpf: section .strtab, size 101, link 0, flags 0, type=3
> libbpf: section .text, size 0, link 0, flags 6, type=1
> libbpf: section syscalls:sys_enter_open, size 472, link 0, flags 6, type=1
> libbpf: found program syscalls:sys_enter_open
> libbpf: section .relsyscalls:sys_enter_open, size 16, link 8, flags 0, type=9
> libbpf: section maps, size 16, link 0, flags 3, type=1
> libbpf: section license, size 4, link 0, flags 3, type=1
> libbpf: license of sys_enter_open.c is GPL
> libbpf: section version, size 4, link 0, flags 3, type=1
> libbpf: kernel version of sys_enter_open.c is 40f00
> libbpf: section .symtab, size 144, link 1, flags 0, type=2
> libbpf: maps in sys_enter_open.c: 1 maps in 16 bytes
> libbpf: map 0 is "__bpf_stdout__"
> libbpf: collecting relocating info for: 'syscalls:sys_enter_open'
> libbpf: relocation: insn_idx=51
> libbpf: relocation: find map 0 (__bpf_stdout__) for insn 51
> LLVM: dumping sys_enter_open.o
> bpf: config program 'syscalls:sys_enter_open'
> libbpf: create map __bpf_stdout__: fd=3
> libbpf: load bpf program failed: Permission denied
> libbpf: -- BEGIN DUMP LOG ---
> libbpf:
> 0: (bf) r6 = r1
> 1: (b7) r1 = 0
> 2: (7b) *(u64 *)(r10 -8) = r1
> 3: (7b) *(u64 *)(r10 -16) = r1
> 4: (7b) *(u64 *)(r10 -24) = r1
> 5: (7b) *(u64 *)(r10 -32) = r1
> 6: (7b) *(u64 *)(r10 -40) = r1
> 7: (7b) *(u64 *)(r10 -48) = r1
> 8: (7b) *(u64 *)(r10 -56) = r1
> 9: (7b) *(u64 *)(r10 -64) = r1
> 10: (7b) *(u64 *)(r10 -72) = r1
> 11: (7b) *(u64 *)(r10 -80) = r1
> 12: (7b) *(u64 *)(r10 -88) = r1
> 13: (7b) *(u64 *)(r10 -96) = r1
> 14: (7b) *(u64 *)(r10 -104) = r1
> 15: (7b) *(u64 *)(r10 -112) = r1
> 16: (7b) *(u64 *)(r10 -120) = r1
> 17: (7b) *(u64 *)(r10 -128) = r1
> 18: (7b) *(u64 *)(r10 -136) = r1
> 19: (7b) *(u64 *)(r10 -144) = r1
> 20: (7b) *(u64 *)(r10 -152) = r1
> 21: (7b) *(u64 *)(r10 -160) = r1
> 22: (7b) *(u64 *)(r10 -168) = r1
> 23: (7b) *(u64 *)(r10 -176) = r1
> 24: (7b) *(u64 *)(r10 -184) = r1
> 25: (7b) *(u64 *)(r10 -192) = r1
> 26: (7b) *(u64 *)(r10 -200) = r1
> 27: (7b) *(u64 *)(r10 -208) = r1
> 28: (7b) *(u64 *)(r10 -216) = r1
> 29: (7b) *(u64 *)(r10 -224) = r1
> 30: (7b) *(u64 *)(r10 -232) = r1
> 31: (7b) *(u64 *)(r10 -240) = r1
> 32: (7b) *(u64 *)(r10 -248) = r1
> 33: (7b) *(u64 *)(r10 -256) = r1
> 34: (79) r3 = *(u64 *)(r6 +16)
> 35: (7b) *(u64 *)(r10 -264) = r3
> 36: (bf) r1 = r10
> 37: (07) r1 += -256
> 38: (b7) r2 = 256
> 39: (85) call bpf_probe_read_str#45
> 40: (bf) r1 = r0
> 41: (07) r1 += -1
> 42: (67) r1 <<= 32
> 43: (77) r1 >>= 32
> 44: (25) if r1 > 0xfe goto pc+12
> R0=inv(id=0) R1=inv(id=0,umax_value=254,var_off=(0x0; 0xff)) R6=ctx(id=0,off=0,imm=0) R10=fp0
> 45: (67) r0 <<= 32
> 46: (c7) r0 s>>= 32
> 47: (07) r0 += 8
> 48: (bf) r4 = r10
> 49: (07) r4 += -264
> 50: (bf) r1 = r6
> 51: (18) r2 = 0xffff957fc90e1000
> 53: (18) r3 = 0xffffffff
> 55: (bf) r5 = r0
> 56: (85) call bpf_perf_event_output#25
> R5 min value is negative, either use unsigned or 'var &= const'
Yes, I can reproduce the issue as well. The reason is again due to
compiler optimization,
> 39: (85) call bpf_probe_read_str#45
> 40: (bf) r1 = r0
> 41: (07) r1 += -1
> 42: (67) r1 <<= 32
> 43: (77) r1 >>= 32
> 44: (25) if r1 > 0xfe goto pc+12
> R0=inv(id=0) R1=inv(id=0,umax_value=254,var_off=(0x0; 0xff))
R6=ctx(id=0,off=0,imm=0) R10=fp0
> 45: (67) r0 <<= 32
> 46: (c7) r0 s>>= 32
> 47: (07) r0 += 8
> 48: (bf) r4 = r10
> 49: (07) r4 += -264
> 50: (bf) r1 = r6
> 51: (18) r2 = 0xffff957fc90e1000
> 53: (18) r3 = 0xffffffff
> 55: (bf) r5 = r0
> 56: (85) call bpf_perf_event_output#25
> R5 min value is negative, either use unsigned or 'var &= const'
The compiler did "40: (bf) r1 = r0" and then uses "r1" for branch
comparison, the original "r0" is left with complete unknown integer
value and later used to calculate the buffer size "55: (bf) r5 = r0"
where "r5" could be negative value and the verifier rightfully
complains.
There is no easy way to fix this in verifier unless verifier starts to
track correlations between registers which is a big task. So your below
workaround is okay. The below workaround should also work:
int len = bpf_probe_read_str(filename.path,
sizeof(filename.path), filename.ptr);
if (len > 0 && len < 256)
bpf_perf_event_output(ctx, &my_map, BPF_F_CURRENT_CPU,
&filename, (len & 0xff) + sizeof(filename.ptr));
return 0;
Thanks,
>
> libbpf: -- END LOG --
> libbpf: failed to load program 'syscalls:sys_enter_open'
> libbpf: failed to load object 'sys_enter_open.c'
> bpf: load objects failed
> event syntax error: 'sys_enter_open.c'
> \___ Kernel verifier blocks program loading
>
> (add -v to see detail)
> Run 'perf list' for a list of valid events
>
> Usage: perf trace [<options>] [<command>]
> or: perf trace [<options>] -- <command> [<options>]
> or: perf trace record [<options>] [<command>]
> or: perf trace record [<options>] -- <command> [<options>]
>
> -e, --event <event> event/syscall selector. use 'perf list' to list available events
> [root@...et bpf]#
>
> If I use the version we came up before, it works:
>
> [root@...et bpf]# trace -e open,sys_enter_open.c
> LLVM: dumping sys_enter_open.o
> 2171.820 ( ): __bpf_stdout__:@......./proc/self/task/14683/comm..)
> 2171.845 ( 0.339 ms): qemu-system-x8/6721 open(filename: /proc/self/task/14683/comm, flags: RDWR ) = 91
> ^C[root@...et bpf]
> [root@...et bpf]# trace -e open,sys_enter_open.o
> 2485.416 ( ): __bpf_stdout__:Vm..u.../proc/loadavg.......)
> 2485.434 ( 0.210 ms): lighttpd/25120 open(filename: /proc/loadavg, mode: ISGID|IXOTH ) = 8
> ^C[root@...et bpf]#
>
> [root@...et bpf]cat sys_enter_open.c
> #include "bpf.h"
>
> SEC("syscalls:sys_enter_open")
> int func(void *ctx)
> {
> struct {
> char *ptr;
> char path[256];
> } filename = {
> /*
> * /sys/kernel/debug/tracing/events/syscalls/sys_enter_open/format:
> *
> * ...
> * field:const char * filename; offset:16; size:8; signed:0;
> * ...
> * ctx + 16 selects 'filename'
> */
> .ptr = *((char **)(ctx + 16)),
> };
> int len = bpf_probe_read_str(filename.path, sizeof(filename.path), filename.ptr);
> if (len > 0) {
> if (len == 1)
> perf_event_output(ctx, &__bpf_stdout__, BPF_F_CURRENT_CPU, &filename, len + sizeof(filename.ptr));
> else if (len < 256)
> perf_event_output(ctx, &__bpf_stdout__, BPF_F_CURRENT_CPU, &filename, len + sizeof(filename.ptr));
> }
> return 0;
> }
> [root@...et bpf]#
>
> [acme@...et perf]$ llc --version | head -16
> LLVM (https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_&d=DwIBAg&c=5VD0RTtNlTh3ycd41b3MUw&r=DA8e1B5r073vIqRrFz7MRA&m=kbmGig2gAUdNIT0zFKDollApvFW0R95_U5Ntz4dvRbM&s=zQgAZ9vJnfkoNy-c53Y3H_2nMVwMzV-LyMA2f6bSFrw&e=):
> LLVM version 6.0.0svn
> DEBUG build with assertions.
> Default target: x86_64-unknown-linux-gnu
> Host CPU: broadwell
>
> Registered Targets:
> aarch64 - AArch64 (little endian)
> aarch64_be - AArch64 (big endian)
> amdgcn - AMD GCN GPUs
> arm - ARM
> arm64 - ARM64 (little endian)
> armeb - ARM (big endian)
> bpf - BPF (host endian)
> bpfeb - BPF (big endian)
> bpfel - BPF (little endian)
> [acme@...et perf]$
>
Powered by blists - more mailing lists