lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20181101185217.GA20495@kernel.org>
Date:   Thu, 1 Nov 2018 15:52:17 -0300
From:   Arnaldo Carvalho de Melo <acme@...nel.org>
To:     Yonghong Song <yhs@...com>
Cc:     Daniel Borkmann <daniel@...earbox.net>,
        Jiri Olsa <jolsa@...hat.com>, Martin Lau <kafai@...com>,
        Alexei Starovoitov <alexei.starovoitov@...il.com>,
        Linux Networking Development Mailing List 
        <netdev@...r.kernel.org>
Subject: Help with the BPF verifier

tl;dr: I seem to be trying to get past clang optimizations to get the
       verifier to accept my proggie.

Hi,

	So I'm moving to use raw_syscalls:sys_exit to collect pointer
contents, using maps to tell the bpf program what to copy, how many
bytes, filters, etc.

	I'm at the start of it; at this point I need to use an index to
get to the right syscall arg that is a filename, starting just with
"open" and "openat", which have the filename in different args. To get
this first part working I'm doing it directly in the bpf restricted C
program; later this will move to maps, etc. If I set the index as a
constant, just for testing, it works. Look at the "open" and "openat"
calls below; later we'll see why openat is failing to augment its
"filename" arg while "open" works:

[root@...enth perf]# trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1
         ? (         ): sleep/10152  ... [continued]: execve()) = 0
     0.045 ( 0.004 ms): sleep/10152 brk() = 0x55ccff356000
     0.074 ( 0.007 ms): sleep/10152 access(filename: , mode: R) = -1 ENOENT No such file or directory
     0.089 ( 0.006 ms): sleep/10152 openat(dfd: CWD, filename: , flags: CLOEXEC) = 3
     0.097 ( 0.003 ms): sleep/10152 fstat(fd: 3, statbuf: 0x7ffecdd283f0) = 0
     0.103 ( 0.006 ms): sleep/10152 mmap(len: 103334, prot: READ, flags: PRIVATE, fd: 3) = 0x7f8ffee9c000
     0.111 ( 0.002 ms): sleep/10152 close(fd: 3) = 0
     0.135 ( 0.007 ms): sleep/10152 openat(dfd: CWD, filename: , flags: CLOEXEC) = 3
     0.144 ( 0.003 ms): sleep/10152 read(fd: 3, buf: 0x7ffecdd285b8, count: 832) = 832
     0.150 ( 0.002 ms): sleep/10152 fstat(fd: 3, statbuf: 0x7ffecdd28450) = 0
     0.155 ( 0.005 ms): sleep/10152 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) = 0x7f8ffee9a000
     0.166 ( 0.007 ms): sleep/10152 mmap(len: 3889792, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3) = 0x7f8ffe8dc000
     0.175 ( 0.010 ms): sleep/10152 mprotect(start: 0x7f8ffea89000, len: 2093056) = 0
     0.188 ( 0.010 ms): sleep/10152 mmap(addr: 0x7f8ffec88000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 1753088) = 0x7f8ffec88000
     0.204 ( 0.005 ms): sleep/10152 mmap(addr: 0x7f8ffec8e000, len: 14976, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) = 0x7f8ffec8e000
     0.218 ( 0.002 ms): sleep/10152 close(fd: 3) = 0
     0.239 ( 0.002 ms): sleep/10152 arch_prctl(option: 4098, arg2: 140256433779968) = 0
     0.312 ( 0.009 ms): sleep/10152 mprotect(start: 0x7f8ffec88000, len: 16384, prot: READ) = 0
     0.343 ( 0.005 ms): sleep/10152 mprotect(start: 0x55ccff1c6000, len: 4096, prot: READ) = 0
     0.354 ( 0.006 ms): sleep/10152 mprotect(start: 0x7f8ffeeb6000, len: 4096, prot: READ) = 0
     0.362 ( 0.019 ms): sleep/10152 munmap(addr: 0x7f8ffee9c000, len: 103334) = 0
     0.476 ( 0.002 ms): sleep/10152 brk() = 0x55ccff356000
     0.480 ( 0.004 ms): sleep/10152 brk(brk: 0x55ccff377000) = 0x55ccff377000
     0.487 ( 0.002 ms): sleep/10152 brk() = 0x55ccff377000
     0.497 ( 0.008 ms): sleep/10152 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) = 3
     0.507 ( 0.002 ms): sleep/10152 fstat(fd: 3, statbuf: 0x7f8ffec8daa0) = 0
     0.511 ( 0.006 ms): sleep/10152 mmap(len: 113045344, prot: READ, flags: PRIVATE, fd: 3) = 0x7f8ff7d0d000
     0.524 ( 0.002 ms): sleep/10152 close(fd: 3) = 0
     0.574 (1000.140 ms): sleep/10152 nanosleep(rqtp: 0x7ffecdd29130) = 0
  1000.753 ( 0.007 ms): sleep/10152 close(fd: 1) = 0
  1000.767 ( 0.004 ms): sleep/10152 close(fd: 2) = 0
  1000.781 (         ): sleep/10152 exit_group()
[root@...enth perf]# 

     1	// SPDX-License-Identifier: GPL-2.0
     2	/*
     3	 * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
     4	 *
     5	 * Test it with:
     6	 *
     7	 * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
     8	 *
     9	 * This exactly matches what is marshalled into the raw_syscall:sys_enter
    10	 * payload expected by the 'perf trace' beautifiers.
    11	 *
    12	 * For now it just uses the existing tracepoint augmentation code in 'perf
    13	 * trace', in the next csets we'll hook up these with the sys_enter/sys_exit
    14	 * code that will combine entry/exit in a strace like way.
    15	 */
       
    16	#include <stdio.h>
    17	#include <linux/socket.h>
       
    18	/* bpf-output associated map */
    19	struct bpf_map SEC("maps") __augmented_syscalls__ = {
    20		.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
    21		.key_size = sizeof(int),
    22		.value_size = sizeof(u32),
    23		.max_entries = __NR_CPUS__,
    24	};
       
    25	struct syscall_enter_args {
    26		unsigned long long common_tp_fields;
    27		long		   syscall_nr;
    28		unsigned long	   args[6];
    29	};
       
    30	struct syscall_exit_args {
    31		unsigned long long common_tp_fields;
    32		long		   syscall_nr;
    33		long		   ret;
    34	};
       
    35	struct augmented_filename {
    36		unsigned int	size;
    37		int		reserved;
    38		char		value[256];
    39	};
       
    40	#define SYS_OPEN 2
    41	#define SYS_OPENAT 257
       
    42	SEC("raw_syscalls:sys_enter")
    43	int sys_enter(struct syscall_enter_args *args)
    44	{
    45		struct {
    46			struct syscall_enter_args args;
    47			struct augmented_filename filename;
    48		} augmented_args;
    49		unsigned int len = sizeof(augmented_args);
    50		unsigned int filename_arg = 6;
       
    51		probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
       
    52		switch (augmented_args.args.syscall_nr) {
    53		case SYS_OPEN:	 filename_arg = 0; break;
    54		case SYS_OPENAT: filename_arg = 1; break;
    55		}
       
    56		if (filename_arg <= 5) {
    57			augmented_args.filename.reserved = 0;
    58			augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
    59								      sizeof(augmented_args.filename.value),
    60								      (const void *)args->args[0]);
    61			if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
    62				len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
    63				len &= sizeof(augmented_args.filename.value) - 1;
    64			}
    65		} else {
    66			len = sizeof(augmented_args.args);
    67		}
       
    68		perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
    69		return 0;
    70	}
       
    71	SEC("raw_syscalls:sys_exit")
    72	int sys_exit(struct syscall_exit_args *args)
    73	{
    74		return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */
    75	}
       
    76	license(GPL);

In line #60, if I change that index from 0 to 1, then "openat" works and
"open" doesn't. What I wanted was to use filename_arg there as the index;
for now it comes from that switch, but really it'll come from userspace,
which knows the syscall tables for each arch, etc.

But if I do that, i.e. apply this patch to that program:

--- /wb/augmented_raw_syscalls.c.old	2018-11-01 15:43:55.000394234 -0300
+++ /wb/augmented_raw_syscalls.c	2018-11-01 15:44:15.102367838 -0300
@@ -67,7 +67,7 @@
 		augmented_args.filename.reserved = 0;
 		augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
 							      sizeof(augmented_args.filename.value),
-							      (const void *)args->args[0]);
+							      (const void *)args->args[filename_arg]);
 		if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
 			len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
 			len &= sizeof(augmented_args.filename.value) - 1;

Then I end up with the verifier complaining. I tried various ways to
convince the compiler that filename_arg is safe to use as an index,
but I couldn't find the right trick. Ideas?

This is what I end up with when I apply that patch:

[root@...enth perf]# trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1
event syntax error: 'tools/perf/examples/bpf/augmented_raw_syscalls.c'
                     \___ Kernel verifier blocks program loading

(add -v to see detail)
Run 'perf list' for a list of valid events

 Usage: perf trace [<options>] [<command>]
    or: perf trace [<options>] -- <command> [<options>]
    or: perf trace record [<options>] [<command>]
    or: perf trace record [<options>] -- <command> [<options>]

    -e, --event <event>   event/syscall selector. use 'perf list' to list available events
[root@...enth perf]# 

Using -v, as suggested, I get:

[root@...enth perf]# trace -v -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1
bpf: builtin compilation failed: -95, try external compiler
Kernel build dir is set to /lib/modules/4.19.0-rc8-00014-gc0cff31be705/build
set env: KBUILD_DIR=/lib/modules/4.19.0-rc8-00014-gc0cff31be705/build
unset env: KBUILD_OPTS
include option is set to  -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h 
set env: NR_CPUS=4
set env: LINUX_VERSION_CODE=0x41300
set env: CLANG_EXEC=/usr/local/bin/clang
unset env: CLANG_OPTIONS
set env: KERNEL_INC_OPTIONS= -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h 
set env: PERF_BPF_INC_OPTIONS=-I/home/acme/lib/perf/include/bpf
set env: WORKING_DIR=/lib/modules/4.19.0-rc8-00014-gc0cff31be705/build
set env: CLANG_SOURCE=/home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c
llvm compiling command template: $CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS -DLINUX_VERSION_CODE=$LINUX_VERSION_CODE $CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE
llvm compiling command : /usr/local/bin/clang -D__KERNEL__ -D__NR_CPUS__=4 -DLINUX_VERSION_CODE=0x41300  -I/home/acme/lib/perf/include/bpf  -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h  -Wno-unused-value -Wno-pointer-sign -working-directory /lib/modules/4.19.0-rc8-00014-gc0cff31be705/build -c /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c -target bpf  -O2 -o - 
libbpf: loading object 'tools/perf/examples/bpf/augmented_raw_syscalls.c' from buffer
libbpf: section(1) .strtab, size 168, link 0, flags 0, type=3
libbpf: skip section(1) .strtab
libbpf: section(2) .text, size 0, link 0, flags 6, type=1
libbpf: skip section(2) .text
libbpf: section(3) raw_syscalls:sys_enter, size 376, link 0, flags 6, type=1
libbpf: found program raw_syscalls:sys_enter
libbpf: section(4) .relraw_syscalls:sys_enter, size 16, link 10, flags 0, type=9
libbpf: section(5) raw_syscalls:sys_exit, size 16, link 0, flags 6, type=1
libbpf: found program raw_syscalls:sys_exit
libbpf: section(6) maps, size 56, link 0, flags 3, type=1
libbpf: section(7) license, size 4, link 0, flags 3, type=1
libbpf: license of tools/perf/examples/bpf/augmented_raw_syscalls.c is GPL
libbpf: section(8) version, size 4, link 0, flags 3, type=1
libbpf: kernel version of tools/perf/examples/bpf/augmented_raw_syscalls.c is 41300
libbpf: section(9) .llvm_addrsig, size 6, link 10, flags 80000000, type=1879002115
libbpf: skip section(9) .llvm_addrsig
libbpf: section(10) .symtab, size 240, link 1, flags 0, type=2
libbpf: maps in tools/perf/examples/bpf/augmented_raw_syscalls.c: 2 maps in 56 bytes
libbpf: map 0 is "__augmented_syscalls__"
libbpf: map 1 is "__bpf_stdout__"
libbpf: collecting relocating info for: 'raw_syscalls:sys_enter'
libbpf: relo for 4 value 28 name 124
libbpf: relocation: insn_idx=39
libbpf: relocation: find map 1 (__augmented_syscalls__) for insn 39
bpf: config program 'raw_syscalls:sys_enter'
bpf: config program 'raw_syscalls:sys_exit'
libbpf: create map __bpf_stdout__: fd=3
libbpf: create map __augmented_syscalls__: fd=4
libbpf: load bpf program failed: Permission denied
libbpf: -- BEGIN DUMP LOG ---
libbpf: 
0: (bf) r6 = r1
1: (bf) r1 = r10
2: (07) r1 += -328
3: (b7) r7 = 64
4: (b7) r2 = 64
5: (bf) r3 = r6
6: (85) call bpf_probe_read#4
7: (b7) r2 = 1
8: (79) r3 = *(u64 *)(r10 -320)
9: (15) if r3 == 0x101 goto pc+1
 R0=inv(id=0) R2=inv1 R3=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
10: (b7) r2 = 6
11: (b7) r1 = 0
12: (15) if r3 == 0x2 goto pc+1
 R0=inv(id=0) R1=inv0 R2=inv6 R3=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
13: (bf) r1 = r2
14: (25) if r1 > 0x5 goto pc+21
 R0=inv(id=0) R1=inv6 R2=inv6 R3=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
15: (b7) r2 = 0
16: (63) *(u32 *)(r10 -260) = r2
17: (67) r1 <<= 32
18: (77) r1 >>= 32
19: (67) r1 <<= 3
20: (bf) r2 = r6
21: (0f) r2 += r1
22: (79) r3 = *(u64 *)(r2 +16)
R2 invalid mem access 'inv'

libbpf: -- END LOG --
libbpf: failed to load program 'raw_syscalls:sys_enter'
libbpf: failed to load object 'tools/perf/examples/bpf/augmented_raw_syscalls.c'
bpf: load objects failed: err=-4007: (Kernel verifier blocks program loading)
event syntax error: 'tools/perf/examples/bpf/augmented_raw_syscalls.c'
                     \___ Kernel verifier blocks program loading

(add -v to see detail)
Run 'perf list' for a list of valid events

 Usage: perf trace [<options>] [<command>]
    or: perf trace [<options>] -- <command> [<options>]
    or: perf trace record [<options>] [<command>]
    or: perf trace record [<options>] -- <command> [<options>]

    -e, --event <event>   event/syscall selector. use 'perf list' to list available events
[root@...enth perf]# 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ