lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAEf4Bzagyaycg7aWrwx9D8_A5p0LgD2-fg94sq7+5xiL_tHh2g@mail.gmail.com>
Date:   Tue, 2 Apr 2019 09:44:51 -0700
From:   Andrii Nakryiko <andrii.nakryiko@...il.com>
To:     Arnaldo Carvalho de Melo <acme@...nel.org>
Cc:     Ingo Molnar <mingo@...nel.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Jiri Olsa <jolsa@...nel.org>,
        Namhyung Kim <namhyung@...nel.org>,
        Clark Williams <williams@...hat.com>,
        linux-kernel@...r.kernel.org, linux-perf-users@...r.kernel.org,
        Arnaldo Carvalho de Melo <acme@...hat.com>,
        Adrian Hunter <adrian.hunter@...el.com>,
        Andrii Nakryiko <andriin@...com>,
        Daniel Borkmann <borkmann@...earbox.net>,
        Gianluca Borello <g.borello@...il.com>,
        Jesper Dangaard Brouer <brouer@...hat.com>,
        Luis Cláudio Gonçalves <lclaudio@...hat.com>,
        Martin Lau <kafai@...com>, Wang Nan <wangnan0@...wei.com>,
        Yonghong Song <yhs@...com>
Subject: Re: [PATCH 03/44] perf augmented_raw_syscalls: Use a PERCPU_ARRAY map
 to copy more string bytes

On Tue, Apr 2, 2019 at 9:12 AM Arnaldo Carvalho de Melo <acme@...nel.org> wrote:
>
> From: Arnaldo Carvalho de Melo <acme@...hat.com>
>
> The previous method, copying to the BPF stack limited us in how many
> bytes we could copy from strings, use a PERCPU_ARRAY map like devised by
> the sysdig guys[1] to copy more bytes:
>
> Before:
>
>   # trace --no-inherit -e openat touch `python -c "print "$s" 'a' * 2000"`
>   touch: cannot touch 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa': File name too long
>   openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
>   openat(AT_FDCWD, "/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
>   openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
>   openat(AT_FDCWD, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", O_CREAT|O_NOCTTY|O_NONBLOCK|O_WRONLY, S_IRUGO|S_IWUGO) = -1 ENAMETOOLONG (File name too long)
>   openat(AT_FDCWD, "/usr/share/locale/locale.alias", O_RDONLY|O_CLOEXEC) = 3
>   openat(AT_FDCWD, "/usr/share/locale/en_US.UTF-8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
>   openat(AT_FDCWD, "/usr/share/locale/en_US.utf8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
>   <SNIP some openat calls>
>   #
>
> After:
>
>   [root@...co acme]# trace --no-inherit -e openat touch `python -c "print "$s" 'a' * 2000"`
>   <STRIP what is the same as in the 'before' part>
>   openat(AT_FDCWD, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", O_CREAT|O_NOCTTY|O_NONBLOC) = -1 ENAMETOOLONG (File name too long)
>   <STRIP what is the same as in the 'before' part>
>
> If we leave something like 'perf trace -e string' to trace all syscalls
> with a string, and then do some 'perf top', to get some annotation for
> the augmented_raw_syscalls.o BPF program we get:
>
>        │     → callq  *ffffffffc45576d1                                                                                                          ▒
>        │                augmented_args->filename.size = probe_read_str(&augmented_args->filename.value,                                          ▒
>   0.05 │       mov    %eax,0x40(%r13)
>
> Looking with pahole, expanding types, asking for hex offsets and sizes,
> and use of BTF type information to see what is at that 0x40 offset from
> %r13:
>
>   # pahole -F btf -C augmented_args_filename --expand_types --hex /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o
>   struct augmented_args_filename {
>         struct syscall_enter_args {
>                 long long unsigned int common_tp_fields;                                 /*     0   0x8 */
>                 long int           syscall_nr;                                           /*   0x8   0x8 */
>                 long unsigned int  args[6];                                              /*  0x10  0x30 */
>         } args; /*     0  0x40 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         struct augmented_filename {
>                 unsigned int       size;                                                 /*  0x40   0x4 */
>                 int                reserved;                                             /*  0x44   0x4 */
>                 char               value[4096];                                          /*  0x48 0x1000 */
>         } filename; /*  0x40 0x1008 */
>
>         /* size: 4168, cachelines: 66, members: 2 */
>         /* last cacheline: 8 bytes */
>   };
>   #
>
> Then looking if PATH_MAX leaves some signature in the tests:
>
>        │                if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) {                                            ▒
>        │       cmp    $0xfff,%rdi
>
> 0xfff == 4095
> sizeof(augmented_args->filename.value) == PATH_MAX == 4096
>
> [1] https://sysdig.com/blog/the-art-of-writing-ebpf-programs-a-primer/
>
> Cc: Adrian Hunter <adrian.hunter@...el.com>
> Cc: Andrii Nakryiko <andriin@...com>
> Cc: Daniel Borkmann <borkmann@...earbox.net>
> Cc: Gianluca Borello <g.borello@...il.com>
> Cc: Jesper Dangaard Brouer <brouer@...hat.com>
> Cc: Jiri Olsa <jolsa@...nel.org>
> Cc: Luis Cláudio Gonçalves <lclaudio@...hat.com>
> cc: Martin Lau <kafai@...com>
> Cc: Namhyung Kim <namhyung@...nel.org>
> Cc: Wang Nan <wangnan0@...wei.com>
> Cc: Yonghong Song <yhs@...com>
> Link: https://lkml.kernel.org/n/tip-76gce2d2ghzq537ubwhjkone@git.kernel.org
> Signed-off-by: Arnaldo Carvalho de Melo <acme@...hat.com>
> ---

Looks good to me. I've been using PERCPU_ARRAY as a sort of heap for
many things that don't fit on BPF stack.

Acked-by: Andrii Nakryiko <andriin@...com>

>  .../examples/bpf/augmented_raw_syscalls.c     | 46 +++++++++++--------
>  1 file changed, 28 insertions(+), 18 deletions(-)
>
> diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
> index 9f8b31ad7a49..2422894a8194 100644
> --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
> +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
> @@ -15,6 +15,7 @@
>   */
>
>  #include <unistd.h>
> +#include <linux/limits.h>
>  #include <pid_filter.h>
>
>  /* bpf-output associated map */
> @@ -41,7 +42,7 @@ struct syscall_exit_args {
>  struct augmented_filename {
>         unsigned int    size;
>         int             reserved;
> -       char            value[256];
> +       char            value[PATH_MAX];
>  };
>
>  /* syscalls where the first arg is a string */
> @@ -119,23 +120,32 @@ struct augmented_filename {
>
>  pid_filter(pids_filtered);
>
> +struct augmented_args_filename {
> +       struct syscall_enter_args args;
> +       struct augmented_filename filename;
> +};
> +
> +bpf_map(augmented_filename_map, PERCPU_ARRAY, int, struct augmented_args_filename, 1);
> +
>  SEC("raw_syscalls:sys_enter")
>  int sys_enter(struct syscall_enter_args *args)
>  {
> -       struct {
> -               struct syscall_enter_args args;
> -               struct augmented_filename filename;
> -       } augmented_args;
> -       struct syscall *syscall;
> -       unsigned int len = sizeof(augmented_args);
> +       struct augmented_args_filename *augmented_args;
> +       unsigned int len = sizeof(*augmented_args);
>         const void *filename_arg = NULL;
> +       struct syscall *syscall;
> +       int key = 0;
> +
> +        augmented_args = bpf_map_lookup_elem(&augmented_filename_map, &key);
> +        if (augmented_args == NULL)
> +                return 1;
>
>         if (pid_filter__has(&pids_filtered, getpid()))
>                 return 0;
>
> -       probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
> +       probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
>
> -       syscall = bpf_map_lookup_elem(&syscalls, &augmented_args.args.syscall_nr);
> +       syscall = bpf_map_lookup_elem(&syscalls, &augmented_args->args.syscall_nr);
>         if (syscall == NULL || !syscall->enabled)
>                 return 0;
>         /*
> @@ -191,7 +201,7 @@ int sys_enter(struct syscall_enter_args *args)
>          * processor architecture, making the kernel part the same no matter what
>          * kernel version or processor architecture it runs on.
>          */
> -       switch (augmented_args.args.syscall_nr) {
> +       switch (augmented_args->args.syscall_nr) {
>         case SYS_ACCT:
>         case SYS_ADD_KEY:
>         case SYS_CHDIR:
> @@ -263,20 +273,20 @@ int sys_enter(struct syscall_enter_args *args)
>         }
>
>         if (filename_arg != NULL) {
> -               augmented_args.filename.reserved = 0;
> -               augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
> -                                                             sizeof(augmented_args.filename.value),
> +               augmented_args->filename.reserved = 0;
> +               augmented_args->filename.size = probe_read_str(&augmented_args->filename.value,
> +                                                             sizeof(augmented_args->filename.value),
>                                                               filename_arg);
> -               if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
> -                       len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
> -                       len &= sizeof(augmented_args.filename.value) - 1;
> +               if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) {
> +                       len -= sizeof(augmented_args->filename.value) - augmented_args->filename.size;
> +                       len &= sizeof(augmented_args->filename.value) - 1;
>                 }
>         } else {
> -               len = sizeof(augmented_args.args);
> +               len = sizeof(augmented_args->args);
>         }
>
>         /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
> -       return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
> +       return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, augmented_args, len);
>  }
>
>  SEC("raw_syscalls:sys_exit")
> --
> 2.20.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ