lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAP-5=fW7zk2A1OqCAA1ngVydzzNmSbhhLC0D9OR7jzU_rg=1WA@mail.gmail.com>
Date: Wed, 31 Jan 2024 13:39:51 -0800
From: Ian Rogers <irogers@...gle.com>
To: Arnaldo Carvalho de Melo <acme@...nel.org>
Cc: Namhyung Kim <namhyung@...nel.org>, Adrian Hunter <adrian.hunter@...el.com>, 
	Jiri Olsa <jolsa@...nel.org>, Linux Kernel Mailing List <linux-kernel@...r.kernel.org>, 
	linux-perf-users@...r.kernel.org
Subject: Re: [PATCH 1/1] perf trace: Collect sys_nanosleep first argument

On Wed, Jan 31, 2024 at 1:30 PM Arnaldo Carvalho de Melo
<acme@...nel.org> wrote:
>
> That is a 'struct timespec' passed from userspace to the kernel as we
> can see with a system wide syscall tracing:
>
>   root@...ber:~# perf trace -e nanosleep
>        0.000 (10.102 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>       38.924 (10.077 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      100.177 (10.107 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      139.171 (10.063 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      200.603 (10.105 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      239.399 (10.064 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      300.994 (10.096 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      339.584 (10.067 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      401.335 (10.057 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      439.758 (10.166 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      501.814 (10.110 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      539.983 (10.227 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      602.284 (10.199 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      640.208 (10.105 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      702.662 (10.163 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      740.440 (10.107 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>      802.993 (10.159 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 })                   = 0
>   ^Croot@...ber:~# strace -p 9150 -e nanosleep
>
> If we then use the ptrace method to look at that podman process:
>
>   root@...ber:~# strace -p 9150 -e nanosleep
>   strace: Process 9150 attached
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
>   ^Cstrace: Process 9150 detached
>   root@...ber:~#
>
> With some changes we can get something closer to the strace output,
> still in system wide mode:
>
>   root@...ber:~# perf config trace.show_arg_names=false
>   root@...ber:~# perf config trace.show_duration=false
>   root@...ber:~# perf config trace.show_timestamp=false
>   root@...ber:~# perf config trace.show_zeros=true
>   root@...ber:~# perf config trace.args_alignment=0
>   root@...ber:~# perf trace -e nanosleep --max-events=10
>   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
>   root@...ber:~#
>   root@...ber:~# perf config
>   trace.show_arg_names=false
>   trace.show_duration=false
>   trace.show_timestamp=false
>   trace.show_zeros=true
>   trace.args_alignment=0
>   root@...ber:~# cat ~/.perfconfig
>   # this file is auto-generated.
>   [trace]
>         show_arg_names = false
>         show_duration = false
>         show_timestamp = false
>         show_zeros = true
>         args_alignment = 0
>   root@...ber:~#
>
> This will not get reused by any other syscall as nanosleep is the only
> one to have as its first argument a 'struct timespec" pointer argument
> passed from userspace to the kernel:
>
>   root@...ber:~# grep timespec /sys/kernel/tracing/events/syscalls/sys_enter_*/format | grep offset:16
>   /sys/kernel/tracing/events/syscalls/sys_enter_nanosleep/format:       field:struct __kernel_timespec * rqtp;  offset:16;      size:8; signed:0;
>   root@...ber:~#
>
> BTF based pretty printing will simplify all this, but then lets just get
> the low hanging fruits first.
>
> Cc: Adrian Hunter <adrian.hunter@...el.com>
> Cc: Ian Rogers <irogers@...gle.com>
> Cc: Jiri Olsa <jolsa@...nel.org>
> Cc: Namhyung Kim <namhyung@...nel.org>
> Link: https://lore.kernel.org/lkml/
> Signed-off-by: Arnaldo Carvalho de Melo <acme@...hat.com>

Reviewed-by: Ian Rogers <irogers@...gle.com>

Thanks,
Ian

> ---
>  tools/perf/builtin-trace.c                    |  2 ++
>  .../bpf_skel/augmented_raw_syscalls.bpf.c     | 21 +++++++++++++++++++
>  2 files changed, 23 insertions(+)
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 109b8e64fe69ae32..6abe280dc38f1921 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -1048,6 +1048,8 @@ static const struct syscall_fmt syscall_fmts[] = {
>           .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, },
>         { .name     = "name_to_handle_at",
>           .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
> +       { .name     = "nanosleep",
> +         .arg = { [0] = { .scnprintf = SCA_TIMESPEC,  /* req */ }, }, },
>         { .name     = "newfstatat",
>           .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
>         { .name     = "open",
> diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> index 52c270330ae0d2f3..baecffbece14fb68 100644
> --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> @@ -354,6 +354,27 @@ int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
>         return 1; /* Failure: don't filter */
>  }
>
> +SEC("tp/syscalls/sys_enter_nanosleep")
> +int sys_enter_nanosleep(struct syscall_enter_args *args)
> +{
> +       struct augmented_args_payload *augmented_args = augmented_args_payload();
> +       const void *req_arg = (const void *)args->args[0];
> +       unsigned int len = sizeof(augmented_args->args);
> +       __u32 size = sizeof(struct timespec64);
> +
> +        if (augmented_args == NULL)
> +               goto failure;
> +
> +       if (size > sizeof(augmented_args->__data))
> +                goto failure;
> +
> +       bpf_probe_read_user(&augmented_args->__data, size, req_arg);
> +
> +       return augmented__output(args, augmented_args, len + size);
> +failure:
> +       return 1; /* Failure: don't filter */
> +}
> +
>  static pid_t getpid(void)
>  {
>         return bpf_get_current_pid_tgid();
> --
> 2.43.0
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ