lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAP-5=fWi_FhQahxsKOaqdMp9agZqC3obHR_mo78+Ms9v1wZavg@mail.gmail.com>
Date: Fri, 26 Jul 2024 16:48:52 -0700
From: Ian Rogers <irogers@...gle.com>
To: Howard Chu <howardchu95@...il.com>
Cc: namhyung@...nel.org, acme@...nel.org, adrian.hunter@...el.com, 
	jolsa@...nel.org, kan.liang@...ux.intel.com, linux-perf-users@...r.kernel.org, 
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 1/5] perf record off-cpu: Add direct off-cpu event

On Fri, Jul 26, 2024 at 3:28 AM Howard Chu <howardchu95@...il.com> wrote:
>
> Add direct off-cpu event called "offcpu-time-direct". Add a threshold to
> dump direct off-cpu samples, "--off-cpu-thresh". Default value of
> --off-cpu-thresh is UULONG_MAX(no direct off-cpu samples), and
> --off-cpu-thresh's unit is milliseconds.
>
> Bind fds and sample_id in off_cpu_start()
>
> Note that we add "offcpu-time-direct" event using parse_event(), because we
> need to make it no-inherit, otherwise perf_event_open() will fail.
>
> Introduce sample_type_embed, indicating the sample_type of a sample
> embedded in BPF output. More discussions in later patches.
>
> Signed-off-by: Howard Chu <howardchu95@...il.com>
> Suggested-by: Ian Rogers <irogers@...gle.com>
> ---
>  tools/perf/builtin-record.c   |  2 ++
>  tools/perf/util/bpf_off_cpu.c | 53 ++++++++++++++++++++++++++++++++++-
>  tools/perf/util/off_cpu.h     |  1 +
>  tools/perf/util/record.h      |  1 +
>  4 files changed, 56 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index a94516e8c522..708d48d309d6 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -3325,6 +3325,7 @@ static struct record record = {
>                 .ctl_fd              = -1,
>                 .ctl_fd_ack          = -1,
>                 .synth               = PERF_SYNTH_ALL,
> +               .off_cpu_thresh      = ULLONG_MAX,
>         },
>         .tool = {
>                 .sample         = process_sample_event,
> @@ -3557,6 +3558,7 @@ static struct option __record_options[] = {
>                             "write collected trace data into several data files using parallel threads",
>                             record__parse_threads),
>         OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
> +       OPT_U64(0, "off-cpu-thresh", &record.opts.off_cpu_thresh, "time threshold(in ms) for dumping off-cpu events"),
>         OPT_END()
>  };
>
> diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
> index 6af36142dc5a..905a11c96c5b 100644
> --- a/tools/perf/util/bpf_off_cpu.c
> +++ b/tools/perf/util/bpf_off_cpu.c
> @@ -13,6 +13,7 @@
>  #include "util/cgroup.h"
>  #include "util/strlist.h"
>  #include <bpf/bpf.h>
> +#include <internal/xyarray.h>
>
>  #include "bpf_skel/off_cpu.skel.h"
>
> @@ -45,10 +46,12 @@ static int off_cpu_config(struct evlist *evlist)
>                 .size   = sizeof(attr), /* to capture ABI version */
>         };
>         char *evname = strdup(OFFCPU_EVENT);
> +       char off_cpu_direct_event[64];
>
>         if (evname == NULL)
>                 return -ENOMEM;
>
> +       /* off-cpu event in the end */
>         evsel = evsel__new(&attr);
>         if (!evsel) {
>                 free(evname);
> @@ -65,12 +68,22 @@ static int off_cpu_config(struct evlist *evlist)
>         free(evsel->name);
>         evsel->name = evname;
>
> +       /* direct off-cpu event */
> +       snprintf(off_cpu_direct_event, sizeof(off_cpu_direct_event), "bpf-output/no-inherit=1,name=%s/", OFFCPU_EVENT_DIRECT);
> +       if (parse_event(evlist, off_cpu_direct_event)) {
> +               pr_err("Failed to open off-cpu event\n");
> +               return -1;
> +       }
> +
>         return 0;
>  }
>
>  static void off_cpu_start(void *arg)
>  {
>         struct evlist *evlist = arg;
> +       struct evsel *evsel;
> +       struct perf_cpu pcpu;
> +       int i, err;
>
>         /* update task filter for the given workload */
>         if (!skel->bss->has_cpu && !skel->bss->has_task &&
> @@ -86,6 +99,27 @@ static void off_cpu_start(void *arg)
>                 bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
>         }
>
> +       /* sample id and fds in BPF's perf_event_array can only be set after record__open() */
> +       evsel = evlist__find_evsel_by_str(evlist, OFFCPU_EVENT_DIRECT);
> +       if (evsel == NULL) {
> +               pr_err("%s evsel not found\n", OFFCPU_EVENT_DIRECT);
> +               return;
> +       }
> +
> +       if (evsel->core.id)
> +               skel->bss->sample_id = evsel->core.id[0];
> +
> +       perf_cpu_map__for_each_cpu(pcpu, i, evsel->core.cpus) {
> +               err = bpf_map__update_elem(skel->maps.offcpu_output,
> +                                          &pcpu.cpu, sizeof(__u32),
> +                                          xyarray__entry(evsel->core.fd, pcpu.cpu, 0),
> +                                          sizeof(__u32), BPF_ANY);
> +               if (err) {
> +                       pr_err("Failed to update perf event map for direct off-cpu dumping\n");
> +                       return;
> +               }
> +       }
> +
>         skel->bss->enabled = 1;
>  }
>
> @@ -130,14 +164,24 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
>  {
>         int err, fd, i;
>         int ncpus = 1, ntasks = 1, ncgrps = 1;
> +       __u64 offcpu_thresh;
>         struct strlist *pid_slist = NULL;
>         struct str_node *pos;
> +       struct evsel *evsel;
>
>         if (off_cpu_config(evlist) < 0) {
>                 pr_err("Failed to config off-cpu BPF event\n");
>                 return -1;
>         }
>
> +       evsel = evlist__find_evsel_by_str(evlist, OFFCPU_EVENT_DIRECT);
> +       if (evsel == NULL) {
> +               pr_err("%s evsel not found\n", OFFCPU_EVENT_DIRECT);
> +               return -1 ;
> +       }
> +
> +       evsel->sample_type_embed = OFFCPU_SAMPLE_TYPES;
> +
>         skel = off_cpu_bpf__open();
>         if (!skel) {
>                 pr_err("Failed to open off-cpu BPF skeleton\n");
> @@ -250,7 +294,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
>         }
>
>         if (evlist__first(evlist)->cgrp) {
> -               struct evsel *evsel;
>                 u8 val = 1;
>
>                 skel->bss->has_cgroup = 1;
> @@ -272,6 +315,14 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
>                 }
>         }
>
> +       offcpu_thresh = opts->off_cpu_thresh;
> +
> +       if (opts->off_cpu_thresh != ULLONG_MAX)
> +               offcpu_thresh = opts->off_cpu_thresh * 1000000; /* off-cpu-thresh is in ms */

nit: In this comment, it's not clear if you are referring to the
option or the variable. In modern languages it is usual to have some
kind of "duration" type. As we're using u64s I'd be tempted to add a
"_ms" suffix, just to make clear what the units for the time are. I
think that'd make this:
offcpu_thresh_ms = opts->off_cpu_thresh_ns * 1000000

Thanks,
Ian

> +
> +       skel->bss->offcpu_thresh = offcpu_thresh;
> +       skel->bss->sample_type   = OFFCPU_SAMPLE_TYPES;
> +
>         err = off_cpu_bpf__attach(skel);
>         if (err) {
>                 pr_err("Failed to attach off-cpu BPF skeleton\n");
> diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h
> index 2dd67c60f211..a349f8e300e0 100644
> --- a/tools/perf/util/off_cpu.h
> +++ b/tools/perf/util/off_cpu.h
> @@ -9,6 +9,7 @@ struct perf_session;
>  struct record_opts;
>
>  #define OFFCPU_EVENT  "offcpu-time"
> +#define OFFCPU_EVENT_DIRECT  "offcpu-time-direct"
>
>  #define OFFCPU_SAMPLE_TYPES  (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \
>                               PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
> diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
> index a6566134e09e..3c11416e6627 100644
> --- a/tools/perf/util/record.h
> +++ b/tools/perf/util/record.h
> @@ -79,6 +79,7 @@ struct record_opts {
>         int           synth;
>         int           threads_spec;
>         const char    *threads_user_spec;
> +       u64           off_cpu_thresh;
>  };
>
>  extern const char * const *record_usage;
> --
> 2.45.2
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ