lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAH0uvojbGkQKnvSi7gqtLVJQs2xxO2zBOLCukBiOKKPy7O0uFw@mail.gmail.com>
Date: Mon, 29 Jul 2024 21:36:07 +0800
From: Howard Chu <howardchu95@...il.com>
To: Ian Rogers <irogers@...gle.com>
Cc: namhyung@...nel.org, acme@...nel.org, adrian.hunter@...el.com, 
	jolsa@...nel.org, kan.liang@...ux.intel.com, linux-perf-users@...r.kernel.org, 
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 1/5] perf record off-cpu: Add direct off-cpu event

Hello Ian,

On Sat, Jul 27, 2024 at 7:49 AM Ian Rogers <irogers@...gle.com> wrote:
>
> On Fri, Jul 26, 2024 at 3:28 AM Howard Chu <howardchu95@...il.com> wrote:
> >
> > Add direct off-cpu event called "offcpu-time-direct". Add a threshold to
> > dump direct off-cpu samples, "--off-cpu-thresh". Default value of
> > --off-cpu-thresh is UULONG_MAX(no direct off-cpu samples), and
> > --off-cpu-thresh's unit is milliseconds.
> >
> > Bind fds and sample_id in off_cpu_start()
> >
> > Note that we add "offcpu-time-direct" event using parse_event(), because we
> > need to make it no-inherit, otherwise perf_event_open() will fail.
> >
> > Introduce sample_type_embed, indicating the sample_type of a sample
> > embedded in BPF output. More discussions in later patches.
> >
> > Signed-off-by: Howard Chu <howardchu95@...il.com>
> > Suggested-by: Ian Rogers <irogers@...gle.com>
> > ---
> >  tools/perf/builtin-record.c   |  2 ++
> >  tools/perf/util/bpf_off_cpu.c | 53 ++++++++++++++++++++++++++++++++++-
> >  tools/perf/util/off_cpu.h     |  1 +
> >  tools/perf/util/record.h      |  1 +
> >  4 files changed, 56 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> > index a94516e8c522..708d48d309d6 100644
> > --- a/tools/perf/builtin-record.c
> > +++ b/tools/perf/builtin-record.c
> > @@ -3325,6 +3325,7 @@ static struct record record = {
> >                 .ctl_fd              = -1,
> >                 .ctl_fd_ack          = -1,
> >                 .synth               = PERF_SYNTH_ALL,
> > +               .off_cpu_thresh      = ULLONG_MAX,
> >         },
> >         .tool = {
> >                 .sample         = process_sample_event,
> > @@ -3557,6 +3558,7 @@ static struct option __record_options[] = {
> >                             "write collected trace data into several data files using parallel threads",
> >                             record__parse_threads),
> >         OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
> > +       OPT_U64(0, "off-cpu-thresh", &record.opts.off_cpu_thresh, "time threshold(in ms) for dumping off-cpu events"),
> >         OPT_END()
> >  };
> >
> > diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
> > index 6af36142dc5a..905a11c96c5b 100644
> > --- a/tools/perf/util/bpf_off_cpu.c
> > +++ b/tools/perf/util/bpf_off_cpu.c
> > @@ -13,6 +13,7 @@
> >  #include "util/cgroup.h"
> >  #include "util/strlist.h"
> >  #include <bpf/bpf.h>
> > +#include <internal/xyarray.h>
> >
> >  #include "bpf_skel/off_cpu.skel.h"
> >
> > @@ -45,10 +46,12 @@ static int off_cpu_config(struct evlist *evlist)
> >                 .size   = sizeof(attr), /* to capture ABI version */
> >         };
> >         char *evname = strdup(OFFCPU_EVENT);
> > +       char off_cpu_direct_event[64];
> >
> >         if (evname == NULL)
> >                 return -ENOMEM;
> >
> > +       /* off-cpu event in the end */
> >         evsel = evsel__new(&attr);
> >         if (!evsel) {
> >                 free(evname);
> > @@ -65,12 +68,22 @@ static int off_cpu_config(struct evlist *evlist)
> >         free(evsel->name);
> >         evsel->name = evname;
> >
> > +       /* direct off-cpu event */
> > +       snprintf(off_cpu_direct_event, sizeof(off_cpu_direct_event), "bpf-output/no-inherit=1,name=%s/", OFFCPU_EVENT_DIRECT);
> > +       if (parse_event(evlist, off_cpu_direct_event)) {
> > +               pr_err("Failed to open off-cpu event\n");
> > +               return -1;
> > +       }
> > +
> >         return 0;
> >  }
> >
> >  static void off_cpu_start(void *arg)
> >  {
> >         struct evlist *evlist = arg;
> > +       struct evsel *evsel;
> > +       struct perf_cpu pcpu;
> > +       int i, err;
> >
> >         /* update task filter for the given workload */
> >         if (!skel->bss->has_cpu && !skel->bss->has_task &&
> > @@ -86,6 +99,27 @@ static void off_cpu_start(void *arg)
> >                 bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
> >         }
> >
> > +       /* sample id and fds in BPF's perf_event_array can only be set after record__open() */
> > +       evsel = evlist__find_evsel_by_str(evlist, OFFCPU_EVENT_DIRECT);
> > +       if (evsel == NULL) {
> > +               pr_err("%s evsel not found\n", OFFCPU_EVENT_DIRECT);
> > +               return;
> > +       }
> > +
> > +       if (evsel->core.id)
> > +               skel->bss->sample_id = evsel->core.id[0];
> > +
> > +       perf_cpu_map__for_each_cpu(pcpu, i, evsel->core.cpus) {
> > +               err = bpf_map__update_elem(skel->maps.offcpu_output,
> > +                                          &pcpu.cpu, sizeof(__u32),
> > +                                          xyarray__entry(evsel->core.fd, pcpu.cpu, 0),
> > +                                          sizeof(__u32), BPF_ANY);
> > +               if (err) {
> > +                       pr_err("Failed to update perf event map for direct off-cpu dumping\n");
> > +                       return;
> > +               }
> > +       }
> > +
> >         skel->bss->enabled = 1;
> >  }
> >
> > @@ -130,14 +164,24 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
> >  {
> >         int err, fd, i;
> >         int ncpus = 1, ntasks = 1, ncgrps = 1;
> > +       __u64 offcpu_thresh;
> >         struct strlist *pid_slist = NULL;
> >         struct str_node *pos;
> > +       struct evsel *evsel;
> >
> >         if (off_cpu_config(evlist) < 0) {
> >                 pr_err("Failed to config off-cpu BPF event\n");
> >                 return -1;
> >         }
> >
> > +       evsel = evlist__find_evsel_by_str(evlist, OFFCPU_EVENT_DIRECT);
> > +       if (evsel == NULL) {
> > +               pr_err("%s evsel not found\n", OFFCPU_EVENT_DIRECT);
> > +               return -1 ;
> > +       }
> > +
> > +       evsel->sample_type_embed = OFFCPU_SAMPLE_TYPES;
> > +
> >         skel = off_cpu_bpf__open();
> >         if (!skel) {
> >                 pr_err("Failed to open off-cpu BPF skeleton\n");
> > @@ -250,7 +294,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
> >         }
> >
> >         if (evlist__first(evlist)->cgrp) {
> > -               struct evsel *evsel;
> >                 u8 val = 1;
> >
> >                 skel->bss->has_cgroup = 1;
> > @@ -272,6 +315,14 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
> >                 }
> >         }
> >
> > +       offcpu_thresh = opts->off_cpu_thresh;
> > +
> > +       if (opts->off_cpu_thresh != ULLONG_MAX)
> > +               offcpu_thresh = opts->off_cpu_thresh * 1000000; /* off-cpu-thresh is in ms */
>
> nit: In this comment, it's not clear if you are referring to the
> option or the variable. In modern languages it is usual to have some
> kind of "duration" type. As we're using u64s I'd be tempted to add a
> "_ms" suffix, just to make clear what the units for the time are. I
> think that'd make this:
> offcpu_thresh_ms = opts->off_cpu_thresh_ns * 1000000

I'll do this, but in microseconds as you requested in another review.

Thanks,
Howard

>
> Thanks,
> Ian
>
> > +
> > +       skel->bss->offcpu_thresh = offcpu_thresh;
> > +       skel->bss->sample_type   = OFFCPU_SAMPLE_TYPES;
> > +
> >         err = off_cpu_bpf__attach(skel);
> >         if (err) {
> >                 pr_err("Failed to attach off-cpu BPF skeleton\n");
> > diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h
> > index 2dd67c60f211..a349f8e300e0 100644
> > --- a/tools/perf/util/off_cpu.h
> > +++ b/tools/perf/util/off_cpu.h
> > @@ -9,6 +9,7 @@ struct perf_session;
> >  struct record_opts;
> >
> >  #define OFFCPU_EVENT  "offcpu-time"
> > +#define OFFCPU_EVENT_DIRECT  "offcpu-time-direct"
> >
> >  #define OFFCPU_SAMPLE_TYPES  (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \
> >                               PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
> > diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
> > index a6566134e09e..3c11416e6627 100644
> > --- a/tools/perf/util/record.h
> > +++ b/tools/perf/util/record.h
> > @@ -79,6 +79,7 @@ struct record_opts {
> >         int           synth;
> >         int           threads_spec;
> >         const char    *threads_user_spec;
> > +       u64           off_cpu_thresh;
> >  };
> >
> >  extern const char * const *record_usage;
> > --
> > 2.45.2
> >

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ