[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAP-5=fV1N-j+f4GBFnDWsmoMZcz_k0U=nu1A7NZz-g4gzCH4KA@mail.gmail.com>
Date: Tue, 7 Oct 2025 06:16:36 -0700
From: Ian Rogers <irogers@...gle.com>
To: James Clark <james.clark@...aro.org>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Adrian Hunter <adrian.hunter@...el.com>, Leo Yan <leo.yan@....com>,
linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] perf tests: Don't retest sections in "Object code reading"
On Tue, Oct 7, 2025 at 2:10 AM James Clark <james.clark@...aro.org> wrote:
>
>
>
> On 06/10/2025 4:21 pm, Ian Rogers wrote:
> > On Mon, Oct 6, 2025 at 6:11 AM James Clark <james.clark@...aro.org> wrote:
> >>
> >> We already only test each kcore map once, but on slow systems
> >> (particularly with network filesystems) even the non-kcore maps are
> >> slow. The test can test the same objdump output over and over which only
> >> wastes time. Generalize the skipping mechanism to track all DSOs and
> >> addresses so that each section is only tested once.
> >>
> >> On a fully loaded Arm Juno (simulating a parallel Perf test run) with a
> >> network filesystem, the original runtime is:
> >>
> >> real 1m51.126s
> >> user 0m19.445s
> >> sys 1m15.431s
> >>
> >> And the new runtime is:
> >>
> >> real 0m48.873s
> >> user 0m8.031s
> >> sys 0m32.353s
> >>
> >> Signed-off-by: James Clark <james.clark@...aro.org>
> >
> > Reviewed-by: Ian Rogers <irogers@...gle.com>
> >
> >> ---
> >> tools/perf/tests/code-reading.c | 119 ++++++++++++++++++++++++++++------------
> >> 1 file changed, 85 insertions(+), 34 deletions(-)
> >>
> >> diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
> >> index 9c2091310191..4c9fbf6965c4 100644
> >> --- a/tools/perf/tests/code-reading.c
> >> +++ b/tools/perf/tests/code-reading.c
> >> @@ -2,6 +2,7 @@
> >> #include <errno.h>
> >> #include <linux/kconfig.h>
> >> #include <linux/kernel.h>
> >> +#include <linux/rbtree.h>
> >> #include <linux/types.h>
> >> #include <inttypes.h>
> >> #include <stdlib.h>
> >> @@ -39,11 +40,64 @@
> >> #define BUFSZ 1024
> >> #define READLEN 128
> >>
> >> -struct state {
> >> - u64 done[1024];
> >> - size_t done_cnt;
> >> +struct tested_section {
> >> + struct rb_node rb_node;
> >> + u64 addr;
> >> + char path[PATH_MAX];
> >> };
> >>
> >> +static bool tested_code_insert_or_exists(const char *path, u64 addr,
> >> + struct rb_root *tested_sections)
> >> +{
> >> + struct rb_node **node = &tested_sections->rb_node;
> >> + struct rb_node *parent = NULL;
> >> + struct tested_section *data;
> >> +
> >> + while (*node) {
> >> + int cmp;
> >> +
> >> + parent = *node;
> >> + data = rb_entry(*node, struct tested_section, rb_node);
> >> + cmp = strcmp(path, data->path);
> >> + if (!cmp) {
> >> + if (addr < data->addr)
> >> + cmp = -1;
> >> + else if (addr > data->addr)
> >> + cmp = 1;
> >> + else
> >> + return true; /* already tested */
> >> + }
> >> +
> >> + if (cmp < 0)
> >> + node = &(*node)->rb_left;
> >> + else
> >> + node = &(*node)->rb_right;
> >> + }
> >> +
> >> + data = zalloc(sizeof(*data));
> >> + if (!data)
> >> + return true;
> >> +
> >> + data->addr = addr;
> >> + strlcpy(data->path, path, sizeof(data->path));
> >
> > nit: perhaps strdup rather than having 4kb per tested_section.
> >
> > Thanks,
> > Ian
> >
>
> Oh yeah that would have been better, not sure why I didn't do it that
> way. Although the max sections I saw was around 50, and it's usually a
> lot less so it's probably not worth the churn to change it now that
> Arnaldo's applied it?
I'm easy. It is one of those things I'd probably eye-ball at some
later date and send a drive-by patch to change. You could send a patch
and let Arnaldo decide whether to squash it or add a follow-up.
Thanks,
Ian
> Thanks
> James
>
> >> + rb_link_node(&data->rb_node, parent, node);
> >> + rb_insert_color(&data->rb_node, tested_sections);
> >> + return false;
> >> +}
> >> +
> >> +static void tested_sections__free(struct rb_root *root)
> >> +{
> >> + while (!RB_EMPTY_ROOT(root)) {
> >> + struct rb_node *node = rb_first(root);
> >> + struct tested_section *ts = rb_entry(node,
> >> + struct tested_section,
> >> + rb_node);
> >> +
> >> + rb_erase(node, root);
> >> + free(ts);
> >> + }
> >> +}
> >> +
> >> static size_t read_objdump_chunk(const char **line, unsigned char **buf,
> >> size_t *buf_len)
> >> {
> >> @@ -316,13 +370,15 @@ static void dump_buf(unsigned char *buf, size_t len)
> >> }
> >>
> >> static int read_object_code(u64 addr, size_t len, u8 cpumode,
> >> - struct thread *thread, struct state *state)
> >> + struct thread *thread,
> >> + struct rb_root *tested_sections)
> >> {
> >> struct addr_location al;
> >> unsigned char buf1[BUFSZ] = {0};
> >> unsigned char buf2[BUFSZ] = {0};
> >> size_t ret_len;
> >> u64 objdump_addr;
> >> + u64 skip_addr;
> >> const char *objdump_name;
> >> char decomp_name[KMOD_DECOMP_LEN];
> >> bool decomp = false;
> >> @@ -350,6 +406,18 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
> >> goto out;
> >> }
> >>
> >> + /*
> >> + * Don't retest the same addresses. objdump struggles with kcore - try
> >> + * each map only once even if the address is different.
> >> + */
> >> + skip_addr = dso__is_kcore(dso) ? map__start(al.map) : al.addr;
> >> + if (tested_code_insert_or_exists(dso__long_name(dso), skip_addr,
> >> + tested_sections)) {
> >> + pr_debug("Already tested %s @ %#"PRIx64" - skipping\n",
> >> + dso__long_name(dso), skip_addr);
> >> + goto out;
> >> + }
> >> +
> >> pr_debug("On file address is: %#"PRIx64"\n", al.addr);
> >>
> >> if (len > BUFSZ)
> >> @@ -387,24 +455,6 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
> >> goto out;
> >> }
> >>
> >> - /* objdump struggles with kcore - try each map only once */
> >> - if (dso__is_kcore(dso)) {
> >> - size_t d;
> >> -
> >> - for (d = 0; d < state->done_cnt; d++) {
> >> - if (state->done[d] == map__start(al.map)) {
> >> - pr_debug("kcore map tested already");
> >> - pr_debug(" - skipping\n");
> >> - goto out;
> >> - }
> >> - }
> >> - if (state->done_cnt >= ARRAY_SIZE(state->done)) {
> >> - pr_debug("Too many kcore maps - skipping\n");
> >> - goto out;
> >> - }
> >> - state->done[state->done_cnt++] = map__start(al.map);
> >> - }
> >> -
> >> objdump_name = dso__long_name(dso);
> >> if (dso__needs_decompress(dso)) {
> >> if (dso__decompress_kmodule_path(dso, objdump_name,
> >> @@ -471,9 +521,9 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
> >> return err;
> >> }
> >>
> >> -static int process_sample_event(struct machine *machine,
> >> - struct evlist *evlist,
> >> - union perf_event *event, struct state *state)
> >> +static int process_sample_event(struct machine *machine, struct evlist *evlist,
> >> + union perf_event *event,
> >> + struct rb_root *tested_sections)
> >> {
> >> struct perf_sample sample;
> >> struct thread *thread;
> >> @@ -494,7 +544,8 @@ static int process_sample_event(struct machine *machine,
> >> goto out;
> >> }
> >>
> >> - ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
> >> + ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread,
> >> + tested_sections);
> >> thread__put(thread);
> >> out:
> >> perf_sample__exit(&sample);
> >> @@ -502,10 +553,11 @@ static int process_sample_event(struct machine *machine,
> >> }
> >>
> >> static int process_event(struct machine *machine, struct evlist *evlist,
> >> - union perf_event *event, struct state *state)
> >> + union perf_event *event, struct rb_root *tested_sections)
> >> {
> >> if (event->header.type == PERF_RECORD_SAMPLE)
> >> - return process_sample_event(machine, evlist, event, state);
> >> + return process_sample_event(machine, evlist, event,
> >> + tested_sections);
> >>
> >> if (event->header.type == PERF_RECORD_THROTTLE ||
> >> event->header.type == PERF_RECORD_UNTHROTTLE)
> >> @@ -525,7 +577,7 @@ static int process_event(struct machine *machine, struct evlist *evlist,
> >> }
> >>
> >> static int process_events(struct machine *machine, struct evlist *evlist,
> >> - struct state *state)
> >> + struct rb_root *tested_sections)
> >> {
> >> union perf_event *event;
> >> struct mmap *md;
> >> @@ -537,7 +589,7 @@ static int process_events(struct machine *machine, struct evlist *evlist,
> >> continue;
> >>
> >> while ((event = perf_mmap__read_event(&md->core)) != NULL) {
> >> - ret = process_event(machine, evlist, event, state);
> >> + ret = process_event(machine, evlist, event, tested_sections);
> >> perf_mmap__consume(&md->core);
> >> if (ret < 0)
> >> return ret;
> >> @@ -637,9 +689,7 @@ static int do_test_code_reading(bool try_kcore)
> >> .uses_mmap = true,
> >> },
> >> };
> >> - struct state state = {
> >> - .done_cnt = 0,
> >> - };
> >> + struct rb_root tested_sections = RB_ROOT;
> >> struct perf_thread_map *threads = NULL;
> >> struct perf_cpu_map *cpus = NULL;
> >> struct evlist *evlist = NULL;
> >> @@ -773,7 +823,7 @@ static int do_test_code_reading(bool try_kcore)
> >>
> >> evlist__disable(evlist);
> >>
> >> - ret = process_events(machine, evlist, &state);
> >> + ret = process_events(machine, evlist, &tested_sections);
> >> if (ret < 0)
> >> goto out_put;
> >>
> >> @@ -793,6 +843,7 @@ static int do_test_code_reading(bool try_kcore)
> >> perf_thread_map__put(threads);
> >> machine__delete(machine);
> >> perf_env__exit(&host_env);
> >> + tested_sections__free(&tested_sections);
> >>
> >> return err;
> >> }
> >>
> >> ---
> >> base-commit: a22d167ed82505f770340c3a7c257c04ba24dac9
> >> change-id: 20251006-james-perf-object-code-reading-9646e486d595
> >>
> >> Best regards,
> >> --
> >> James Clark <james.clark@...aro.org>
> >>
>
Powered by blists - more mailing lists