Message-Id: <20251006-james-perf-object-code-reading-v1-1-acab2129747d@linaro.org>
Date: Mon, 06 Oct 2025 14:11:07 +0100
From: James Clark <james.clark@...aro.org>
To: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Namhyung Kim <namhyung@...nel.org>, Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>, Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>, Leo Yan <leo.yan@....com>
Cc: linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org,
James Clark <james.clark@...aro.org>
Subject: [PATCH] perf tests: Don't retest sections in "Object code reading"
We already test each kcore map only once, but on slow systems
(particularly with network filesystems) even the non-kcore maps are
slow. The test can end up comparing the same objdump output over and
over, which only wastes time. Generalize the skipping mechanism to
track all DSOs and addresses so that each section is tested only once.
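
As an illustration only (not part of the patch), here is a standalone
sketch of the dedup idea: each tested section is keyed on the
(DSO path, address) pair and is skipped after the first time it is
seen. The sketch uses libc tsearch() rather than the kernel rbtree the
patch uses, and the paths and addresses are made up:

#include <search.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct tested_section {
	unsigned long long addr;
	char path[4096];
};

/* Order by path first, then by address - the same composite key as the patch. */
static int tested_section_cmp(const void *a, const void *b)
{
	const struct tested_section *ta = a, *tb = b;
	int cmp = strcmp(ta->path, tb->path);

	if (cmp)
		return cmp;
	if (ta->addr < tb->addr)
		return -1;
	return ta->addr > tb->addr;
}

/* Return 1 if (path, addr) was already seen, otherwise remember it and return 0. */
static int insert_or_exists(void **root, const char *path, unsigned long long addr)
{
	struct tested_section *ts = calloc(1, sizeof(*ts));
	void *found;

	if (!ts)
		return 1; /* treat allocation failure as "skip", like the patch does */

	ts->addr = addr;
	snprintf(ts->path, sizeof(ts->path), "%s", path);

	found = tsearch(ts, root, tested_section_cmp);
	if (!found || *(struct tested_section **)found != ts) {
		free(ts); /* already present (or insertion failed) - skip */
		return 1;
	}
	return 0;
}

int main(void)
{
	void *root = NULL;

	printf("%d\n", insert_or_exists(&root, "/lib/libc.so.6", 0x1000)); /* 0: new */
	printf("%d\n", insert_or_exists(&root, "/lib/libc.so.6", 0x1000)); /* 1: duplicate */
	printf("%d\n", insert_or_exists(&root, "/lib/libc.so.6", 0x2000)); /* 0: new address */
	return 0;
}
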
On a fully loaded Arm Juno (simulating a parallel Perf test run) with a
network filesystem, the original runtime is:

  real    1m51.126s
  user    0m19.445s
  sys     1m15.431s

And the new runtime is:

  real    0m48.873s
  user    0m8.031s
  sys     0m32.353s
Signed-off-by: James Clark <james.clark@...aro.org>
---
tools/perf/tests/code-reading.c | 119 ++++++++++++++++++++++++++++------------
1 file changed, 85 insertions(+), 34 deletions(-)
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 9c2091310191..4c9fbf6965c4 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -2,6 +2,7 @@
#include <errno.h>
#include <linux/kconfig.h>
#include <linux/kernel.h>
+#include <linux/rbtree.h>
#include <linux/types.h>
#include <inttypes.h>
#include <stdlib.h>
@@ -39,11 +40,64 @@
#define BUFSZ 1024
#define READLEN 128
-struct state {
- u64 done[1024];
- size_t done_cnt;
+struct tested_section {
+ struct rb_node rb_node;
+ u64 addr;
+ char path[PATH_MAX];
};
+static bool tested_code_insert_or_exists(const char *path, u64 addr,
+ struct rb_root *tested_sections)
+{
+ struct rb_node **node = &tested_sections->rb_node;
+ struct rb_node *parent = NULL;
+ struct tested_section *data;
+
+ while (*node) {
+ int cmp;
+
+ parent = *node;
+ data = rb_entry(*node, struct tested_section, rb_node);
+ cmp = strcmp(path, data->path);
+ if (!cmp) {
+ if (addr < data->addr)
+ cmp = -1;
+ else if (addr > data->addr)
+ cmp = 1;
+ else
+ return true; /* already tested */
+ }
+
+ if (cmp < 0)
+ node = &(*node)->rb_left;
+ else
+ node = &(*node)->rb_right;
+ }
+
+ data = zalloc(sizeof(*data));
+ if (!data)
+ return true;
+
+ data->addr = addr;
+ strlcpy(data->path, path, sizeof(data->path));
+ rb_link_node(&data->rb_node, parent, node);
+ rb_insert_color(&data->rb_node, tested_sections);
+ return false;
+}
+
+static void tested_sections__free(struct rb_root *root)
+{
+ while (!RB_EMPTY_ROOT(root)) {
+ struct rb_node *node = rb_first(root);
+ struct tested_section *ts = rb_entry(node,
+ struct tested_section,
+ rb_node);
+
+ rb_erase(node, root);
+ free(ts);
+ }
+}
+
static size_t read_objdump_chunk(const char **line, unsigned char **buf,
size_t *buf_len)
{
@@ -316,13 +370,15 @@ static void dump_buf(unsigned char *buf, size_t len)
}
static int read_object_code(u64 addr, size_t len, u8 cpumode,
- struct thread *thread, struct state *state)
+ struct thread *thread,
+ struct rb_root *tested_sections)
{
struct addr_location al;
unsigned char buf1[BUFSZ] = {0};
unsigned char buf2[BUFSZ] = {0};
size_t ret_len;
u64 objdump_addr;
+ u64 skip_addr;
const char *objdump_name;
char decomp_name[KMOD_DECOMP_LEN];
bool decomp = false;
@@ -350,6 +406,18 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
goto out;
}
+ /*
+ * Don't retest the same addresses. objdump struggles with kcore - try
+ * each map only once even if the address is different.
+ */
+ skip_addr = dso__is_kcore(dso) ? map__start(al.map) : al.addr;
+ if (tested_code_insert_or_exists(dso__long_name(dso), skip_addr,
+ tested_sections)) {
+ pr_debug("Already tested %s @ %#"PRIx64" - skipping\n",
+ dso__long_name(dso), skip_addr);
+ goto out;
+ }
+
pr_debug("On file address is: %#"PRIx64"\n", al.addr);
if (len > BUFSZ)
@@ -387,24 +455,6 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
goto out;
}
- /* objdump struggles with kcore - try each map only once */
- if (dso__is_kcore(dso)) {
- size_t d;
-
- for (d = 0; d < state->done_cnt; d++) {
- if (state->done[d] == map__start(al.map)) {
- pr_debug("kcore map tested already");
- pr_debug(" - skipping\n");
- goto out;
- }
- }
- if (state->done_cnt >= ARRAY_SIZE(state->done)) {
- pr_debug("Too many kcore maps - skipping\n");
- goto out;
- }
- state->done[state->done_cnt++] = map__start(al.map);
- }
-
objdump_name = dso__long_name(dso);
if (dso__needs_decompress(dso)) {
if (dso__decompress_kmodule_path(dso, objdump_name,
@@ -471,9 +521,9 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
return err;
}
-static int process_sample_event(struct machine *machine,
- struct evlist *evlist,
- union perf_event *event, struct state *state)
+static int process_sample_event(struct machine *machine, struct evlist *evlist,
+ union perf_event *event,
+ struct rb_root *tested_sections)
{
struct perf_sample sample;
struct thread *thread;
@@ -494,7 +544,8 @@ static int process_sample_event(struct machine *machine,
goto out;
}
- ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
+ ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread,
+ tested_sections);
thread__put(thread);
out:
perf_sample__exit(&sample);
@@ -502,10 +553,11 @@ static int process_sample_event(struct machine *machine,
}
static int process_event(struct machine *machine, struct evlist *evlist,
- union perf_event *event, struct state *state)
+ union perf_event *event, struct rb_root *tested_sections)
{
if (event->header.type == PERF_RECORD_SAMPLE)
- return process_sample_event(machine, evlist, event, state);
+ return process_sample_event(machine, evlist, event,
+ tested_sections);
if (event->header.type == PERF_RECORD_THROTTLE ||
event->header.type == PERF_RECORD_UNTHROTTLE)
@@ -525,7 +577,7 @@ static int process_event(struct machine *machine, struct evlist *evlist,
}
static int process_events(struct machine *machine, struct evlist *evlist,
- struct state *state)
+ struct rb_root *tested_sections)
{
union perf_event *event;
struct mmap *md;
@@ -537,7 +589,7 @@ static int process_events(struct machine *machine, struct evlist *evlist,
continue;
while ((event = perf_mmap__read_event(&md->core)) != NULL) {
- ret = process_event(machine, evlist, event, state);
+ ret = process_event(machine, evlist, event, tested_sections);
perf_mmap__consume(&md->core);
if (ret < 0)
return ret;
@@ -637,9 +689,7 @@ static int do_test_code_reading(bool try_kcore)
.uses_mmap = true,
},
};
- struct state state = {
- .done_cnt = 0,
- };
+ struct rb_root tested_sections = RB_ROOT;
struct perf_thread_map *threads = NULL;
struct perf_cpu_map *cpus = NULL;
struct evlist *evlist = NULL;
@@ -773,7 +823,7 @@ static int do_test_code_reading(bool try_kcore)
evlist__disable(evlist);
- ret = process_events(machine, evlist, &state);
+ ret = process_events(machine, evlist, &tested_sections);
if (ret < 0)
goto out_put;
@@ -793,6 +843,7 @@ static int do_test_code_reading(bool try_kcore)
perf_thread_map__put(threads);
machine__delete(machine);
perf_env__exit(&host_env);
+ tested_sections__free(&tested_sections);
return err;
}
---
base-commit: a22d167ed82505f770340c3a7c257c04ba24dac9
change-id: 20251006-james-perf-object-code-reading-9646e486d595
Best regards,
--
James Clark <james.clark@...aro.org>