[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251106071241.141234-7-irogers@google.com>
Date: Wed, 5 Nov 2025 23:12:37 -0800
From: Ian Rogers <irogers@...gle.com>
To: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Ian Rogers <irogers@...gle.com>, Adrian Hunter <adrian.hunter@...el.com>,
"Dr. David Alan Gilbert" <linux@...blig.org>, Yang Li <yang.lee@...ux.alibaba.com>,
James Clark <james.clark@...aro.org>, Thomas Falcon <thomas.falcon@...el.com>,
Thomas Richter <tmricht@...ux.ibm.com>, linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org, Andi Kleen <ak@...ux.intel.com>,
Dapeng Mi <dapeng1.mi@...ux.intel.com>
Subject: [PATCH v3 6/9] perf tool_pmu: More accurately set the cpus for tool events
The user and system time events can record on different CPUs, but for
all other events a single CPU map of just CPU 0 makes sense. In
parse-events, detect a tool PMU and then pass the perf_event_attr so
that the tool_pmu can return CPUs specific to the event. This avoids
a CPU map of all online CPUs being used for events like
duration_time. This in turn keeps the evlist CPUs from containing CPUs
for which duration_time just gives 0. Minimizing the evlist CPUs can
remove unnecessary sched_setaffinity syscalls that delay metric
calculations.
Signed-off-by: Ian Rogers <irogers@...gle.com>
---
tools/perf/util/parse-events.c | 9 +++++++--
tools/perf/util/tool_pmu.c | 19 +++++++++++++++++++
tools/perf/util/tool_pmu.h | 1 +
3 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 0c0dc20b1c13..7b2422ccb554 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -30,6 +30,7 @@
#include "util/event.h"
#include "util/bpf-filter.h"
#include "util/stat.h"
+#include "util/tool_pmu.h"
#include "util/util.h"
#include "tracepoint.h"
#include <api/fs/tracing_path.h>
@@ -227,8 +228,12 @@ __add_event(struct list_head *list, int *idx,
if (pmu) {
is_pmu_core = pmu->is_core;
pmu_cpus = perf_cpu_map__get(pmu->cpus);
- if (perf_cpu_map__is_empty(pmu_cpus))
- pmu_cpus = cpu_map__online();
+ if (perf_cpu_map__is_empty(pmu_cpus)) {
+ if (perf_pmu__is_tool(pmu))
+ pmu_cpus = tool_pmu__cpus(attr);
+ else
+ pmu_cpus = cpu_map__online();
+ }
} else {
is_pmu_core = (attr->type == PERF_TYPE_HARDWARE ||
attr->type == PERF_TYPE_HW_CACHE);
diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c
index b895e88ff740..03864a8f5e91 100644
--- a/tools/perf/util/tool_pmu.c
+++ b/tools/perf/util/tool_pmu.c
@@ -2,6 +2,7 @@
#include "cgroup.h"
#include "counts.h"
#include "cputopo.h"
+#include "debug.h"
#include "evsel.h"
#include "pmu.h"
#include "print-events.h"
@@ -12,6 +13,7 @@
#include <api/fs/fs.h>
#include <api/io.h>
#include <internal/threadmap.h>
+#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <fcntl.h>
#include <strings.h>
@@ -106,6 +108,23 @@ const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
return tool_pmu__event_to_str(evsel->core.attr.config);
}
+struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr)
+{ /* Choose the CPU map for a tool PMU event, selected by attr->config. */
+ static struct perf_cpu_map *cpu0_map; /* created on first use below, then reused by later calls */
+ enum tool_pmu_event event = (enum tool_pmu_event)attr->config;
+
+ if (event <= TOOL_PMU__EVENT_NONE || event >= TOOL_PMU__EVENT_MAX) { /* config must lie strictly between NONE and MAX */
+ pr_err("Invalid tool PMU event config %llx\n", attr->config);
+ return NULL; /* no map is produced for an unrecognized config */
+ }
+ if (event == TOOL_PMU__EVENT_USER_TIME || event == TOOL_PMU__EVENT_SYSTEM_TIME)
+ return cpu_map__online(); /* user/system time can record on different CPUs, so use the online map */
+
+ if (!cpu0_map)
+ cpu0_map = perf_cpu_map__new_int(0); /* every other tool event gets a map of just CPU 0 */
+ return perf_cpu_map__get(cpu0_map); /* NOTE(review): presumably hands the caller its own reference to the cached map - confirm */
+}
+
static bool read_until_char(struct io *io, char e)
{
int c;
diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h
index d642e7d73910..e95fcbd55384 100644
--- a/tools/perf/util/tool_pmu.h
+++ b/tools/perf/util/tool_pmu.h
@@ -39,6 +39,7 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *resu
u64 tool_pmu__cpu_slots_per_cycle(void);
bool perf_pmu__is_tool(const struct perf_pmu *pmu);
+struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr);
bool evsel__is_tool(const struct evsel *evsel);
enum tool_pmu_event evsel__tool_event(const struct evsel *evsel);
--
2.51.2.1041.gc1ab5b90ca-goog
Powered by blists - more mailing lists