[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241101211757.824743-1-ctshao@google.com>
Date: Fri, 1 Nov 2024 21:17:55 +0000
From: Chun-Tse Shao <ctshao@...gle.com>
To: linux-kernel@...r.kernel.org
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Ian Rogers <irogers@...gle.com>, Adrian Hunter <adrian.hunter@...el.com>,
Liang Kan <kan.liang@...ux.intel.com>, Ze Gao <zegao2021@...il.com>,
Chun-Tse Shao <ctshao@...gle.com>, Yang Jihong <yangjihong1@...wei.com>,
Weilin Wang <weilin.wang@...el.com>, linux-perf-users@...r.kernel.org
Subject: [PATCH v2 1/3] perf evsel: Improve the evsel__open_strerror for EBUSY
From: Ian Rogers <irogers@...gle.com>
The existing EBUSY strerror message is:
```
The sys_perf_event_open() syscall returned with 16 (Device or resource busy) for event (intel_bts//).
"dmesg | grep -i perf" may provide additional information.
```
The dmesg won't be useful. What is more useful is knowing what
processes are potentially using the PMU, which some procfs scanning can
reveal. When parallel testing tests/shell/stat_all_pmu.sh this yields:
```
Testing intel_bts//
Error:
The PMU intel_bts counters are busy and in use by another process.
Possible processes:
2585882 perf list
2585902 perf list -j -o /tmp/__perf_test.list_output.json.KF9MY
2585904 perf list
2585911 perf record -e task-clock --filter period > 1 -o /dev/null --quiet true
2585912 perf list
2585915 perf list
2586042 /tmp/perf/perf record -asdg -e cpu-clock -o /tmp/perftool-testsuite_report.dIF/perf_report/perf.data -- sleep 2
2589078 perf record -g -e task-clock:u -o - perf test -w noploop
2589148 /tmp/perf/perf record --control=fifo:control,ack -e cpu-clock -m 1 sleep 10
2589379 perf --buildid-dir /tmp/perf.debug.Umx record --buildid-all -o /tmp/perf.data.YBm /tmp/perf.ex.MD5.ZQW
2589568 perf record -o /tmp/__perf_test.program.mtcZH/perf.data --branch-filter any,save_type,u -- perf test -w brstack
2589649 perf record --per-thread -o /tmp/__perf_test.perf.data.5d3dc perf test -w thloop
2589898 perf record -o /tmp/perf-test-script.BX2b27Dcnj/pp-perf.data --sample-cpu uname
```
Which gets a little closer to finding the issue.
Signed-off-by: Ian Rogers <irogers@...gle.com>
---
tools/perf/util/evsel.c | 79 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 78 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index dbf9c8cee3c56..9a5b6a6f8d2e5 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3286,6 +3286,78 @@ static bool find_process(const char *name)
return ret ? false : true;
}
+static int dump_perf_event_processes(char *msg, size_t size)
+{
+ DIR *proc_dir;
+ struct dirent *proc_entry;
+ int printed = 0;
+
+ proc_dir = opendir(procfs__mountpoint());
+ if (!proc_dir)
+ return 0;
+
+ /* Walk through the /proc directory. */
+ while ((proc_entry = readdir(proc_dir)) != NULL) {
+ char path[PATH_MAX];
+ DIR *fd_dir;
+ struct dirent *fd_entry;
+ int fd_dir_fd;
+
+ if ((proc_entry->d_type != DT_DIR) ||
+ !strcmp(".", proc_entry->d_name) ||
+ !strcmp("..", proc_entry->d_name))
+ continue;
+
+ scnprintf(path, sizeof(path), "%s/fd", proc_entry->d_name);
+ fd_dir_fd = openat(dirfd(proc_dir), path, O_DIRECTORY);
+ if (fd_dir_fd == -1)
+ continue;
+ fd_dir = fdopendir(fd_dir_fd);
+ if (!fd_dir) {
+ close(fd_dir_fd);
+ continue;
+ }
+ while ((fd_entry = readdir(fd_dir)) != NULL) {
+ ssize_t link_size;
+
+ if (fd_entry->d_type != DT_LNK)
+ continue;
+ link_size = readlinkat(fd_dir_fd, fd_entry->d_name, path, sizeof(path));
+ if (link_size < 0)
+ continue;
+ /* Take care as readlink doesn't null terminate the string. */
+ if (!strncmp(path, "anon_inode:[perf_event]", link_size)) {
+ int cmdline_fd;
+ ssize_t cmdline_size;
+
+ scnprintf(path, sizeof(path), "%s/cmdline", proc_entry->d_name);
+ cmdline_fd = openat(dirfd(proc_dir), path, O_RDONLY);
+ if (cmdline_fd == -1)
+ continue;
+ cmdline_size = read(cmdline_fd, path, sizeof(path) - 1);
+ close(cmdline_fd);
+ if (cmdline_size < 0)
+ continue;
+ path[cmdline_size] = '\0';
+ for (ssize_t i = 0; i < cmdline_size; i++) {
+ if (path[i] == '\0')
+ path[i] = ' ';
+ }
+
+ if (printed == 0)
+ printed += scnprintf(msg, size, "Possible processes:\n");
+
+ printed += scnprintf(msg + printed, size - printed,
+ "%s %s\n", proc_entry->d_name, path);
+ break;
+ }
+ }
+ closedir(fd_dir);
+ }
+ closedir(proc_dir);
+ return printed;
+}
+
int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
char *msg __maybe_unused,
size_t size __maybe_unused)
@@ -3319,7 +3391,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
printed += scnprintf(msg, size,
"No permission to enable %s event.\n\n", evsel__name(evsel));
- return scnprintf(msg + printed, size - printed,
+ return printed + scnprintf(msg + printed, size - printed,
"Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
"access to performance monitoring and observability operations for processes\n"
"without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
@@ -3382,6 +3454,11 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size,
"The PMU counters are busy/taken by another profiler.\n"
"We found oprofile daemon running, please stop it and try again.");
+ printed += scnprintf(
+ msg, size,
+ "The PMU %s counters are busy and in use by another process.\n",
+ evsel->pmu ? evsel->pmu->name : "");
+ return printed + dump_perf_event_processes(msg + printed, size - printed);
break;
case EINVAL:
if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
--
2.47.0.163.g1226f6d8fa-goog
Powered by blists - more mailing lists