[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAP-5=fUZ_tXR7nqQjkNHgGbJ4dMLdOf0umR=y_hf9xJuCbfgfw@mail.gmail.com>
Date: Fri, 1 Nov 2024 09:14:40 -0700
From: Ian Rogers <irogers@...gle.com>
To: Chun-Tse Shao <ctshao@...gle.com>
Cc: linux-kernel@...r.kernel.org, Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>, Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Adrian Hunter <adrian.hunter@...el.com>, Kan <kan.liang@...ux.intel.com>,
Ze Gao <zegao2021@...il.com>, Yang Jihong <yangjihong1@...wei.com>,
Weilin Wang <weilin.wang@...el.com>, linux-perf-users@...r.kernel.org
Subject: Re: [PATCH 3/3] perf evsel: Find process with busy PMUs for EBUSY
On Thu, Oct 31, 2024 at 3:39 PM Chun-Tse Shao <ctshao@...gle.com> wrote:
>
> It parses fdinfo for the PMU type, compares it with the event which failed
> to open, and reports the processes causing the EBUSY error.
>
> ```
> Testing cycles and intel_pt//
> $ ./perf stat -e cycles &
> [1] 55569
> $ ./perf stat -e intel_pt// &
> [2] 55683
> $ ./perf stat -e intel_pt//
> Error:
> The PMU intel_pt counters are busy and in use by another process.
> Possible processes:
> 55683 ./perf stat -e intel_pt//
> ```
> Only perf with intel_pt was reported.
I think this is a very nice addition. It is a shame there is a race
with the existing process exiting, between the perf_event_open and the
/proc scanning. A PMU may return EBUSY just because, say, perf list is
probing features on the PMU, so we probably have some extra retries for
EBUSY too.
> Signed-off-by: Chun-Tse Shao <ctshao@...gle.com>
> ---
> tools/perf/util/evsel.c | 79 +++++++++++++++++++++++++++++------------
> 1 file changed, 57 insertions(+), 22 deletions(-)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 9a5b6a6f8d2e5..d2f7c19e023ec 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -3286,7 +3286,8 @@ static bool find_process(const char *name)
> return ret ? false : true;
> }
>
> -static int dump_perf_event_processes(char *msg, size_t size)
> +static int dump_perf_event_processes(const struct perf_event_attr *failed_attr,
> + char *msg, size_t size)
> {
> DIR *proc_dir;
> struct dirent *proc_entry;
> @@ -3327,29 +3328,61 @@ static int dump_perf_event_processes(char *msg, size_t size)
> continue;
> /* Take care as readlink doesn't null terminate the string. */
> if (!strncmp(path, "anon_inode:[perf_event]", link_size)) {
> - int cmdline_fd;
> - ssize_t cmdline_size;
> -
> - scnprintf(path, sizeof(path), "%s/cmdline", proc_entry->d_name);
> - cmdline_fd = openat(dirfd(proc_dir), path, O_RDONLY);
> - if (cmdline_fd == -1)
> - continue;
> - cmdline_size = read(cmdline_fd, path, sizeof(path) - 1);
> - close(cmdline_fd);
> - if (cmdline_size < 0)
> + int fdinfo_fd;
> + ssize_t fdinfo_size;
> + char *line;
> + u32 perf_event_type = PERF_TYPE_MAX;
PERF_TYPE_MAX, at 6, is just beyond the pre-defined perf PMU types, but
PMU drivers loaded by the kernel may use this number - I think task-clock
may use this PMU type number, but it shouldn't return EBUSY. Anyway, I
think -1 would be a better marker to use here and in the corresponding
check below.
Thanks,
Ian
> +
> + /* Let's check the PMU type reserved by this process */
> + scnprintf(path, sizeof(path), "%s/fdinfo/%s",
> + proc_entry->d_name, fd_entry->d_name);
> + fdinfo_fd = openat(dirfd(proc_dir), path, O_RDONLY);
> + fdinfo_size = read(fdinfo_fd, path, sizeof(path) - 1);
> + if (fdinfo_size < 0)
> continue;
> - path[cmdline_size] = '\0';
> - for (ssize_t i = 0; i < cmdline_size; i++) {
> - if (path[i] == '\0')
> - path[i] = ' ';
> + path[fdinfo_size] = '\0';
> +
> + line = strtok(path, "\n");
> + while (line != NULL) {
> + if (sscanf(line,
> + "perf_event-attr.type:\t%u",
> + &perf_event_type) == 1)
> + break;
> + line = strtok(NULL, "\n");
> }
>
> - if (printed == 0)
> - printed += scnprintf(msg, size, "Possible processes:\n");
> -
> - printed += scnprintf(msg + printed, size - printed,
> - "%s %s\n", proc_entry->d_name, path);
> - break;
> + /* Report the process which reserves the conflicted PMU. */
> + /* If fdinfo does not contain PMU type, report it too. */
> + if (perf_event_type == failed_attr->type ||
> + perf_event_type == PERF_TYPE_MAX) {
> + int cmdline_fd;
> + ssize_t cmdline_size;
> +
> + scnprintf(path, sizeof(path),
> + "%s/cmdline",
> + proc_entry->d_name);
> + cmdline_fd = openat(dirfd(proc_dir), path, O_RDONLY);
> + if (cmdline_fd == -1)
> + continue;
> + cmdline_size = read(cmdline_fd, path, sizeof(path) - 1);
> + close(cmdline_fd);
> + if (cmdline_size < 0)
> + continue;
> + path[cmdline_size] = '\0';
> + for (ssize_t i = 0; i < cmdline_size; i++) {
> + if (path[i] == '\0')
> + path[i] = ' ';
> + }
> +
> + if (printed == 0)
> + printed += scnprintf(
> + msg, size,
> + "Possible processes:\n");
> +
> + printed += scnprintf(msg + printed, size - printed,
> + "%s %s\n", proc_entry->d_name, path);
> + break;
> + }
> }
> }
> closedir(fd_dir);
> @@ -3458,7 +3491,9 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
> msg, size,
> "The PMU %s counters are busy and in use by another process.\n",
> evsel->pmu ? evsel->pmu->name : "");
> - return printed + dump_perf_event_processes(msg + printed, size - printed);
> + return printed + dump_perf_event_processes(&evsel->core.attr,
> + msg + printed,
> + size - printed);
> break;
> case EINVAL:
> if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
> --
> 2.47.0.163.g1226f6d8fa-goog
>
Powered by blists - more mailing lists