[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <42bcaa57-bab3-457f-83cc-d908303090f2@linux.intel.com>
Date: Thu, 8 Jan 2026 16:07:33 +0800
From: "Mi, Dapeng" <dapeng1.mi@...ux.intel.com>
To: Lisa Robinson <lisa@...efly.space>, Peter Zijlstra
<peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Namhyung Kim <namhyung@...nel.org>, Huacai Chen <chenhuacai@...nel.org>
Cc: Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>, Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>, James Clark
<james.clark@...aro.org>, WANG Xuerui <kernel@...0n.name>,
linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org,
loongarch@...ts.linux.dev
Subject: Re: [PATCH v2] LoongArch: Fix PMU counter allocation for mixed-type
event groups
On 1/5/2026 12:23 AM, Lisa Robinson wrote:
> When validating a perf event group, validate_group() unconditionally
> attempts to allocate hardware PMU counters for the leader, sibling
> events and the new event being added.
>
> This is incorrect for mixed-type groups. If a PERF_TYPE_SOFTWARE event
> ispart of the group, the current code still tries to allocate a hardware
Typo: "ispart" -> "is part"
> PMU counter for it, which can wrongly consume hardware PMU resources and
> cause spurious allocation failures.
>
> Fix this by only allocating PMU counters for hardware events during group
> validation, and skipping software events.
>
> A trimmed down reproducer is as simple as this:
>
> #include <stdio.h>
> #include <assert.h>
> #include <unistd.h>
> #include <string.h>
> #include <sys/syscall.h>
> #include <linux/perf_event.h>
>
> int
> main (int argc, char *argv[])
> {
> struct perf_event_attr attr = { 0 };
> int fds[5];
>
> attr.disabled = 1;
> attr.exclude_kernel = 1;
> attr.exclude_hv = 1;
> attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
> PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_GROUP;
> attr.size = sizeof (attr);
>
> attr.type = PERF_TYPE_SOFTWARE;
> attr.config = PERF_COUNT_SW_DUMMY;
> fds[0] = syscall (SYS_perf_event_open, &attr, 0, -1, -1, 0);
> assert (fds[0] >= 0);
>
> attr.type = PERF_TYPE_HARDWARE;
> attr.config = PERF_COUNT_HW_CPU_CYCLES;
> fds[1] = syscall (SYS_perf_event_open, &attr, 0, -1, fds[0], 0);
> assert (fds[1] >= 0);
>
> attr.type = PERF_TYPE_HARDWARE;
> attr.config = PERF_COUNT_HW_INSTRUCTIONS;
> fds[2] = syscall (SYS_perf_event_open, &attr, 0, -1, fds[0], 0);
> assert (fds[2] >= 0);
>
> attr.type = PERF_TYPE_HARDWARE;
> attr.config = PERF_COUNT_HW_BRANCH_MISSES;
> fds[3] = syscall (SYS_perf_event_open, &attr, 0, -1, fds[0], 0);
> assert (fds[3] >= 0);
>
> attr.type = PERF_TYPE_HARDWARE;
> attr.config = PERF_COUNT_HW_CACHE_REFERENCES;
> fds[4] = syscall (SYS_perf_event_open, &attr, 0, -1, fds[0], 0);
> assert (fds[4] >= 0);
>
> printf ("PASSED\n");
>
> return 0;
> }
>
> Fixes: b37042b2bb7c ("LoongArch: Add perf events support")
> Signed-off-by: Lisa Robinson <lisa@...efly.space>
> ---
> Changes in v2:
> - Factor out duplicated perf event type checks into an inline helper.
> ---
> arch/loongarch/kernel/perf_event.c | 21 ++++++++++++++++++---
> 1 file changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
> index 9d257c8519c9..e34a6fb33e11 100644
> --- a/arch/loongarch/kernel/perf_event.c
> +++ b/arch/loongarch/kernel/perf_event.c
> @@ -626,6 +626,18 @@ static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 conf
> return pev;
> }
>
> +static inline bool loongarch_pmu_event_requires_counter(const struct perf_event *event)
> +{
> + switch (event->attr.type) {
> + case PERF_TYPE_HARDWARE:
> + case PERF_TYPE_HW_CACHE:
> + case PERF_TYPE_RAW:
> + return true;
> + default:
> + return false;
> + }
> +}
> +
> static int validate_group(struct perf_event *event)
> {
> struct cpu_hw_events fake_cpuc;
> @@ -633,15 +645,18 @@ static int validate_group(struct perf_event *event)
>
> memset(&fake_cpuc, 0, sizeof(fake_cpuc));
>
> - if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
> + if (loongarch_pmu_event_requires_counter(leader) &&
> + loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
> return -EINVAL;
>
> for_each_sibling_event(sibling, leader) {
> - if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
> + if (loongarch_pmu_event_requires_counter(sibling) &&
> + loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
> return -EINVAL;
> }
>
> - if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
> + if (loongarch_pmu_event_requires_counter(event) &&
> + loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
> return -EINVAL;
>
> return 0;
The code looks good to me, but I'm not very familiar with the LoongArch perf
code, so I won't give a Reviewed-by and will leave it to the LoongArch perf
experts. Thanks.
Powered by blists - more mailing lists