Message-Id: <20220313172144.78141-3-simon.wy@alibaba-inc.com>
Date: Mon, 14 Mar 2022 01:21:44 +0800
From: Wen Yang <simon.wy@...baba-inc.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Thomas Gleixner <tglx@...utronix.de>
Cc: Wen Yang <simon.wy@...baba-inc.com>,
Stephane Eranian <eranian@...gle.com>,
Mark Rutland <mark.rutland@....com>,
Jiri Olsa <jolsa@...hat.com>,
Namhyung Kim <namhyung@...nel.org>,
Borislav Petkov <bp@...en8.de>, x86@...nel.org,
Wen Yang <wenyang@...ux.alibaba.com>,
"H. Peter Anvin" <hpa@...or.com>, linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [RESEND PATCH v2 3/3] perf/x86: Reuse scarce PMU counters

The NMI watchdog may permanently consume a fixed counter (*cycles*),
so when other programs also count *cycles* they are pushed onto a
general-purpose (GP) counter. This is a slight optimization: save a
GP counter for events that are non-sampling and can use a fixed
counter instead.
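
For clarity, the reuse condition can be written as a small
self-contained sketch (the sketch_* names below are hypothetical;
SKETCH_PMC_IDX_FIXED stands in for INTEL_PMC_IDX_FIXED, which is 32
on x86, and the sample_period test mirrors is_sampling_event()):

    #include <stdbool.h>
    #include <stdint.h>

    #define SKETCH_PMC_IDX_FIXED  32   /* index of the first fixed counter */

    struct sketch_event {
        uint64_t sample_period;        /* 0 => pure counting, != 0 => sampling */
    };

    struct sketch_constraint {
        uint64_t idxmsk64;             /* bitmask of counters the event may use */
    };

    /* mirrors is_sampling_event(): a sampling event has a period set */
    static bool sketch_is_sampling(const struct sketch_event *e)
    {
        return e->sample_period != 0;
    }

    /*
     * A counter that is already claimed may be shared only if the
     * event's constraint admits a fixed counter and the event does
     * not sample, i.e. it only reads the accumulated count.
     */
    static bool sketch_pmc_reusable(const struct sketch_event *e,
                                    const struct sketch_constraint *c)
    {
        return (c->idxmsk64 & (~0ULL << SKETCH_PMC_IDX_FIXED)) &&
               !sketch_is_sampling(e);
    }

With the NMI watchdog already holding the fixed *cycles* counter, a
second counting-only *cycles* event passes this check and may share
that counter, while a sampling event still falls back to a GP counter.
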
Signed-off-by: Wen Yang <simon.wy@...baba-inc.com>
Cc: Peter Zijlstra (Intel) <peterz@...radead.org>
Cc: Stephane Eranian <eranian@...gle.com>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Arnaldo Carvalho de Melo <acme@...nel.org>
Cc: Mark Rutland <mark.rutland@....com>
Cc: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Cc: Jiri Olsa <jolsa@...hat.com>
Cc: Namhyung Kim <namhyung@...nel.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Borislav Petkov <bp@...en8.de>
Cc: x86@...nel.org
Cc: Wen Yang <wenyang@...ux.alibaba.com>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: linux-perf-users@...r.kernel.org
Cc: linux-kernel@...r.kernel.org
---
arch/x86/events/core.c | 35 ++++++++++++++++++++++++++---------
1 file changed, 26 insertions(+), 9 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index b6ea220..95cfec6 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -799,6 +799,7 @@ struct perf_sched {
u64 msk_counters;
u64 msk_events;
struct event_constraint **constraints;
+ struct perf_event **events;
struct sched_state state;
struct sched_state saved[SCHED_STATES_MAX];
};
@@ -846,7 +847,8 @@ static int perf_sched_calc_event(struct event_constraint **constraints,
/*
* Initialize iterator that runs through all events and counters.
*/
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
+static void perf_sched_init(struct perf_sched *sched,
+ struct perf_event **events, struct event_constraint **constraints,
int num, int wmin, int wmax, int gpmax, u64 mevt, u64 mcnt)
{
memset(sched, 0, sizeof(*sched));
@@ -854,6 +856,7 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
sched->max_weight = wmax;
sched->max_gp = gpmax;
sched->constraints = constraints;
+ sched->events = events;
sched->msk_events = mevt;
sched->msk_counters = mcnt;
@@ -896,6 +899,7 @@ static bool perf_sched_restore_state(struct perf_sched *sched)
static bool __perf_sched_find_counter(struct perf_sched *sched)
{
struct event_constraint *c;
+ struct perf_event *e;
int idx;
if (!sched->state.unassigned)
@@ -905,16 +909,17 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
return false;
c = sched->constraints[sched->state.event];
+ e = sched->events[sched->state.event];
/* Prefer fixed purpose counters */
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
idx = INTEL_PMC_IDX_FIXED;
for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
u64 mask = BIT_ULL(idx);
- if (sched->msk_counters & mask)
+ if ((sched->msk_counters & mask) && is_sampling_event(e))
continue;
- if (sched->state.used & mask)
+ if ((sched->state.used & mask) && is_sampling_event(e))
continue;
sched->state.used |= mask;
@@ -1016,14 +1021,15 @@ static void perf_sched_obtain_used_registers(int *assign, int n, u64 *events, u6
}
}
-static int __perf_assign_events(struct event_constraint **constraints, int n,
+static int __perf_assign_events(struct perf_event **events,
+ struct event_constraint **constraints, int n,
int wmin, int wmax, int gpmax, int *assign)
{
u64 mevt, mcnt;
struct perf_sched sched;
perf_sched_obtain_used_registers(assign, n, &mevt, &mcnt);
- perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax, mevt, mcnt);
+ perf_sched_init(&sched, events, constraints, n, wmin, wmax, gpmax, mevt, mcnt);
do {
if (!perf_sched_find_counter(&sched))
@@ -1035,6 +1041,13 @@ static int __perf_assign_events(struct event_constraint **constraints, int n,
return sched.state.unassigned;
}
+static bool is_pmc_reuseable(struct perf_event *e,
+ struct event_constraint *c)
+{
+ return (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) &&
+ (!is_sampling_event(e));
+}
+
/*
* Assign a counter for each event.
*/
@@ -1043,6 +1056,7 @@ int perf_assign_events(struct perf_event **event_list,
int wmin, int wmax, int gpmax, int *assign)
{
struct event_constraint *c;
+ struct perf_event *e;
struct hw_perf_event *hwc;
u64 used_mask = 0;
int unsched = 0;
@@ -1058,6 +1072,7 @@ int perf_assign_events(struct perf_event **event_list,
hwc = &event_list[i]->hw;
c = constraints[i];
+ e = event_list[i];
/* never assigned */
if (hwc->idx == -1)
@@ -1072,8 +1087,10 @@ int perf_assign_events(struct perf_event **event_list,
mask |= mask << 1;
/* not already used */
- if (used_mask & mask)
- break;
+ if (used_mask & mask) {
+ if (!is_pmc_reuseable(e, c))
+ break;
+ }
used_mask |= mask;
@@ -1083,12 +1100,12 @@ int perf_assign_events(struct perf_event **event_list,
/* slow path */
if (i != n) {
- unsched = __perf_assign_events(constraints, n,
+ unsched = __perf_assign_events(event_list, constraints, n,
wmin, wmax, gpmax, assign);
if (unsched) {
memset(assign, -1, n * sizeof(int));
- unsched = __perf_assign_events(constraints, n,
+ unsched = __perf_assign_events(event_list, constraints, n,
wmin, wmax, gpmax, assign);
}
}
--
1.8.3.1