Message-Id: <20220313165047.77391-3-simon.wy@alibaba-inc.com>
Date: Mon, 14 Mar 2022 00:50:47 +0800
From: Wen Yang <simon.wy@...baba-inc.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Thomas Gleixner <tglx@...utronix.de>
Cc: Wen Yang <simon.wy@...baba-inc.com>,
Stephane Eranian <eranian@...gle.com>,
Mark Rutland <mark.rutland@....com>,
Jiri Olsa <jolsa@...hat.com>,
Namhyung Kim <namhyung@...nel.org>,
Borislav Petkov <bp@...en8.de>, x86@...nel.org,
Wen Yang <wenyang@...ux.alibaba.com>,
"H. Peter Anvin" <hpa@...or.com>, linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH v2 3/3] perf/x86: reuse scarce pmu counters

The NMI watchdog may permanently consume a fixed counter (*cycles*),
so when other programs also count *cycles*, they end up occupying a
general-purpose (GP) counter instead. Here is a slight optimization:
spare a GP counter by letting events that are non-sampling and able to
use a fixed counter share that fixed counter.
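
For illustration, below is a small stand-alone user-space sketch of the
reuse rule. Only the bitmask test against INTEL_PMC_IDX_FIXED and the
non-sampling check mirror the helper added by this patch; the demo_*
types, the sample_period-based stand-in for is_sampling_event() and the
main() driver are assumptions made purely for this example and are not
kernel code.

/*
 * Illustrative sketch only (assumed names, builds in user space, not part
 * of the patch): is_sampling_event() is approximated by checking
 * sample_period, and the struct layouts are invented for this example.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INTEL_PMC_IDX_FIXED	32

struct demo_event { uint64_t sample_period; };
struct demo_constraint { uint64_t idxmsk64; };

static bool demo_is_sampling(const struct demo_event *e)
{
	return e->sample_period != 0;
}

/*
 * A fixed counter may be shared only by a non-sampling event whose
 * constraint allows a fixed-purpose counter at all.
 */
static bool demo_pmc_reusable(const struct demo_event *e,
			      const struct demo_constraint *c)
{
	return (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) &&
	       !demo_is_sampling(e);
}

int main(void)
{
	struct demo_constraint cycles = { .idxmsk64 = ~0ULL << INTEL_PMC_IDX_FIXED };
	struct demo_event watchdog = { .sample_period = 1 };	/* NMI watchdog: sampling */
	struct demo_event counting = { .sample_period = 0 };	/* counting-only cycles */

	printf("watchdog may share a busy fixed counter: %d\n",
	       demo_pmc_reusable(&watchdog, &cycles));
	printf("counting event may share a busy fixed counter: %d\n",
	       demo_pmc_reusable(&counting, &cycles));
	return 0;
}

With this rule, a counting-only *cycles* event can be placed on the
fixed counter already held by the NMI watchdog instead of taking a GP
counter.
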
Signed-off-by: Wen Yang <simon.wy@...baba-inc.com>
Cc: Peter Zijlstra (Intel) <peterz@...radead.org>
Cc: Stephane Eranian <eranian@...gle.com>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Arnaldo Carvalho de Melo <acme@...nel.org>
Cc: Mark Rutland <mark.rutland@....com>
Cc: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Cc: Jiri Olsa <jolsa@...hat.com>
Cc: Namhyung Kim <namhyung@...nel.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Borislav Petkov <bp@...en8.de>
Cc: x86@...nel.org
Cc: Wen Yang <wenyang@...ux.alibaba.com>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: linux-perf-users@...r.kernel.org
Cc: linux-kernel@...r.kernel.org
---
arch/x86/events/core.c | 45 +++++++++++++++++++++++++++++++--------------
1 file changed, 31 insertions(+), 14 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index b7f5925..6ddddf1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -799,6 +799,7 @@ struct perf_sched {
u64 msk_counters;
u64 msk_events;
struct event_constraint **constraints;
+ struct perf_event **events;
struct sched_state state;
struct sched_state saved[SCHED_STATES_MAX];
};
@@ -846,7 +847,8 @@ static int perf_sched_calc_event(struct event_constraint **constraints,
/*
* Initialize iterator that runs through all events and counters.
*/
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
+static void perf_sched_init(struct perf_sched *sched,
+ struct perf_event **events, struct event_constraint **constraints,
int num, int wmin, int wmax, int gpmax, u64 mevt, u64 mcnt)
{
memset(sched, 0, sizeof(*sched));
@@ -854,12 +856,13 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
sched->max_weight = wmax;
sched->max_gp = gpmax;
sched->constraints = constraints;
+ sched->events = events;
sched->msk_events = mevt;
sched->msk_counters = mcnt;
sched->state.weight = perf_sched_calc_weight(constraints, num, wmin, wmax, mcnt);
sched->state.event = perf_sched_calc_event(constraints, num, sched->state.weight, mevt);
- sched->state.unassigned = num - hweight_long(sched->state.event);
+ sched->state.unassigned = num - hweight_long(mevt);
}
static void perf_sched_save_state(struct perf_sched *sched)
@@ -896,6 +899,7 @@ static bool perf_sched_restore_state(struct perf_sched *sched)
static bool __perf_sched_find_counter(struct perf_sched *sched)
{
struct event_constraint *c;
+ struct perf_event *e;
int idx;
if (!sched->state.unassigned)
@@ -905,16 +909,17 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
return false;
c = sched->constraints[sched->state.event];
+ e = sched->events[sched->state.event];
/* Prefer fixed purpose counters */
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
idx = INTEL_PMC_IDX_FIXED;
for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
u64 mask = BIT_ULL(idx);
- if (sched->msk_counters & mask)
+ if ((sched->msk_counters & mask) && is_sampling_event(e))
continue;
- if (sched->state.used & mask)
+ if ((sched->state.used & mask) && is_sampling_event(e))
continue;
sched->state.used |= mask;
@@ -1016,14 +1021,15 @@ static void perf_sched_obtain_used_registers(int *assign, int n, u64 *events, u6
}
}
-static int __perf_assign_events(struct event_constraint **constraints, int n,
+static int __perf_assign_events(struct perf_event **events,
+ struct event_constraint **constraints, int n,
int wmin, int wmax, int gpmax, int *assign)
{
- u64 msk_events, msk_counters;
+ u64 mevt, mcnt;
struct perf_sched sched;
- perf_sched_obtain_used_registers(assign, n, &msk_events, &msk_counters);
- perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax, msk_events, msk_counters);
+ perf_sched_obtain_used_registers(assign, n, &mevt, &mcnt);
+ perf_sched_init(&sched, events, constraints, n, wmin, wmax, gpmax, mevt, mcnt);
do {
if (!perf_sched_find_counter(&sched))
@@ -1035,6 +1041,13 @@ static int __perf_assign_events(struct event_constraint **constraints, int n,
return sched.state.unassigned;
}
+static bool is_pmc_reusable(struct perf_event *e,
+ struct event_constraint *c)
+{
+ return c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED) &&
+ !is_sampling_event(e);
+}
+
/*
* Assign a counter for each event.
*/
@@ -1043,12 +1056,13 @@ int perf_assign_events(struct perf_event **event_list,
int wmin, int wmax, int gpmax, int *assign)
{
struct event_constraint *c;
+ struct perf_event *e;
struct hw_perf_event *hwc;
u64 used_mask = 0;
int unsched = 0;
int i;
- memset(assign, -1, n);
+ memset(assign, -1, n * sizeof(int));
/*
* fastpath, try to reuse previous register
@@ -1058,6 +1072,7 @@ int perf_assign_events(struct perf_event **event_list,
hwc = &event_list[i]->hw;
c = constraints[i];
+ e = event_list[i];
/* never assigned */
if (hwc->idx == -1)
@@ -1072,8 +1087,10 @@ int perf_assign_events(struct perf_event **event_list,
mask |= mask << 1;
/* not already used */
- if (used_mask & mask)
- break;
+ if (used_mask & mask) {
+ if (!is_pmc_reusable(e, c))
+ break;
+ }
used_mask |= mask;
@@ -1083,12 +1100,12 @@ int perf_assign_events(struct perf_event **event_list,
/* slow path */
if (i != n) {
- unsched = __perf_assign_events(constraints, n,
+ unsched = __perf_assign_events(event_list, constraints, n,
wmin, wmax, gpmax, assign);
if (unsched) {
- memset(assign, -1, n);
- unsched = __perf_assign_events(constraints, n,
+ memset(assign, -1, n * sizeof(int));
+ unsched = __perf_assign_events(event_list, constraints, n,
wmin, wmax, gpmax, assign);
}
}
--
1.8.3.1