Message-Id: <20220313172144.78141-1-simon.wy@alibaba-inc.com>
Date: Mon, 14 Mar 2022 01:21:42 +0800
From: Wen Yang <simon.wy@...baba-inc.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Thomas Gleixner <tglx@...utronix.de>
Cc: Wen Yang <simon.wy@...baba-inc.com>,
Stephane Eranian <eranian@...gle.com>,
Mark Rutland <mark.rutland@....com>,
Jiri Olsa <jolsa@...hat.com>,
Namhyung Kim <namhyung@...nel.org>,
Borislav Petkov <bp@...en8.de>, x86@...nel.org,
Wen Yang <wenyang@...ux.alibaba.com>,
"H. Peter Anvin" <hpa@...or.com>, linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [RESEND PATCH v2 1/3] perf/x86: extract code to assign perf events for both core and uncore
The following two patterns are duplicated in several places in the x86 perf code:
- fast path: try to reuse the previously assigned register
- slow path: assign a counter to each event
To improve code quality and to prepare for the following patches in this series,
which use the same patterns, extract this code into perf_assign_events().
This commit does not change functionality.
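
For reference, the duplicated fast-path/slow-path pattern consolidated here looks
roughly like the stand-alone sketch below. This is simplified user-space C for
illustration only, not the kernel code: the struct names, the slow_path_assign()
stub and main() are made up, and details such as counter pairing and the gpmax
limit are omitted.

        #include <stdint.h>
        #include <stdio.h>

        /* Simplified stand-ins for the kernel structures involved. */
        struct constraint {
                uint64_t idxmsk;        /* counters this event may use */
        };

        struct event {
                int idx;                /* previously assigned counter, or -1 */
        };

        /*
         * Placeholder for the weight-ordered, constraint-based scheduler used
         * on the slow path (stubbed out here).  Returns the number of events
         * it could not schedule.
         */
        static int slow_path_assign(struct event **events,
                                    struct constraint **constraints,
                                    int n, int *assign)
        {
                return 0;
        }

        /*
         * Fast path: keep each event on the counter it already occupies, as
         * long as its constraint still allows that counter and no earlier
         * event in the list has claimed it.  Fall back to the slow path on
         * the first conflict.
         */
        static int assign_events(struct event **events,
                                 struct constraint **constraints,
                                 int n, int *assign)
        {
                uint64_t used_mask = 0;
                int i;

                for (i = 0; i < n; i++) {
                        struct event *e = events[i];
                        uint64_t mask;

                        if (e->idx == -1)               /* never assigned */
                                break;

                        /* constraint no longer honored */
                        if (!(constraints[i]->idxmsk & (1ULL << e->idx)))
                                break;

                        mask = 1ULL << e->idx;
                        if (used_mask & mask)           /* counter already taken */
                                break;

                        used_mask |= mask;
                        assign[i] = e->idx;
                }

                if (i != n)                             /* slow path */
                        return slow_path_assign(events, constraints, n, assign);

                return 0;
        }

        int main(void)
        {
                struct constraint c0 = { .idxmsk = 0x3 };  /* counters 0 or 1 */
                struct constraint c1 = { .idxmsk = 0x3 };
                struct event e0 = { .idx = 0 }, e1 = { .idx = 1 };

                struct event *events[] = { &e0, &e1 };
                struct constraint *constraints[] = { &c0, &c1 };
                int assign[2];

                if (!assign_events(events, constraints, 2, assign))
                        printf("fast path kept counters %d and %d\n",
                               assign[0], assign[1]);
                return 0;
        }

The key point is that the fast path only succeeds if every event can keep the
counter it already has; the first conflict falls back to the full
constraint-based scheduler.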
Signed-off-by: Wen Yang <simon.wy@...baba-inc.com>
Cc: Peter Zijlstra (Intel) <peterz@...radead.org>
Cc: Stephane Eranian <eranian@...gle.com>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Arnaldo Carvalho de Melo <acme@...nel.org>
Cc: Mark Rutland <mark.rutland@....com>
Cc: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Cc: Jiri Olsa <jolsa@...hat.com>
Cc: Namhyung Kim <namhyung@...nel.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Borislav Petkov <bp@...en8.de>
Cc: x86@...nel.org
Cc: Wen Yang <wenyang@...ux.alibaba.com>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: linux-perf-users@...r.kernel.org
Cc: linux-kernel@...r.kernel.org
---
arch/x86/events/core.c | 141 ++++++++++++++++++++++-------------------
arch/x86/events/intel/uncore.c | 31 +--------
arch/x86/events/perf_event.h | 6 +-
3 files changed, 82 insertions(+), 96 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index e686c5e..b14fb1b 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -950,10 +950,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
return true;
}
-/*
- * Assign a counter for each event.
- */
-int perf_assign_events(struct event_constraint **constraints, int n,
+static int __perf_assign_events(struct event_constraint **constraints, int n,
int wmin, int wmax, int gpmax, int *assign)
{
struct perf_sched sched;
@@ -969,16 +966,66 @@ int perf_assign_events(struct event_constraint **constraints, int n,
return sched.state.unassigned;
}
+
+/*
+ * Assign a counter for each event.
+ */
+int perf_assign_events(struct perf_event **event_list,
+ struct event_constraint **constraints, int n,
+ int wmin, int wmax, int gpmax, int *assign)
+{
+ struct event_constraint *c;
+ struct hw_perf_event *hwc;
+ u64 used_mask = 0;
+ int unsched = 0;
+ int i;
+
+ /*
+ * fastpath, try to reuse previous register
+ */
+ for (i = 0; i < n; i++) {
+ u64 mask;
+
+ hwc = &event_list[i]->hw;
+ c = constraints[i];
+
+ /* never assigned */
+ if (hwc->idx == -1)
+ break;
+
+ /* constraint still honored */
+ if (!test_bit(hwc->idx, c->idxmsk))
+ break;
+
+ mask = BIT_ULL(hwc->idx);
+ if (is_counter_pair(hwc))
+ mask |= mask << 1;
+
+ /* not already used */
+ if (used_mask & mask)
+ break;
+
+ used_mask |= mask;
+
+ if (assign)
+ assign[i] = hwc->idx;
+ }
+
+ /* slow path */
+ if (i != n)
+ unsched = __perf_assign_events(constraints, n,
+ wmin, wmax, gpmax, assign);
+
+ return unsched;
+}
EXPORT_SYMBOL_GPL(perf_assign_events);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
int num_counters = hybrid(cpuc->pmu, num_counters);
- struct event_constraint *c;
- struct perf_event *e;
int n0, i, wmin, wmax, unsched = 0;
- struct hw_perf_event *hwc;
- u64 used_mask = 0;
+ struct event_constraint *c;
+ int gpmax = num_counters;
/*
* Compute the number of events already present; see x86_pmu_add(),
@@ -1017,66 +1064,30 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
}
/*
- * fastpath, try to reuse previous register
+ * Do not allow scheduling of more than half the available
+ * generic counters.
+ *
+ * This helps avoid counter starvation of sibling thread by
+ * ensuring at most half the counters cannot be in exclusive
+ * mode. There is no designated counters for the limits. Any
+ * N/2 counters can be used. This helps with events with
+ * specific counter constraints.
*/
- for (i = 0; i < n; i++) {
- u64 mask;
-
- hwc = &cpuc->event_list[i]->hw;
- c = cpuc->event_constraint[i];
-
- /* never assigned */
- if (hwc->idx == -1)
- break;
-
- /* constraint still honored */
- if (!test_bit(hwc->idx, c->idxmsk))
- break;
-
- mask = BIT_ULL(hwc->idx);
- if (is_counter_pair(hwc))
- mask |= mask << 1;
-
- /* not already used */
- if (used_mask & mask)
- break;
+ if (is_ht_workaround_enabled() && !cpuc->is_fake &&
+ READ_ONCE(cpuc->excl_cntrs->exclusive_present))
+ gpmax /= 2;
- used_mask |= mask;
-
- if (assign)
- assign[i] = hwc->idx;
+ /*
+ * Reduce the amount of available counters to allow fitting
+ * the extra Merge events needed by large increment events.
+ */
+ if (x86_pmu.flags & PMU_FL_PAIR) {
+ gpmax = num_counters - cpuc->n_pair;
+ WARN_ON(gpmax <= 0);
}
- /* slow path */
- if (i != n) {
- int gpmax = num_counters;
-
- /*
- * Do not allow scheduling of more than half the available
- * generic counters.
- *
- * This helps avoid counter starvation of sibling thread by
- * ensuring at most half the counters cannot be in exclusive
- * mode. There is no designated counters for the limits. Any
- * N/2 counters can be used. This helps with events with
- * specific counter constraints.
- */
- if (is_ht_workaround_enabled() && !cpuc->is_fake &&
- READ_ONCE(cpuc->excl_cntrs->exclusive_present))
- gpmax /= 2;
-
- /*
- * Reduce the amount of available counters to allow fitting
- * the extra Merge events needed by large increment events.
- */
- if (x86_pmu.flags & PMU_FL_PAIR) {
- gpmax = num_counters - cpuc->n_pair;
- WARN_ON(gpmax <= 0);
- }
-
- unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
- wmax, gpmax, assign);
- }
+ unsched = perf_assign_events(cpuc->event_list, cpuc->event_constraint,
+ n, wmin, wmax, gpmax, assign);
/*
* In case of success (unsched = 0), mark events as committed,
@@ -1093,7 +1104,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
} else {
for (i = n0; i < n; i++) {
- e = cpuc->event_list[i];
+ struct perf_event *e = cpuc->event_list[i];
/*
* release events that failed scheduling
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index e497da9..101358a 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -442,12 +442,8 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box,
static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
- unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
struct event_constraint *c;
int i, wmin, wmax, ret = 0;
- struct hw_perf_event *hwc;
-
- bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
c = uncore_get_event_constraint(box, box->event_list[i]);
@@ -456,31 +452,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
wmax = max(wmax, c->weight);
}
- /* fastpath, try to reuse previous register */
- for (i = 0; i < n; i++) {
- hwc = &box->event_list[i]->hw;
- c = box->event_constraint[i];
-
- /* never assigned */
- if (hwc->idx == -1)
- break;
-
- /* constraint still honored */
- if (!test_bit(hwc->idx, c->idxmsk))
- break;
-
- /* not already used */
- if (test_bit(hwc->idx, used_mask))
- break;
-
- __set_bit(hwc->idx, used_mask);
- if (assign)
- assign[i] = hwc->idx;
- }
- /* slow path */
- if (i != n)
- ret = perf_assign_events(box->event_constraint, n,
- wmin, wmax, n, assign);
+ ret = perf_assign_events(box->event_list,
+ box->event_constraint, n, wmin, wmax, n, assign);
if (!assign || ret) {
for (i = 0; i < n; i++)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 150261d..f1acd1d 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1130,8 +1130,10 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
void x86_pmu_enable_all(int added);
-int perf_assign_events(struct event_constraint **constraints, int n,
- int wmin, int wmax, int gpmax, int *assign);
+int perf_assign_events(struct perf_event **event_list,
+ struct event_constraint **constraints, int n,
+ int wmin, int wmax, int gpmax, int *assign);
+
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
void x86_pmu_stop(struct perf_event *event, int flags);
--
1.8.3.1