Date:   Mon, 14 Mar 2022 00:50:47 +0800
From:   Wen Yang <simon.wy@...baba-inc.com>
To:     Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Thomas Gleixner <tglx@...utronix.de>
Cc:     Wen Yang <simon.wy@...baba-inc.com>,
        Stephane Eranian <eranian@...gle.com>,
        Mark Rutland <mark.rutland@....com>,
        Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        Borislav Petkov <bp@...en8.de>, x86@...nel.org,
        Wen Yang <wenyang@...ux.alibaba.com>,
        "H. Peter Anvin" <hpa@...or.com>, linux-perf-users@...r.kernel.org,
        linux-kernel@...r.kernel.org
Subject: [PATCH v2 3/3] perf/x86: reuse scarce pmu counters

The NMI watchdog may permanently consume a fixed counter (*cycles*), so
when other programs also count *cycles*, they end up occupying a
general-purpose (GP) counter instead. This is a slight optimization: let
events that are of non-sampling type and can use a fixed counter share
that fixed counter, saving a GP counter for other events. A minimal
sketch of the reuse rule follows below.

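For illustration only, here is a minimal user-space sketch of the reuse
rule this patch introduces. The struct definitions, the value of
INTEL_PMC_IDX_FIXED, and the simplified is_sampling_event() are stand-in
assumptions for this sketch, not the kernel's own definitions:

#include <stdbool.h>
#include <stdint.h>

#define INTEL_PMC_IDX_FIXED 32   /* assumed index of the first fixed counter */

/* Simplified stand-ins for the kernel structures used by the patch. */
struct event_constraint { uint64_t idxmsk64; };
struct perf_event       { bool sampling; };

static bool is_sampling_event(const struct perf_event *e)
{
	/* the kernel checks event->attr.sample_period instead */
	return e->sampling;
}

/*
 * A fixed counter may be shared only by an event whose constraint
 * includes a fixed counter and that does not sample, e.g. the NMI
 * watchdog's cycles event or a counting-mode `perf stat -e cycles`.
 */
static bool can_reuse_fixed_counter(const struct perf_event *e,
				    const struct event_constraint *c)
{
	return (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) &&
	       !is_sampling_event(e);
}
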
Signed-off-by: Wen Yang <simon.wy@...baba-inc.com>
Cc: Peter Zijlstra (Intel) <peterz@...radead.org>
Cc: Stephane Eranian <eranian@...gle.com>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Arnaldo Carvalho de Melo <acme@...nel.org>
Cc: Mark Rutland <mark.rutland@....com>
Cc: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Cc: Jiri Olsa <jolsa@...hat.com>
Cc: Namhyung Kim <namhyung@...nel.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Borislav Petkov <bp@...en8.de>
Cc: x86@...nel.org
Cc: Wen Yang <wenyang@...ux.alibaba.com>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: linux-perf-users@...r.kernel.org
Cc: linux-kernel@...r.kernel.org
---
 arch/x86/events/core.c | 45 +++++++++++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index b7f5925..6ddddf1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -799,6 +799,7 @@ struct perf_sched {
 	u64			msk_counters;
 	u64			msk_events;
 	struct event_constraint	**constraints;
+	struct perf_event	**events;
 	struct sched_state	state;
 	struct sched_state	saved[SCHED_STATES_MAX];
 };
@@ -846,7 +847,8 @@ static int perf_sched_calc_event(struct event_constraint **constraints,
 /*
  * Initialize iterator that runs through all events and counters.
  */
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
+static void perf_sched_init(struct perf_sched *sched,
+			    struct perf_event **events, struct event_constraint **constraints,
 			    int num, int wmin, int wmax, int gpmax, u64 mevt, u64 mcnt)
 {
 	memset(sched, 0, sizeof(*sched));
@@ -854,12 +856,13 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
 	sched->max_weight	= wmax;
 	sched->max_gp		= gpmax;
 	sched->constraints	= constraints;
+	sched->events		= events;
 	sched->msk_events   = mevt;
 	sched->msk_counters = mcnt;
 
 	sched->state.weight = perf_sched_calc_weight(constraints, num, wmin, wmax, mcnt);
 	sched->state.event = perf_sched_calc_event(constraints, num, sched->state.weight, mevt);
-	sched->state.unassigned = num - hweight_long(sched->state.event);
+	sched->state.unassigned = num - hweight_long(mevt);
 }
 
 static void perf_sched_save_state(struct perf_sched *sched)
@@ -896,6 +899,7 @@ static bool perf_sched_restore_state(struct perf_sched *sched)
 static bool __perf_sched_find_counter(struct perf_sched *sched)
 {
 	struct event_constraint *c;
+	struct perf_event *e;
 	int idx;
 
 	if (!sched->state.unassigned)
@@ -905,16 +909,17 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 		return false;
 
 	c = sched->constraints[sched->state.event];
+	e = sched->events[sched->state.event];
 	/* Prefer fixed purpose counters */
 	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
 		idx = INTEL_PMC_IDX_FIXED;
 		for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
 			u64 mask = BIT_ULL(idx);
 
-			if (sched->msk_counters & mask)
+			if ((sched->msk_counters & mask) && is_sampling_event(e))
 				continue;
 
-			if (sched->state.used & mask)
+			if ((sched->state.used & mask) && is_sampling_event(e))
 				continue;
 
 			sched->state.used |= mask;
@@ -1016,14 +1021,15 @@ static void perf_sched_obtain_used_registers(int *assign, int n, u64 *events, u6
 	}
 }
 
-static int __perf_assign_events(struct event_constraint **constraints, int n,
+static int __perf_assign_events(struct perf_event **events,
+			struct event_constraint **constraints, int n,
 			int wmin, int wmax, int gpmax, int *assign)
 {
-	u64 msk_events, msk_counters;
+	u64 mevt, mcnt;
 	struct perf_sched sched;
 
-	perf_sched_obtain_used_registers(assign, n, &msk_events, &msk_counters);
-	perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax, msk_events, msk_counters);
+	perf_sched_obtain_used_registers(assign, n, &mevt, &mcnt);
+	perf_sched_init(&sched, events, constraints, n, wmin, wmax, gpmax, mevt, mcnt);
 
 	do {
 		if (!perf_sched_find_counter(&sched))
@@ -1035,6 +1041,13 @@ static int __perf_assign_events(struct event_constraint **constraints, int n,
 	return sched.state.unassigned;
 }
 
+static bool is_pmc_reuseable(struct perf_event *e,
+		struct event_constraint *c)
+{
+	return c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED) &&
+		!is_sampling_event(e);
+}
+
 /*
  * Assign a counter for each event.
  */
@@ -1043,12 +1056,13 @@ int perf_assign_events(struct perf_event **event_list,
 		int wmin, int wmax, int gpmax, int *assign)
 {
 	struct event_constraint *c;
+	struct perf_event *e;
 	struct hw_perf_event *hwc;
 	u64 used_mask = 0;
 	int unsched = 0;
 	int i;
 
-	memset(assign, -1, n);
+	memset(assign, -1, n * sizeof(int));
 
 	/*
 	 * fastpath, try to reuse previous register
@@ -1058,6 +1072,7 @@ int perf_assign_events(struct perf_event **event_list,
 
 		hwc = &event_list[i]->hw;
 		c = constraints[i];
+		e = event_list[i];
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -1072,8 +1087,10 @@ int perf_assign_events(struct perf_event **event_list,
 			mask |= mask << 1;
 
 		/* not already used */
-		if (used_mask & mask)
-			break;
+		if (used_mask & mask) {
+			if (!is_pmc_reuseable(e, c))
+				break;
+		}
 
 		used_mask |= mask;
 
@@ -1083,12 +1100,12 @@ int perf_assign_events(struct perf_event **event_list,
 
 	/* slow path */
 	if (i != n) {
-		unsched = __perf_assign_events(constraints, n,
+		unsched = __perf_assign_events(event_list, constraints, n,
 				wmin, wmax, gpmax, assign);
 
 		if (unsched) {
-			memset(assign, -1, n);
-			unsched = __perf_assign_events(constraints, n,
+			memset(assign, -1, n * sizeof(int));
+			unsched = __perf_assign_events(event_list, constraints, n,
 					wmin, wmax, gpmax, assign);
 		}
 	}
-- 
1.8.3.1
