Date:	Thu, 28 Apr 2016 21:43:10 -0700
From:	David Carrillo-Cisneros <davidcc@...gle.com>
To:	Peter Zijlstra <peterz@...radead.org>,
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
	Arnaldo Carvalho de Melo <acme@...nel.org>,
	Ingo Molnar <mingo@...hat.com>
Cc:	Vikas Shivappa <vikas.shivappa@...ux.intel.com>,
	Matt Fleming <matt.fleming@...el.com>,
	Tony Luck <tony.luck@...el.com>,
	Stephane Eranian <eranian@...gle.com>,
	Paul Turner <pjt@...gle.com>,
	David Carrillo-Cisneros <davidcc@...gle.com>, x86@...nel.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH 04/32] perf/x86/intel/cqm: make read of RMIDs per package (Temporal)

The previous version of Intel's CQM introduced pmu::count as a replacement
for pmu::read when reading CQM events. This was done to avoid sending an IPI
to read the CQM occupancy event for events attached to a thread.
Using pmu::count in place of pmu::read is inconsistent with the usage in
other PMUs and introduces several problems, such as:
  1) pmu::read for thread events returns bogus values when called from a
  context with interrupts disabled.
  2) The behavior of perf_event_count() depends on whether interrupts are
  enabled or not (see the paraphrased code below).
  3) perf_event_count() always reads a fresh value from the PMU, which is
  inconsistent with the behavior of other events.
  4) perf_event_count() performs slow MSR reads and writes and sends IPIs.

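For reference, the relevant generic code in kernel/events/core.c looks
roughly as follows at this point (paraphrased, not part of this patch; see
the actual tree for the exact version). With pmu::count set, every
perf_event_count() call goes through the driver callback, which for CQM
meant fresh MSR accesses and cross-package IPIs; without it, the generic
path just returns the cached count:

  /* Paraphrase of kernel/events/core.c around this kernel version. */
  static u64 __perf_event_count(struct perf_event *event)
  {
  	/* Cached count plus whatever the children accumulated. */
  	return local64_read(&event->count) +
  	       atomic64_read(&event->child_count);
  }

  static u64 perf_event_count(struct perf_event *event)
  {
  	/*
  	 * If the PMU provides pmu::count, it is consulted on every call,
  	 * which for CQM meant MSR reads/writes and IPIs.
  	 */
  	if (event->pmu->count)
  		return event->pmu->count(event);

  	return __perf_event_count(event);
  }
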
This patch removes pmu::count from CQM and makes pmu::read always read
from the local socket (package). Future patches will add a mechanism to
aggregate the event count from other packages.
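For illustration only, a cross-package aggregate could be collected on top
of the existing local read (which, as __rmid_read() in this file does,
programs MSR_IA32_QM_EVTSEL with the RMID and reads MSR_IA32_QM_CTR) along
the lines of the sketch below. This is not the mechanism the follow-up
patches introduce; pkg_reader_mask and the helper names are hypothetical,
and the shape mirrors the __intel_cqm_event_count() path being removed:

  /*
   * Illustrative sketch only: sum L3 occupancy for @rmid across packages
   * by asking one "reader" CPU per package to sample the counter MSRs.
   * pkg_reader_mask and the helpers below are made-up names; the driver
   * keeps a similar per-package reader mask in cqm_cpumask.
   */
  struct pkg_read {
  	u32		rmid;
  	atomic64_t	total;
  };

  static void read_pkg_occupancy(void *info)
  {
  	struct pkg_read *pr = info;
  	u64 val;

  	/* Select the L3 occupancy event for this RMID, then read it. */
  	wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, pr->rmid);
  	rdmsrl(MSR_IA32_QM_CTR, val);

  	if (!(val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)))
  		atomic64_add(val, &pr->total);
  }

  static u64 read_all_pkgs_occupancy(u32 rmid,
  				     const struct cpumask *pkg_reader_mask)
  {
  	struct pkg_read pr = {
  		.rmid	= rmid,
  		.total	= ATOMIC64_INIT(0),
  	};

  	/* One IPI per package; not usable with interrupts disabled. */
  	on_each_cpu_mask(pkg_reader_mask, read_pkg_occupancy, &pr, 1);

  	return atomic64_read(&pr.total);
  }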

This patch also removes the now-unused field rmid_usecnt from intel_pqr_state.

Reviewed-by: Stephane Eranian <eranian@...gle.com>
Signed-off-by: David Carrillo-Cisneros <davidcc@...gle.com>
---
 arch/x86/events/intel/cqm.c | 125 ++++++--------------------------------------
 1 file changed, 16 insertions(+), 109 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 3c1e247..afd60dd 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -20,7 +20,6 @@ static unsigned int cqm_l3_scale; /* supposedly cacheline size */
  * struct intel_pqr_state - State cache for the PQR MSR
  * @rmid:		The cached Resource Monitoring ID
  * @closid:		The cached Class Of Service ID
- * @rmid_usecnt:	The usage counter for rmid
  *
  * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
  * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
@@ -32,7 +31,6 @@ static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 struct intel_pqr_state {
 	u32			rmid;
 	u32			closid;
-	int			rmid_usecnt;
 };
 
 /*
@@ -44,6 +42,19 @@ struct intel_pqr_state {
 static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 
 /*
+ * Updates caller cpu's cache.
+ */
+static inline void __update_pqr_rmid(u32 rmid)
+{
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
+	if (state->rmid == rmid)
+		return;
+	state->rmid = rmid;
+	wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
+}
+
+/*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
  * Also protects event->hw.cqm_rmid
  *
@@ -309,7 +320,7 @@ struct rmid_read {
 	atomic64_t value;
 };
 
-static void __intel_cqm_event_count(void *info);
+static void intel_cqm_event_read(struct perf_event *event);
 
 /*
  * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
@@ -376,12 +387,6 @@ static void intel_cqm_event_read(struct perf_event *event)
 	u32 rmid;
 	u64 val;
 
-	/*
-	 * Task events are handled by intel_cqm_event_count().
-	 */
-	if (event->cpu == -1)
-		return;
-
 	raw_spin_lock_irqsave(&cache_lock, flags);
 	rmid = event->hw.cqm_rmid;
 
@@ -401,123 +406,28 @@ out:
 	raw_spin_unlock_irqrestore(&cache_lock, flags);
 }
 
-static void __intel_cqm_event_count(void *info)
-{
-	struct rmid_read *rr = info;
-	u64 val;
-
-	val = __rmid_read(rr->rmid);
-
-	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
-		return;
-
-	atomic64_add(val, &rr->value);
-}
-
 static inline bool cqm_group_leader(struct perf_event *event)
 {
 	return !list_empty(&event->hw.cqm_groups_entry);
 }
 
-static u64 intel_cqm_event_count(struct perf_event *event)
-{
-	unsigned long flags;
-	struct rmid_read rr = {
-		.value = ATOMIC64_INIT(0),
-	};
-
-	/*
-	 * We only need to worry about task events. System-wide events
-	 * are handled like usual, i.e. entirely with
-	 * intel_cqm_event_read().
-	 */
-	if (event->cpu != -1)
-		return __perf_event_count(event);
-
-	/*
-	 * Only the group leader gets to report values. This stops us
-	 * reporting duplicate values to userspace, and gives us a clear
-	 * rule for which task gets to report the values.
-	 *
-	 * Note that it is impossible to attribute these values to
-	 * specific packages - we forfeit that ability when we create
-	 * task events.
-	 */
-	if (!cqm_group_leader(event))
-		return 0;
-
-	/*
-	 * Getting up-to-date values requires an SMP IPI which is not
-	 * possible if we're being called in interrupt context. Return
-	 * the cached values instead.
-	 */
-	if (unlikely(in_interrupt()))
-		goto out;
-
-	/*
-	 * Notice that we don't perform the reading of an RMID
-	 * atomically, because we can't hold a spin lock across the
-	 * IPIs.
-	 *
-	 * Speculatively perform the read, since @event might be
-	 * assigned a different (possibly invalid) RMID while we're
-	 * busying performing the IPI calls. It's therefore necessary to
-	 * check @event's RMID afterwards, and if it has changed,
-	 * discard the result of the read.
-	 */
-	rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
-
-	if (!__rmid_valid(rr.rmid))
-		goto out;
-
-	on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
-
-	raw_spin_lock_irqsave(&cache_lock, flags);
-	if (event->hw.cqm_rmid == rr.rmid)
-		local64_set(&event->count, atomic64_read(&rr.value));
-	raw_spin_unlock_irqrestore(&cache_lock, flags);
-out:
-	return __perf_event_count(event);
-}
-
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
-	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-	u32 rmid = event->hw.cqm_rmid;
-
 	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
 		return;
 
 	event->hw.cqm_state &= ~PERF_HES_STOPPED;
-
-	if (state->rmid_usecnt++) {
-		if (!WARN_ON_ONCE(state->rmid != rmid))
-			return;
-	} else {
-		WARN_ON_ONCE(state->rmid);
-	}
-
-	state->rmid = rmid;
-	wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
+	__update_pqr_rmid(event->hw.cqm_rmid);
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
-	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-
 	if (event->hw.cqm_state & PERF_HES_STOPPED)
 		return;
 
 	event->hw.cqm_state |= PERF_HES_STOPPED;
-
 	intel_cqm_event_read(event);
-
-	if (!--state->rmid_usecnt) {
-		state->rmid = 0;
-		wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
-	} else {
-		WARN_ON_ONCE(!state->rmid);
-	}
+	__update_pqr_rmid(0);
 }
 
 static int intel_cqm_event_add(struct perf_event *event, int mode)
@@ -534,7 +444,6 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
 		intel_cqm_event_start(event, mode);
 
 	raw_spin_unlock_irqrestore(&cache_lock, flags);
-
 	return 0;
 }
 
@@ -720,7 +629,6 @@ static struct pmu intel_cqm_pmu = {
 	.start		     = intel_cqm_event_start,
 	.stop		     = intel_cqm_event_stop,
 	.read		     = intel_cqm_event_read,
-	.count		     = intel_cqm_event_count,
 };
 
 static inline void cqm_pick_event_reader(int cpu)
@@ -743,7 +651,6 @@ static void intel_cqm_cpu_starting(unsigned int cpu)
 
 	state->rmid = 0;
 	state->closid = 0;
-	state->rmid_usecnt = 0;
 
 	WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
 	WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
-- 
2.8.0.rc3.226.g39d4020
