linux-kernel - [PATCH 08/32] perf/x86/intel/cqm: prepare for next patches

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1461905018-86355-9-git-send-email-davidcc@google.com>
Date:	Thu, 28 Apr 2016 21:43:14 -0700
From:	David Carrillo-Cisneros <davidcc@...gle.com>
To:	Peter Zijlstra <peterz@...radead.org>,
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
	Arnaldo Carvalho de Melo <acme@...nel.org>,
	Ingo Molnar <mingo@...hat.com>
Cc:	Vikas Shivappa <vikas.shivappa@...ux.intel.com>,
	Matt Fleming <matt.fleming@...el.com>,
	Tony Luck <tony.luck@...el.com>,
	Stephane Eranian <eranian@...gle.com>,
	Paul Turner <pjt@...gle.com>,
	David Carrillo-Cisneros <davidcc@...gle.com>, x86@...nel.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH 08/32] perf/x86/intel/cqm: prepare for next patches

Move code around, delete unnecessary code and do some renaming in
order to increase readability of the next patches. Create the cqm.h file.

Reviewed-by: Stephane Eranian <eranian@...gle.com>
Signed-off-by: David Carrillo-Cisneros <davidcc@...gle.com>
---
 arch/x86/events/intel/cqm.c | 170 +++++++++++++++-----------------------------
 arch/x86/events/intel/cqm.h |  42 +++++++++++
 include/linux/perf_event.h  |   8 +--
 3 files changed, 103 insertions(+), 117 deletions(-)
 create mode 100644 arch/x86/events/intel/cqm.h

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index d5eac8f..f678014 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -4,10 +4,9 @@
  * Based very, very heavily on work by Peter Zijlstra.
  */
 
-#include <linux/perf_event.h>
 #include <linux/slab.h>
 #include <asm/cpu_device_id.h>
-#include <asm/pqr_common.h>
+#include "cqm.h"
 #include "../perf_event.h"
 
 #define MSR_IA32_QM_CTR		0x0c8e
@@ -16,13 +15,26 @@
 static u32 cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 
+#define RMID_VAL_ERROR		(1ULL << 63)
+#define RMID_VAL_UNAVAIL	(1ULL << 62)
+
+#define QOS_L3_OCCUP_EVENT_ID	(1 << 0)
+
+#define QOS_EVENT_MASK		QOS_L3_OCCUP_EVENT_ID
+
+#define CQM_EVENT_ATTR_STR(_name, v, str)					\
+static struct perf_pmu_events_attr event_attr_##v = {				\
+	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL),	\
+	.id		= 0,							\
+	.event_str	= str,							\
+}
+
 /*
  * Updates caller cpu's cache.
  */
 static inline void __update_pqr_rmid(u32 rmid)
 {
 	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-
 	if (state->rmid == rmid)
 		return;
 	state->rmid = rmid;
@@ -30,37 +42,18 @@ static inline void __update_pqr_rmid(u32 rmid)
 }
 
 /*
- * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
- * Also protects event->hw.cqm_rmid
- *
- * Hold either for stability, both for modification of ->hw.cqm_rmid.
- */
-static DEFINE_MUTEX(cache_mutex);
-static DEFINE_RAW_SPINLOCK(cache_lock);
-
-#define CQM_EVENT_ATTR_STR(_name, v, str)					\
-static struct perf_pmu_events_attr event_attr_##v = {				\
-	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL),	\
-	.id		= 0,							\
-	.event_str	= str,							\
-}
-
-/*
  * Groups of events that have the same target(s), one RMID per group.
+ * Protected by cqm_mutex.
  */
 static LIST_HEAD(cache_groups);
+static DEFINE_MUTEX(cqm_mutex);
+static DEFINE_RAW_SPINLOCK(cache_lock);
 
 /*
  * Mask of CPUs for reading CQM values. We only need one per-socket.
  */
 static cpumask_t cqm_cpumask;
 
-#define RMID_VAL_ERROR		(1ULL << 63)
-#define RMID_VAL_UNAVAIL	(1ULL << 62)
-
-#define QOS_L3_OCCUP_EVENT_ID	(1 << 0)
-
-#define QOS_EVENT_MASK	QOS_L3_OCCUP_EVENT_ID
 
 /*
  * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
@@ -71,8 +64,6 @@ static cpumask_t cqm_cpumask;
  */
 static u32 intel_cqm_rotation_rmid;
 
-#define INVALID_RMID		(-1)
-
 /*
  * Is @rmid valid for programming the hardware?
  *
@@ -140,7 +131,7 @@ struct cqm_rmid_entry {
  * rotation worker moves RMIDs from the limbo list to the free list once
  * the occupancy value drops below __intel_cqm_threshold.
  *
- * Both lists are protected by cache_mutex.
+ * Both lists are protected by cqm_mutex.
  */
 static LIST_HEAD(cqm_rmid_free_lru);
 static LIST_HEAD(cqm_rmid_limbo_lru);
@@ -172,13 +163,13 @@ static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
 /*
  * Returns < 0 on fail.
  *
- * We expect to be called with cache_mutex held.
+ * We expect to be called with cqm_mutex held.
  */
 static u32 __get_rmid(void)
 {
 	struct cqm_rmid_entry *entry;
 
-	lockdep_assert_held(&cache_mutex);
+	lockdep_assert_held(&cqm_mutex);
 
 	if (list_empty(&cqm_rmid_free_lru))
 		return INVALID_RMID;
@@ -193,7 +184,7 @@ static void __put_rmid(u32 rmid)
 {
 	struct cqm_rmid_entry *entry;
 
-	lockdep_assert_held(&cache_mutex);
+	lockdep_assert_held(&cqm_mutex);
 
 	WARN_ON(!__rmid_valid(rmid));
 	entry = __rmid_entry(rmid);
@@ -237,9 +228,9 @@ static int intel_cqm_setup_rmid_cache(void)
 	entry = __rmid_entry(0);
 	list_del(&entry->list);
 
-	mutex_lock(&cache_mutex);
+	mutex_lock(&cqm_mutex);
 	intel_cqm_rotation_rmid = __get_rmid();
-	mutex_unlock(&cache_mutex);
+	mutex_unlock(&cqm_mutex);
 
 	return 0;
 fail:
@@ -250,6 +241,7 @@ fail:
 	return -ENOMEM;
 }
 
+
 /*
  * Determine if @a and @b measure the same set of tasks.
  *
@@ -287,49 +279,11 @@ static bool __match_event(struct perf_event *a, struct perf_event *b)
 	return false;
 }
 
-#ifdef CONFIG_CGROUP_PERF
-static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
-{
-	if (event->attach_state & PERF_ATTACH_TASK)
-		return perf_cgroup_from_task(event->hw.target, event->ctx);
-
-	return event->cgrp;
-}
-#endif
-
 struct rmid_read {
 	u32 rmid;
 	atomic64_t value;
 };
 
-static void intel_cqm_event_read(struct perf_event *event);
-
-/*
- * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
- * cachelines are still tagged with RMIDs in limbo, we progressively
- * increment the threshold until we find an RMID in limbo with <=
- * __intel_cqm_threshold lines tagged. This is designed to mitigate the
- * problem where cachelines tagged with an RMID are not steadily being
- * evicted.
- *
- * On successful rotations we decrease the threshold back towards zero.
- *
- * __intel_cqm_max_threshold provides an upper bound on the threshold,
- * and is measured in bytes because it's exposed to userland.
- */
-static unsigned int __intel_cqm_threshold;
-static unsigned int __intel_cqm_max_threshold;
-
-/*
- * Initially use this constant for both the limbo queue time and the
- * rotation timer interval, pmu::hrtimer_interval_ms.
- *
- * They don't need to be the same, but the two are related since if you
- * rotate faster than you recycle RMIDs, you may run out of available
- * RMIDs.
- */
-#define RMID_DEFAULT_QUEUE_TIME 250	/* ms */
-
 static struct pmu intel_cqm_pmu;
 
 /*
@@ -344,7 +298,7 @@ static void intel_cqm_setup_event(struct perf_event *event,
 	bool conflict = false;
 	u32 rmid;
 
-	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+	list_for_each_entry(iter, &cache_groups, hw.cqm_event_groups_entry) {
 		rmid = iter->hw.cqm_rmid;
 
 		if (__match_event(iter, event)) {
@@ -390,24 +344,24 @@ out:
 
 static inline bool cqm_group_leader(struct perf_event *event)
 {
-	return !list_empty(&event->hw.cqm_groups_entry);
+	return !list_empty(&event->hw.cqm_event_groups_entry);
 }
 
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
-	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
+	if (!(event->hw.state & PERF_HES_STOPPED))
 		return;
 
-	event->hw.cqm_state &= ~PERF_HES_STOPPED;
+	event->hw.state &= ~PERF_HES_STOPPED;
 	__update_pqr_rmid(event->hw.cqm_rmid);
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
-	if (event->hw.cqm_state & PERF_HES_STOPPED)
+	if (event->hw.state & PERF_HES_STOPPED)
 		return;
 
-	event->hw.cqm_state |= PERF_HES_STOPPED;
+	event->hw.state |= PERF_HES_STOPPED;
 	intel_cqm_event_read(event);
 	__update_pqr_rmid(0);
 }
@@ -419,7 +373,7 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
 
 	raw_spin_lock_irqsave(&cache_lock, flags);
 
-	event->hw.cqm_state = PERF_HES_STOPPED;
+	event->hw.state = PERF_HES_STOPPED;
 	rmid = event->hw.cqm_rmid;
 
 	if (__rmid_valid(rmid) && (mode & PERF_EF_START))
@@ -433,16 +387,16 @@ static void intel_cqm_event_destroy(struct perf_event *event)
 {
 	struct perf_event *group_other = NULL;
 
-	mutex_lock(&cache_mutex);
+	mutex_lock(&cqm_mutex);
 
 	/*
 	 * If there's another event in this group...
 	 */
-	if (!list_empty(&event->hw.cqm_group_entry)) {
-		group_other = list_first_entry(&event->hw.cqm_group_entry,
+	if (!list_empty(&event->hw.cqm_event_group_entry)) {
+		group_other = list_first_entry(&event->hw.cqm_event_group_entry,
 					       struct perf_event,
-					       hw.cqm_group_entry);
-		list_del(&event->hw.cqm_group_entry);
+					       hw.cqm_event_group_entry);
+		list_del(&event->hw.cqm_event_group_entry);
 	}
 
 	/*
@@ -454,18 +408,18 @@ static void intel_cqm_event_destroy(struct perf_event *event)
 		 * destroy the group and return the RMID.
 		 */
 		if (group_other) {
-			list_replace(&event->hw.cqm_groups_entry,
-				     &group_other->hw.cqm_groups_entry);
+			list_replace(&event->hw.cqm_event_groups_entry,
+				     &group_other->hw.cqm_event_groups_entry);
 		} else {
 			u32 rmid = event->hw.cqm_rmid;
 
 			if (__rmid_valid(rmid))
 				__put_rmid(rmid);
-			list_del(&event->hw.cqm_groups_entry);
+			list_del(&event->hw.cqm_event_groups_entry);
 		}
 	}
 
-	mutex_unlock(&cache_mutex);
+	mutex_unlock(&cqm_mutex);
 }
 
 static int intel_cqm_event_init(struct perf_event *event)
@@ -488,25 +442,26 @@ static int intel_cqm_event_init(struct perf_event *event)
 	    event->attr.sample_period) /* no sampling */
 		return -EINVAL;
 
-	INIT_LIST_HEAD(&event->hw.cqm_group_entry);
-	INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
+	INIT_LIST_HEAD(&event->hw.cqm_event_groups_entry);
+	INIT_LIST_HEAD(&event->hw.cqm_event_group_entry);
 
 	event->destroy = intel_cqm_event_destroy;
 
-	mutex_lock(&cache_mutex);
+	mutex_lock(&cqm_mutex);
+
 
 	/* Will also set rmid */
 	intel_cqm_setup_event(event, &group);
 
 	if (group) {
-		list_add_tail(&event->hw.cqm_group_entry,
-			      &group->hw.cqm_group_entry);
+		list_add_tail(&event->hw.cqm_event_group_entry,
+				&group->hw.cqm_event_group_entry);
 	} else {
-		list_add_tail(&event->hw.cqm_groups_entry,
-			      &cache_groups);
+		list_add_tail(&event->hw.cqm_event_groups_entry,
+				&cache_groups);
 	}
 
-	mutex_unlock(&cache_mutex);
+	mutex_unlock(&cqm_mutex);
 
 	return 0;
 }
@@ -543,14 +498,14 @@ static struct attribute_group intel_cqm_format_group = {
 };
 
 static ssize_t
-max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
-			   char *page)
+max_recycle_threshold_show(
+	struct device *dev, struct device_attribute *attr, char *page)
 {
 	ssize_t rv;
 
-	mutex_lock(&cache_mutex);
+	mutex_lock(&cqm_mutex);
 	rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
-	mutex_unlock(&cache_mutex);
+	mutex_unlock(&cqm_mutex);
 
 	return rv;
 }
@@ -560,25 +515,16 @@ max_recycle_threshold_store(struct device *dev,
 			    struct device_attribute *attr,
 			    const char *buf, size_t count)
 {
-	unsigned int bytes, cachelines;
+	unsigned int bytes;
 	int ret;
 
 	ret = kstrtouint(buf, 0, &bytes);
 	if (ret)
 		return ret;
 
-	mutex_lock(&cache_mutex);
-
+	mutex_lock(&cqm_mutex);
 	__intel_cqm_max_threshold = bytes;
-	cachelines = bytes / cqm_l3_scale;
-
-	/*
-	 * The new maximum takes effect immediately.
-	 */
-	if (__intel_cqm_threshold > cachelines)
-		__intel_cqm_threshold = cachelines;
-
-	mutex_unlock(&cache_mutex);
+	mutex_unlock(&cqm_mutex);
 
 	return count;
 }
@@ -602,7 +548,7 @@ static const struct attribute_group *intel_cqm_attr_groups[] = {
 };
 
 static struct pmu intel_cqm_pmu = {
-	.hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
+	.hrtimer_interval_ms = CQM_DEFAULT_ROTATION_PERIOD,
 	.attr_groups	     = intel_cqm_attr_groups,
 	.task_ctx_nr	     = perf_sw_context,
 	.event_init	     = intel_cqm_event_init,
diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h
new file mode 100644
index 0000000..e25d0a1
--- /dev/null
+++ b/arch/x86/events/intel/cqm.h
@@ -0,0 +1,42 @@
+/*
+ * Intel Cache Quality-of-Service Monitoring (CQM) support.
+ *
+ * A Resource Monitoring ID (RMID) is a u32 value that, when programmed in a
+ * logical CPU, will allow the LLC cache to associate the changes in occupancy
+ * generated by that cpu (cache lines allocations - deallocations) to the RMID.
+ * If an rmid has been assigned to a thread T long enough for all cache lines
+ * used by T to be allocated, then the occupancy reported by the hardware is
+ * equal to the total cache occupancy for T.
+ *
+ * Groups of threads that are to be monitored together (such as cgroups
+ * or processes) can share an RMID.
+ *
+ * This driver implements a tree hierarchy of Monitored Resources (monr). Each
+ * monr is a cgroup, a process or a thread that needs one single RMID.
+ */
+
+#include <linux/perf_event.h>
+#include <asm/pqr_common.h>
+
+/*
+ * Minimum time elapsed between reads of occupancy value for an RMID when
+ * traversing the monr hierarchy.
+ */
+#define RMID_DEFAULT_MIN_UPDATE_TIME 20	/* ms */
+
+# define INVALID_RMID (-1)
+
+/*
+ * Time between executions of the rotation logic. The frequency of execution
+ * does not affect the rate at which RMIDs are recycled, except through the
+ * delay in updating the prmids and their pools.
+ * The rotation period is stored in pmu->hrtimer_interval_ms.
+ */
+#define CQM_DEFAULT_ROTATION_PERIOD 1200	/* ms */
+
+/*
+ * __intel_cqm_max_threshold provides an upper bound on the threshold,
+ * and is measured in bytes because it's exposed to userland.
+ * Its units are bytes; it must be scaled by cqm_l3_scale to obtain cache lines.
+ */
+static unsigned int __intel_cqm_max_threshold;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3a847bf..5eb7dea 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -120,11 +120,9 @@ struct hw_perf_event {
 		};
 #ifdef CONFIG_INTEL_RDT
 		struct { /* intel_cqm */
-			int			cqm_state;
-			u32			cqm_rmid;
-			struct list_head	cqm_events_entry;
-			struct list_head	cqm_groups_entry;
-			struct list_head	cqm_group_entry;
+			u32                     cqm_rmid;
+			struct list_head	cqm_event_group_entry;
+			struct list_head	cqm_event_groups_entry;
 		};
 #endif
 		struct { /* itrace */
-- 
2.8.0.rc3.226.g39d4020