[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1461905018-86355-9-git-send-email-davidcc@google.com>
Date: Thu, 28 Apr 2016 21:43:14 -0700
From: David Carrillo-Cisneros <davidcc@...gle.com>
To: Peter Zijlstra <peterz@...radead.org>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Ingo Molnar <mingo@...hat.com>
Cc: Vikas Shivappa <vikas.shivappa@...ux.intel.com>,
Matt Fleming <matt.fleming@...el.com>,
Tony Luck <tony.luck@...el.com>,
Stephane Eranian <eranian@...gle.com>,
Paul Turner <pjt@...gle.com>,
David Carrillo-Cisneros <davidcc@...gle.com>, x86@...nel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH 08/32] perf/x86/intel/cqm: prepare for next patches
Move code around, delete unnecessary code and do some renaming in
order to increase readability of the next patches. Create cqm.h file.
Reviewed-by: Stephane Eranian <eranian@...gle.com>
Signed-off-by: David Carrillo-Cisneros <davidcc@...gle.com>
---
arch/x86/events/intel/cqm.c | 170 +++++++++++++++-----------------------------
arch/x86/events/intel/cqm.h | 42 +++++++++++
include/linux/perf_event.h | 8 +--
3 files changed, 103 insertions(+), 117 deletions(-)
create mode 100644 arch/x86/events/intel/cqm.h
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index d5eac8f..f678014 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -4,10 +4,9 @@
* Based very, very heavily on work by Peter Zijlstra.
*/
-#include <linux/perf_event.h>
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
-#include <asm/pqr_common.h>
+#include "cqm.h"
#include "../perf_event.h"
#define MSR_IA32_QM_CTR 0x0c8e
@@ -16,13 +15,26 @@
static u32 cqm_max_rmid = -1;
static unsigned int cqm_l3_scale; /* supposedly cacheline size */
+#define RMID_VAL_ERROR (1ULL << 63)
+#define RMID_VAL_UNAVAIL (1ULL << 62)
+
+#define QOS_L3_OCCUP_EVENT_ID (1 << 0)
+
+#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID
+
+#define CQM_EVENT_ATTR_STR(_name, v, str) \
+static struct perf_pmu_events_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
+ .id = 0, \
+ .event_str = str, \
+}
+
/*
* Updates caller cpu's cache.
*/
static inline void __update_pqr_rmid(u32 rmid)
{
struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-
if (state->rmid == rmid)
return;
state->rmid = rmid;
@@ -30,37 +42,18 @@ static inline void __update_pqr_rmid(u32 rmid)
}
/*
- * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
- * Also protects event->hw.cqm_rmid
- *
- * Hold either for stability, both for modification of ->hw.cqm_rmid.
- */
-static DEFINE_MUTEX(cache_mutex);
-static DEFINE_RAW_SPINLOCK(cache_lock);
-
-#define CQM_EVENT_ATTR_STR(_name, v, str) \
-static struct perf_pmu_events_attr event_attr_##v = { \
- .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
- .id = 0, \
- .event_str = str, \
-}
-
-/*
* Groups of events that have the same target(s), one RMID per group.
+ * Protected by cqm_mutex.
*/
static LIST_HEAD(cache_groups);
+static DEFINE_MUTEX(cqm_mutex);
+static DEFINE_RAW_SPINLOCK(cache_lock);
/*
* Mask of CPUs for reading CQM values. We only need one per-socket.
*/
static cpumask_t cqm_cpumask;
-#define RMID_VAL_ERROR (1ULL << 63)
-#define RMID_VAL_UNAVAIL (1ULL << 62)
-
-#define QOS_L3_OCCUP_EVENT_ID (1 << 0)
-
-#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID
/*
* This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
@@ -71,8 +64,6 @@ static cpumask_t cqm_cpumask;
*/
static u32 intel_cqm_rotation_rmid;
-#define INVALID_RMID (-1)
-
/*
* Is @rmid valid for programming the hardware?
*
@@ -140,7 +131,7 @@ struct cqm_rmid_entry {
* rotation worker moves RMIDs from the limbo list to the free list once
* the occupancy value drops below __intel_cqm_threshold.
*
- * Both lists are protected by cache_mutex.
+ * Both lists are protected by cqm_mutex.
*/
static LIST_HEAD(cqm_rmid_free_lru);
static LIST_HEAD(cqm_rmid_limbo_lru);
@@ -172,13 +163,13 @@ static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
/*
* Returns < 0 on fail.
*
- * We expect to be called with cache_mutex held.
+ * We expect to be called with cqm_mutex held.
*/
static u32 __get_rmid(void)
{
struct cqm_rmid_entry *entry;
- lockdep_assert_held(&cache_mutex);
+ lockdep_assert_held(&cqm_mutex);
if (list_empty(&cqm_rmid_free_lru))
return INVALID_RMID;
@@ -193,7 +184,7 @@ static void __put_rmid(u32 rmid)
{
struct cqm_rmid_entry *entry;
- lockdep_assert_held(&cache_mutex);
+ lockdep_assert_held(&cqm_mutex);
WARN_ON(!__rmid_valid(rmid));
entry = __rmid_entry(rmid);
@@ -237,9 +228,9 @@ static int intel_cqm_setup_rmid_cache(void)
entry = __rmid_entry(0);
list_del(&entry->list);
- mutex_lock(&cache_mutex);
+ mutex_lock(&cqm_mutex);
intel_cqm_rotation_rmid = __get_rmid();
- mutex_unlock(&cache_mutex);
+ mutex_unlock(&cqm_mutex);
return 0;
fail:
@@ -250,6 +241,7 @@ fail:
return -ENOMEM;
}
+
/*
* Determine if @a and @b measure the same set of tasks.
*
@@ -287,49 +279,11 @@ static bool __match_event(struct perf_event *a, struct perf_event *b)
return false;
}
-#ifdef CONFIG_CGROUP_PERF
-static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
-{
- if (event->attach_state & PERF_ATTACH_TASK)
- return perf_cgroup_from_task(event->hw.target, event->ctx);
-
- return event->cgrp;
-}
-#endif
-
struct rmid_read {
u32 rmid;
atomic64_t value;
};
-static void intel_cqm_event_read(struct perf_event *event);
-
-/*
- * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
- * cachelines are still tagged with RMIDs in limbo, we progressively
- * increment the threshold until we find an RMID in limbo with <=
- * __intel_cqm_threshold lines tagged. This is designed to mitigate the
- * problem where cachelines tagged with an RMID are not steadily being
- * evicted.
- *
- * On successful rotations we decrease the threshold back towards zero.
- *
- * __intel_cqm_max_threshold provides an upper bound on the threshold,
- * and is measured in bytes because it's exposed to userland.
- */
-static unsigned int __intel_cqm_threshold;
-static unsigned int __intel_cqm_max_threshold;
-
-/*
- * Initially use this constant for both the limbo queue time and the
- * rotation timer interval, pmu::hrtimer_interval_ms.
- *
- * They don't need to be the same, but the two are related since if you
- * rotate faster than you recycle RMIDs, you may run out of available
- * RMIDs.
- */
-#define RMID_DEFAULT_QUEUE_TIME 250 /* ms */
-
static struct pmu intel_cqm_pmu;
/*
@@ -344,7 +298,7 @@ static void intel_cqm_setup_event(struct perf_event *event,
bool conflict = false;
u32 rmid;
- list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+ list_for_each_entry(iter, &cache_groups, hw.cqm_event_groups_entry) {
rmid = iter->hw.cqm_rmid;
if (__match_event(iter, event)) {
@@ -390,24 +344,24 @@ out:
static inline bool cqm_group_leader(struct perf_event *event)
{
- return !list_empty(&event->hw.cqm_groups_entry);
+ return !list_empty(&event->hw.cqm_event_groups_entry);
}
static void intel_cqm_event_start(struct perf_event *event, int mode)
{
- if (!(event->hw.cqm_state & PERF_HES_STOPPED))
+ if (!(event->hw.state & PERF_HES_STOPPED))
return;
- event->hw.cqm_state &= ~PERF_HES_STOPPED;
+ event->hw.state &= ~PERF_HES_STOPPED;
__update_pqr_rmid(event->hw.cqm_rmid);
}
static void intel_cqm_event_stop(struct perf_event *event, int mode)
{
- if (event->hw.cqm_state & PERF_HES_STOPPED)
+ if (event->hw.state & PERF_HES_STOPPED)
return;
- event->hw.cqm_state |= PERF_HES_STOPPED;
+ event->hw.state |= PERF_HES_STOPPED;
intel_cqm_event_read(event);
__update_pqr_rmid(0);
}
@@ -419,7 +373,7 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
raw_spin_lock_irqsave(&cache_lock, flags);
- event->hw.cqm_state = PERF_HES_STOPPED;
+ event->hw.state = PERF_HES_STOPPED;
rmid = event->hw.cqm_rmid;
if (__rmid_valid(rmid) && (mode & PERF_EF_START))
@@ -433,16 +387,16 @@ static void intel_cqm_event_destroy(struct perf_event *event)
{
struct perf_event *group_other = NULL;
- mutex_lock(&cache_mutex);
+ mutex_lock(&cqm_mutex);
/*
* If there's another event in this group...
*/
- if (!list_empty(&event->hw.cqm_group_entry)) {
- group_other = list_first_entry(&event->hw.cqm_group_entry,
+ if (!list_empty(&event->hw.cqm_event_group_entry)) {
+ group_other = list_first_entry(&event->hw.cqm_event_group_entry,
struct perf_event,
- hw.cqm_group_entry);
- list_del(&event->hw.cqm_group_entry);
+ hw.cqm_event_group_entry);
+ list_del(&event->hw.cqm_event_group_entry);
}
/*
@@ -454,18 +408,18 @@ static void intel_cqm_event_destroy(struct perf_event *event)
* destroy the group and return the RMID.
*/
if (group_other) {
- list_replace(&event->hw.cqm_groups_entry,
- &group_other->hw.cqm_groups_entry);
+ list_replace(&event->hw.cqm_event_groups_entry,
+ &group_other->hw.cqm_event_groups_entry);
} else {
u32 rmid = event->hw.cqm_rmid;
if (__rmid_valid(rmid))
__put_rmid(rmid);
- list_del(&event->hw.cqm_groups_entry);
+ list_del(&event->hw.cqm_event_groups_entry);
}
}
- mutex_unlock(&cache_mutex);
+ mutex_unlock(&cqm_mutex);
}
static int intel_cqm_event_init(struct perf_event *event)
@@ -488,25 +442,26 @@ static int intel_cqm_event_init(struct perf_event *event)
event->attr.sample_period) /* no sampling */
return -EINVAL;
- INIT_LIST_HEAD(&event->hw.cqm_group_entry);
- INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
+ INIT_LIST_HEAD(&event->hw.cqm_event_groups_entry);
+ INIT_LIST_HEAD(&event->hw.cqm_event_group_entry);
event->destroy = intel_cqm_event_destroy;
- mutex_lock(&cache_mutex);
+ mutex_lock(&cqm_mutex);
+
/* Will also set rmid */
intel_cqm_setup_event(event, &group);
if (group) {
- list_add_tail(&event->hw.cqm_group_entry,
- &group->hw.cqm_group_entry);
+ list_add_tail(&event->hw.cqm_event_group_entry,
+ &group->hw.cqm_event_group_entry);
} else {
- list_add_tail(&event->hw.cqm_groups_entry,
- &cache_groups);
+ list_add_tail(&event->hw.cqm_event_groups_entry,
+ &cache_groups);
}
- mutex_unlock(&cache_mutex);
+ mutex_unlock(&cqm_mutex);
return 0;
}
@@ -543,14 +498,14 @@ static struct attribute_group intel_cqm_format_group = {
};
static ssize_t
-max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
- char *page)
+max_recycle_threshold_show(
+ struct device *dev, struct device_attribute *attr, char *page)
{
ssize_t rv;
- mutex_lock(&cache_mutex);
+ mutex_lock(&cqm_mutex);
rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
- mutex_unlock(&cache_mutex);
+ mutex_unlock(&cqm_mutex);
return rv;
}
@@ -560,25 +515,16 @@ max_recycle_threshold_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- unsigned int bytes, cachelines;
+ unsigned int bytes;
int ret;
ret = kstrtouint(buf, 0, &bytes);
if (ret)
return ret;
- mutex_lock(&cache_mutex);
-
+ mutex_lock(&cqm_mutex);
__intel_cqm_max_threshold = bytes;
- cachelines = bytes / cqm_l3_scale;
-
- /*
- * The new maximum takes effect immediately.
- */
- if (__intel_cqm_threshold > cachelines)
- __intel_cqm_threshold = cachelines;
-
- mutex_unlock(&cache_mutex);
+ mutex_unlock(&cqm_mutex);
return count;
}
@@ -602,7 +548,7 @@ static const struct attribute_group *intel_cqm_attr_groups[] = {
};
static struct pmu intel_cqm_pmu = {
- .hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
+ .hrtimer_interval_ms = CQM_DEFAULT_ROTATION_PERIOD,
.attr_groups = intel_cqm_attr_groups,
.task_ctx_nr = perf_sw_context,
.event_init = intel_cqm_event_init,
diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h
new file mode 100644
index 0000000..e25d0a1
--- /dev/null
+++ b/arch/x86/events/intel/cqm.h
@@ -0,0 +1,42 @@
+/*
+ * Intel Cache Quality-of-Service Monitoring (CQM) support.
+ *
+ * A Resource Manager ID (RMID) is a u32 value that, when programmed in a
+ * logical CPU, will allow the LLC cache to associate the changes in occupancy
+ * generated by that cpu (cache lines allocations - deallocations) to the RMID.
+ * If an rmid has been assigned to a thread T long enough for all cache lines
+ * used by T to be allocated, then the occupancy reported by the hardware is
+ * equal to the total cache occupancy for T.
+ *
+ * Groups of threads that are to be monitored together (such as cgroups
+ * or processes) can share an RMID.
+ *
+ * This driver implements a tree hierarchy of Monitored Resources (monr). Each
+ * monr is a cgroup, a process or a thread that needs one single RMID.
+ */
+
+#include <linux/perf_event.h>
+#include <asm/pqr_common.h>
+
+/*
+ * Minimum time elapsed between reads of occupancy value for an RMID when
+ * traversing the monr hierarchy.
+ */
+#define RMID_DEFAULT_MIN_UPDATE_TIME 20 /* ms */
+
+# define INVALID_RMID (-1)
+
+/*
+ * Time between execution of rotation logic. The frequency of execution does
+ * not affect the rate at which RMIDs are recycled, except for the delay in
+ * updating the prmids and their pools.
+ * The rotation period is stored in pmu->hrtimer_interval_ms.
+ */
+#define CQM_DEFAULT_ROTATION_PERIOD 1200 /* ms */
+
+/*
+ * __intel_cqm_max_threshold provides an upper bound on the threshold,
+ * and is measured in bytes because it's exposed to userland.
+ * Its units are bytes; it must be scaled by cqm_l3_scale to obtain cache lines.
+ */
+static unsigned int __intel_cqm_max_threshold;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3a847bf..5eb7dea 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -120,11 +120,9 @@ struct hw_perf_event {
};
#ifdef CONFIG_INTEL_RDT
struct { /* intel_cqm */
- int cqm_state;
- u32 cqm_rmid;
- struct list_head cqm_events_entry;
- struct list_head cqm_groups_entry;
- struct list_head cqm_group_entry;
+ u32 cqm_rmid;
+ struct list_head cqm_event_group_entry;
+ struct list_head cqm_event_groups_entry;
};
#endif
struct { /* itrace */
--
2.8.0.rc3.226.g39d4020
Powered by blists - more mailing lists