Date:   Thu, 19 Oct 2017 09:55:36 -0700
From:   kan.liang@...el.com
To:     tglx@...utronix.de, peterz@...radead.org, mingo@...hat.com,
        linux-kernel@...r.kernel.org
Cc:     acme@...nel.org, eranian@...gle.com, ak@...ux.intel.com,
        Kan Liang <Kan.liang@...el.com>
Subject: [PATCH V2 3/4] perf/x86/intel/uncore: add infrastructure for freerunning counters

From: Kan Liang <Kan.liang@...el.com>

A number of freerunning counters have been introduced for uncore.
For example, Skylake Server has IIO freerunning counters to collect
Input/Output bandwidth and utilization.

A freerunning counter is similar to a fixed counter, except that it
cannot be written by SW. It needs special handling in the generic code
and must not be added to the box->events list.

Introduce a new idx to indicate a freerunning counter. A single idx is
enough for all freerunning counters, because events and freerunning
counters are always 1:1 mapped and a freerunning counter is always
available; no extra idx is needed to indicate the assigned counter.
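Concretely, with UNCORE_PMC_IDX_MAX_GENERIC = 8, the fixed counter
keeps idx 8 and the shared freerunning idx becomes 9
(UNCORE_PMC_IDX_FREERUNNING), raising UNCORE_PMC_IDX_MAX to 10; these
values follow from the macros added to uncore.h below.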

Freerunning events share the fixed events' event code, 0xff. The umask
of a freerunning event starts from 0x10; umask values below 0x10 are
reserved for fixed events.

Freerunning counters can have different MSR locations and offsets.
Accordingly, they are divided into types, and each type is limited to
at most 16 events. The umask of the first freerunning event type
therefore starts from 0x10, the umask of the second from 0x20, and so
on.
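
As a worked example (an editorial sketch, not part of the patch; the
config value is made up): a raw config with event=0xff and umask=0x25
decodes to type (0x25 - 0x10) >> 4 = 1 and idx 0x25 & 0xf = 5, i.e. the
sixth counter of the second freerunning type. A small userspace sketch
of the decode performed by the helpers added to uncore.h below:

	#include <stdint.h>
	#include <stdio.h>

	#define UNCORE_FREERUNNING_MSR_START	0x10	/* as in the patch */

	/* Same decode as uncore_freerunning_msr_idx() below. */
	static unsigned int fr_idx(uint64_t config)
	{
		return (config >> 8) & 0xf;
	}

	/* Same decode as uncore_freerunning_msr_type() below. */
	static unsigned int fr_type(uint64_t config)
	{
		return (((config >> 8) - UNCORE_FREERUNNING_MSR_START) >> 4) & 0xf;
	}

	int main(void)
	{
		uint64_t config = 0xff | (0x25ULL << 8);	/* made-up event */

		/* Prints "type=1 idx=5" */
		printf("type=%u idx=%u\n", fr_type(config), fr_idx(config));
		return 0;
	}

The counter's MSR address then follows as msr_base + idx +
msr_off * pmu_idx, as computed by uncore_freerunning_msr() below.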

Signed-off-by: Kan Liang <Kan.liang@...el.com>
---

Changes since V1:
 - Split out the generic changes into this patch
 - Add more comments
 - Use unsigned int to replace unsigned
 - s/type/types/ for num_free_running_type
 - Use the unified name 'freerunning'

 arch/x86/events/intel/uncore.c |  61 +++++++++++++++++++++++--
 arch/x86/events/intel/uncore.h | 101 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 158 insertions(+), 4 deletions(-)
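
For context, an editorial sketch (not part of this series) of how a
platform might describe its freerunning counters with the structures
added below; all names and MSR values here are hypothetical, and the
real tables belong in the platform-specific patch of this series:

	/* Assumes the struct freerunning_msr and struct intel_uncore_type
	 * definitions added to uncore.h by this patch.
	 */
	static struct freerunning_msr hypothetical_iio_freerunning[] = {
		/* one type: 4 counters, 36 bits wide; made-up MSR base
		 * and per-box offset
		 */
		{ .msr_base = 0xa40, .msr_off = 0x10,
		  .num_counters = 4, .bits = 36 },
	};

	static struct intel_uncore_type hypothetical_iio_free_running = {
		.name			= "iio_free_running",
		.num_counters		= 4,
		.num_freerunning_types	= 1,
		.freerunning		= hypothetical_iio_freerunning,
	};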

diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 76c1c78..651046c 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -218,7 +218,9 @@ void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *e
 	u64 prev_count, new_count, delta;
 	int shift;
 
-	if (uncore_pmc_fixed(event->hw.idx))
+	if (uncore_pmc_freerunning(event->hw.idx))
+		shift = 64 - uncore_freerunning_bits(box, event);
+	else if (uncore_pmc_fixed(event->hw.idx))
 		shift = 64 - uncore_fixed_ctr_bits(box);
 	else
 		shift = 64 - uncore_perf_ctr_bits(box);
@@ -362,6 +364,10 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
 		if (n >= max_count)
 			return -EINVAL;
 
+		/* Freerunning events are not tracked by the box->events list */
+		if (uncore_pmc_freerunning(event->hw.idx))
+			continue;
+
 		box->event_list[n] = event;
 		n++;
 	}
@@ -454,10 +460,21 @@ static void uncore_pmu_event_start(struct perf_event *event, int flags)
 	struct intel_uncore_box *box = uncore_event_to_box(event);
 	int idx = event->hw.idx;
 
-	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
 		return;
 
-	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
+	/*
+	 * Freerunning counters cannot be written by SW.
+	 * There is no need to enable them or to add the event to the
+	 * box->events list. Use the current value as the start point.
+	 */
+	if (uncore_pmc_freerunning(event->hw.idx)) {
+		local64_set(&event->hw.prev_count,
+			    uncore_read_counter(box, event));
+		return;
+	}
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
 		return;
 
 	event->hw.state = 0;
@@ -479,6 +496,15 @@ static void uncore_pmu_event_stop(struct perf_event *event, int flags)
 	struct intel_uncore_box *box = uncore_event_to_box(event);
 	struct hw_perf_event *hwc = &event->hw;
 
+	/*
+	 * Freerunning counters do not need to be disabled.
+	 * Read the current value as the end point.
+	 */
+	if (uncore_pmc_freerunning(hwc->idx)) {
+		uncore_perf_event_update(box, event);
+		return;
+	}
+
 	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
 		uncore_disable_event(box, event);
 		box->n_active--;
@@ -512,6 +538,17 @@ static int uncore_pmu_event_add(struct perf_event *event, int flags)
 	if (!box)
 		return -ENODEV;
 
+	/*
+	 * Events and freerunning counters are 1:1 mapped.
+	 * There is no need to assign a counter to the event.
+	 */
+	if (uncore_pmc_freerunning(hwc->idx)) {
+		event->hw.event_base = uncore_freerunning_msr(box, event);
+		if (flags & PERF_EF_START)
+			uncore_pmu_event_start(event, 0);
+		return 0;
+	}
+
 	ret = n = uncore_collect_events(box, event, false);
 	if (ret < 0)
 		return ret;
@@ -570,6 +607,13 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags)
 
 	uncore_pmu_event_stop(event, PERF_EF_UPDATE);
 
+	/*
+	 * Freerunning counters cannot be written by SW.
+	 * No need to force event->hw.idx = -1 to reassign the counter.
+	 */
+	if (uncore_pmc_freerunning(event->hw.idx))
+		return;
+
 	for (i = 0; i < box->n_events; i++) {
 		if (event == box->event_list[i]) {
 			uncore_put_event_constraint(box, event);
@@ -603,6 +647,13 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
 	struct intel_uncore_box *fake_box;
 	int ret = -EINVAL, n;
 
+	/*
+	 * Events and freerunning counters are 1:1 mapped,
+	 * and the counters are always available.
+	 */
+	if (uncore_pmc_freerunning(event->hw.idx))
+		return 0;
+
 	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
 	if (!fake_box)
 		return -ENOMEM;
@@ -690,6 +741,10 @@ static int uncore_pmu_event_init(struct perf_event *event)
 
 		/* fixed counters have event field hardcoded to zero */
 		hwc->config = 0ULL;
+	} else if (is_freerunning_event(event)) {
+		if (!check_valid_freerunning_event(box, event))
+			return -EINVAL;
+		event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
 	} else {
 		hwc->config = event->attr.config &
 			      (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 0ff08fba..3ecf2c4 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -11,8 +11,13 @@
 
 #define UNCORE_FIXED_EVENT		0xff
 #define UNCORE_PMC_IDX_MAX_GENERIC	8
+#define UNCORE_PMC_IDX_MAX_FIXED	1
+#define UNCORE_PMC_IDX_MAX_FREERUNNING	1
 #define UNCORE_PMC_IDX_FIXED		UNCORE_PMC_IDX_MAX_GENERIC
-#define UNCORE_PMC_IDX_MAX		(UNCORE_PMC_IDX_FIXED + 1)
+#define UNCORE_PMC_IDX_FREERUNNING	(UNCORE_PMC_IDX_FIXED + \
+					UNCORE_PMC_IDX_MAX_FIXED)
+#define UNCORE_PMC_IDX_MAX		(UNCORE_PMC_IDX_FREERUNNING + \
+					UNCORE_PMC_IDX_MAX_FREERUNNING)
 
 #define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx)	\
 		((dev << 24) | (func << 16) | (type << 8) | idx)
@@ -34,6 +39,7 @@ struct intel_uncore_ops;
 struct intel_uncore_pmu;
 struct intel_uncore_box;
 struct uncore_event_desc;
+struct freerunning_msr;
 
 struct intel_uncore_type {
 	const char *name;
@@ -41,6 +47,7 @@ struct intel_uncore_type {
 	int num_boxes;
 	int perf_ctr_bits;
 	int fixed_ctr_bits;
+	int num_freerunning_types;
 	unsigned perf_ctr;
 	unsigned event_ctl;
 	unsigned event_mask;
@@ -58,6 +65,7 @@ struct intel_uncore_type {
 	struct intel_uncore_pmu *pmus;
 	struct intel_uncore_ops *ops;
 	struct uncore_event_desc *event_descs;
+	struct freerunning_msr *freerunning;
 	const struct attribute_group *attr_groups[4];
 	struct pmu *pmu; /* for custom pmu ops */
 };
@@ -128,6 +136,13 @@ struct uncore_event_desc {
 	const char *config;
 };
 
+struct freerunning_msr {
+	unsigned int msr_base;
+	unsigned int msr_off;
+	unsigned int num_counters;
+	unsigned int bits;
+};
+
 struct pci2phy_map {
 	struct list_head list;
 	int segment;
@@ -161,6 +176,11 @@ static inline bool uncore_pmc_fixed(int idx)
 	return (idx == UNCORE_PMC_IDX_FIXED);
 }
 
+static inline bool uncore_pmc_freerunning(int idx)
+{
+	return (idx == UNCORE_PMC_IDX_FREERUNNING);
+}
+
 static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
 {
 	return box->pmu->type->box_ctl;
@@ -218,6 +238,44 @@ static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
 	return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
 }
 
+
+/*
+ * The freerunning counter is similar to the fixed counter, except that
+ * it cannot be written by SW.
+ *
+ * Freerunning MSR events share the event code 0xff with fixed events.
+ * The umask of freerunning events starts from 0x10.
+ * Umask values below 0x10 are reserved for fixed events.
+ *
+ * Freerunning events are divided into types according to their MSR
+ * location, bit width or definition. Each type is limited to at most
+ * 16 events.
+ * So the umask of the first type starts from 0x10, the second from
+ * 0x20, and the rest follow in the same manner.
+ */
+#define UNCORE_FREERUNNING_MSR_START		0x10
+static inline unsigned int uncore_freerunning_msr_idx(u64 config)
+{
+	return ((config >> 8) & 0xf);
+}
+
+static inline unsigned int uncore_freerunning_msr_type(u64 config)
+{
+	return ((((config >> 8) - UNCORE_FREERUNNING_MSR_START) >> 4) & 0xf);
+}
+
+static inline
+unsigned int uncore_freerunning_msr(struct intel_uncore_box *box,
+				    struct perf_event *event)
+{
+	unsigned int type = uncore_freerunning_msr_type(event->attr.config);
+	unsigned int idx = uncore_freerunning_msr_idx(event->attr.config);
+	struct intel_uncore_pmu *pmu = box->pmu;
+
+	return pmu->type->freerunning[type].msr_base + idx +
+	       pmu->type->freerunning[type].msr_off * pmu->pmu_idx;
+}
+
 static inline
 unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
 {
@@ -280,11 +338,52 @@ static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
 	return box->pmu->type->fixed_ctr_bits;
 }
 
+static inline
+unsigned int uncore_freerunning_bits(struct intel_uncore_box *box,
+				     struct perf_event *event)
+{
+	unsigned int type = uncore_freerunning_msr_type(event->attr.config);
+
+	return box->pmu->type->freerunning[type].bits;
+}
+
+static inline int uncore_num_freerunning(struct intel_uncore_box *box,
+					 struct perf_event *event)
+{
+	unsigned int type = uncore_freerunning_msr_type(event->attr.config);
+
+	return box->pmu->type->freerunning[type].num_counters;
+}
+
+static inline int uncore_num_freerunning_types(struct intel_uncore_box *box,
+					       struct perf_event *event)
+{
+	return box->pmu->type->num_freerunning_types;
+}
+
+static inline bool check_valid_freerunning_event(struct intel_uncore_box *box,
+						 struct perf_event *event)
+{
+	unsigned int type = uncore_freerunning_msr_type(event->attr.config);
+	unsigned int idx = uncore_freerunning_msr_idx(event->attr.config);
+
+	return ((type < uncore_num_freerunning_types(box, event)) &&
+	       (idx < uncore_num_freerunning(box, event)));
+}
+
 static inline int uncore_num_counters(struct intel_uncore_box *box)
 {
 	return box->pmu->type->num_counters;
 }
 
+static inline bool is_freerunning_event(struct perf_event *event)
+{
+	u64 cfg = event->attr.config;
+
+	return (((cfg & UNCORE_FIXED_EVENT) == UNCORE_FIXED_EVENT) &&
+		(((cfg >> 8) & 0xff) >= UNCORE_FREERUNNING_MSR_START));
+}
+
 static inline void uncore_disable_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->type->ops->disable_box)
-- 
2.7.4
