Message-ID: <1365521034-4496-4-git-send-email-jacob.shin@amd.com>
Date:	Tue, 9 Apr 2013 10:23:54 -0500
From:	Jacob Shin <jacob.shin@....com>
To:	Ingo Molnar <mingo@...hat.com>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Arnaldo Carvalho de Melo <acme@...stprotocols.net>
CC:	"H. Peter Anvin" <hpa@...or.com>,
	Thomas Gleixner <tglx@...utronix.de>, <x86@...nel.org>,
	Stephane Eranian <eranian@...gle.com>,
	Jiri Olsa <jolsa@...hat.com>, <linux-kernel@...r.kernel.org>,
	Jacob Shin <jacob.shin@....com>
Subject: [PATCH RESEND 3/3] perf, amd: Enable L2I performance counters on AMD Family 16h

AMD Family 16h processors provide 4 new performance counters (in
addition to the 4 legacy core counters and 4 northbridge counters) for
monitoring L2 cache specific events (e.g. L2 cache misses). These 4
counters are shared among all CPUs that share the same L2 cache. Reuse
the existing shared event constraints handling logic to enforce this
sharing.
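
As a usage sketch (not part of this patch): with the L2I counters
enabled, an event select in the 0x07d-0x07f range handled below can be
counted through the raw event interface, in system-wide counting mode
only, since the shared counters reject sampling and per-task events.
The event select used here (0x7e) is only assumed to be a valid L2I
event; the real encodings are defined in the Family 16h BKDG.
Something along these lines:

  /*
   * Minimal sketch; 0x7e is assumed to be an L2I event select in the
   * 0x07d-0x07f range routed to the shared L2I counters.
   */
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/perf_event.h>

  int main(void)
  {
          struct perf_event_attr attr;
          long long count = 0;
          int fd;

          memset(&attr, 0, sizeof(attr));
          attr.type   = PERF_TYPE_RAW;
          attr.size   = sizeof(attr);
          attr.config = 0x7e;        /* assumed L2I event select */

          /* pid = -1, cpu = 0: system-wide counting, no sampling */
          fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
          if (fd < 0) {
                  perror("perf_event_open");
                  return 1;
          }

          sleep(1);
          read(fd, &count, sizeof(count));
          printf("count: %lld\n", count);
          close(fd);
          return 0;
  }

The equivalent perf tool invocation would be roughly
"perf stat -a -e r07e -- sleep 1".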

Signed-off-by: Jacob Shin <jacob.shin@....com>
---
 arch/x86/include/asm/cpufeature.h     |    2 +
 arch/x86/include/asm/perf_event.h     |    4 +
 arch/x86/include/uapi/asm/msr-index.h |    4 +
 arch/x86/kernel/cpu/perf_event.h      |    2 +
 arch/x86/kernel/cpu/perf_event_amd.c  |  167 +++++++++++++++++++++++++++++----
 5 files changed, 162 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 93fe929..0f534af 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,7 @@
 #define X86_FEATURE_TOPOEXT	(6*32+22) /* topology extensions CPUID leafs */
 #define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
 #define X86_FEATURE_PERFCTR_NB  (6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_PERFCTR_L2I (6*32+28) /* L2I performance counter extensions */
 
 /*
  * Auxiliary flags: Linux defined - For features scattered in various
@@ -311,6 +312,7 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_pclmulqdq	boot_cpu_has(X86_FEATURE_PCLMULQDQ)
 #define cpu_has_perfctr_core	boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
 #define cpu_has_perfctr_nb	boot_cpu_has(X86_FEATURE_PERFCTR_NB)
+#define cpu_has_perfctr_l2i	boot_cpu_has(X86_FEATURE_PERFCTR_L2I)
 #define cpu_has_cx8		boot_cpu_has(X86_FEATURE_CX8)
 #define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
 #define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 57cb634..ed430ea 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -36,6 +36,9 @@
 #define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT		37
 #define AMD64_EVENTSEL_INT_CORE_SEL_MASK		\
 	(0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
+#define AMD64_EVENTSEL_THREAD_MASK_SHIFT		56
+#define AMD64_EVENTSEL_THREAD_MASK_MASK			\
+	(0xFULL << AMD64_EVENTSEL_THREAD_MASK_SHIFT)
 
 #define AMD64_EVENTSEL_EVENT	\
 	(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
@@ -57,6 +60,7 @@
 #define AMD64_NUM_COUNTERS				4
 #define AMD64_NUM_COUNTERS_CORE				6
 #define AMD64_NUM_COUNTERS_NB				4
+#define AMD64_NUM_COUNTERS_L2I				4
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bf7bb68..b575788 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -196,6 +196,10 @@
 #define MSR_AMD64_IBSBRTARGET		0xc001103b
 #define MSR_AMD64_IBS_REG_COUNT_MAX	8 /* includes MSR_AMD64_IBSBRTARGET */
 
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL		0xc0010230
+#define MSR_F16H_L2I_PERF_CTR		0xc0010231
+
 /* Fam 15h MSRs */
 #define MSR_F15H_PERF_CTL		0xc0010200
 #define MSR_F15H_PERF_CTR		0xc0010201
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 9751201..9297110 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -172,6 +172,8 @@ struct cpu_hw_events {
 	 * AMD specific bits
 	 */
 	struct amd_shared_regs		*amd_nb;
+	struct amd_shared_regs		*amd_l2i;
+
 	/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
 	u64				perf_ctr_virt_mask;
 
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 36b5162..e0fab88 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,7 +132,12 @@ static u64 amd_pmu_event_map(int hw_event)
 	return amd_perfmon_event_map[hw_event];
 }
 
+#define CONFIG1_CORE_EVENT 0
+#define CONFIG1_NB_EVENT   1
+#define CONFIG1_L2I_EVENT  2
+
 static struct event_constraint *amd_nb_event_constraint;
+static struct event_constraint *amd_l2i_event_constraint;
 
 /*
  * Previously calculated offsets
@@ -151,6 +156,9 @@ static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
  * CPUs with north bridge performance counter extensions:
  *   4 additional counters starting at 0xc0010240 each offset by 2
  *   (indexed right above either one of the above core counters)
+ *
+ * CPUs with L2I performance counter extensions:
+ *   4 additional counters starting at 0xc0010230 each offset by 2
  */
 static inline int amd_pmu_addr_offset(int index, bool eventsel)
 {
@@ -183,6 +191,18 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
 			base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
 
 		offset = base + ((index - first) << 1);
+	} else if (amd_l2i_event_constraint &&
+		   test_bit(index, amd_l2i_event_constraint->idxmsk)) {
+
+		first = find_first_bit(amd_l2i_event_constraint->idxmsk,
+				       X86_PMC_IDX_MAX);
+
+		if (eventsel)
+			base = MSR_F16H_L2I_PERF_CTL - x86_pmu.eventsel;
+		else
+			base = MSR_F16H_L2I_PERF_CTR - x86_pmu.perfctr;
+
+		offset = base + ((index - first) << 1);
 	} else if (!cpu_has_perfctr_core)
 		offset = index;
 	else
@@ -218,6 +238,13 @@ static inline int amd_pmu_rdpmc_index(int index)
 		first = find_first_bit(amd_nb_event_constraint->idxmsk,
 				       X86_PMC_IDX_MAX);
 		ret = index - first + 6;
+	} else if (amd_l2i_event_constraint &&
+	    test_bit(index, amd_l2i_event_constraint->idxmsk)) {
+
+		first = find_first_bit(amd_l2i_event_constraint->idxmsk,
+				       X86_PMC_IDX_MAX);
+
+		ret = index - first + 10;
 	} else
 		ret = index;
 
@@ -245,14 +272,14 @@ static int amd_core_hw_config(struct perf_event *event)
 }
 
 /*
- * NB counters do not support the following event select bits:
+ * NB and L2I counters do not support the following event select bits:
  *   Host/Guest only
  *   Counter mask
  *   Invert counter mask
  *   Edge detect
  *   OS/User mode
  */
-static int amd_nb_hw_config(struct perf_event *event)
+static int amd_shared_hw_config(struct perf_event *event)
 {
 	/* for NB, we only allow system wide counting mode */
 	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
@@ -285,9 +312,22 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
 	return (hwc->config & 0xe0) == 0xe0;
 }
 
-static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
+static inline int amd_is_perfctr_nb_event(struct perf_event *event)
 {
-	return amd_nb_event_constraint && amd_is_nb_event(hwc);
+	return amd_nb_event_constraint && amd_is_nb_event(&event->hw);
+}
+
+static inline int amd_is_perfctr_l2i_event(struct perf_event *event)
+{
+	unsigned int event_code = amd_get_event_code(&event->hw);
+
+	if (!amd_l2i_event_constraint)
+		return 0;
+
+	if (event_code >= 0x07d && event_code <= 0x07f)
+		return 1;
+
+	return event->attr.config1 == CONFIG1_L2I_EVENT;
 }
 
 static inline int amd_has_nb(struct cpu_hw_events *cpuc)
@@ -297,6 +337,13 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
 	return nb && nb->id != -1;
 }
 
+static inline int amd_has_l2i(struct cpu_hw_events *cpuc)
+{
+	struct amd_shared_regs *l2i = cpuc->amd_l2i;
+
+	return l2i && l2i->id != -1;
+}
+
 static int amd_pmu_hw_config(struct perf_event *event)
 {
 	int ret;
@@ -315,8 +362,8 @@ static int amd_pmu_hw_config(struct perf_event *event)
 	if (event->attr.type == PERF_TYPE_RAW)
 		event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
 
-	if (amd_is_perfctr_nb_event(&event->hw))
-		return amd_nb_hw_config(event);
+	if (amd_is_perfctr_nb_event(event) || amd_is_perfctr_l2i_event(event))
+		return amd_shared_hw_config(event);
 
 	return amd_core_hw_config(event);
 }
@@ -340,8 +387,9 @@ static void amd_put_shared_event_constraints(struct amd_shared_regs *regs,
 	}
 }
 
-static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
+static void amd_shared_interrupt_hw_config(struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	int core_id = cpu_data(smp_processor_id()).cpu_core_id;
 
 	/* deliver interrupts only to this core */
@@ -351,6 +399,13 @@ static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
 		hwc->config |= (u64)(core_id) <<
 			AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
 	}
+
+	/* mask out events from other cores */
+	if (amd_is_perfctr_l2i_event(event)) {
+		hwc->config |= AMD64_EVENTSEL_THREAD_MASK_MASK;
+		hwc->config &= ~(1ULL <<
+			(AMD64_EVENTSEL_THREAD_MASK_SHIFT + core_id));
+	}
 }
 
  /*
@@ -441,8 +496,8 @@ amd_get_shared_event_constraints(struct cpu_hw_events *cpuc,
 	if (new == -1)
 		return &emptyconstraint;
 
-	if (amd_is_perfctr_nb_event(hwc))
-		amd_nb_interrupt_hw_config(hwc);
+	if (amd_is_perfctr_nb_event(event) || amd_is_perfctr_l2i_event(event))
+		amd_shared_interrupt_hw_config(event);
 
 	return &regs->event_constraints[new];
 }
@@ -482,14 +537,18 @@ static int amd_pmu_cpu_prepare(int cpu)
 	if (!cpuc->amd_nb)
 		return NOTIFY_BAD;
 
+	cpuc->amd_l2i = amd_alloc_shared_regs(cpu);
+	if (!cpuc->amd_l2i)
+		return NOTIFY_BAD;
+
 	return NOTIFY_OK;
 }
 
 static void amd_pmu_cpu_starting(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-	struct amd_shared_regs *nb;
-	int i, nb_id;
+	struct amd_shared_regs *nb, *l2i;
+	int i, nb_id, l2_id;
 
 	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
 
@@ -499,20 +558,44 @@ static void amd_pmu_cpu_starting(int cpu)
 	nb_id = amd_get_nb_id(cpu);
 	WARN_ON_ONCE(nb_id == BAD_APICID);
 
+	l2_id = cpu_data(cpu).compute_unit_id;
+
+	if (static_cpu_has(X86_FEATURE_TOPOEXT)) {
+		unsigned int eax, ebx, ecx, edx;
+		unsigned int nshared;
+		cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
+		nshared = ((eax >> 14) & 0xfff) + 1;
+		l2_id = cpu_data(cpu).apicid - (cpu_data(cpu).apicid % nshared);
+	}
+
 	for_each_online_cpu(i) {
-		nb = per_cpu(cpu_hw_events, i).amd_nb;
-		if (WARN_ON_ONCE(!nb))
+		struct cpu_hw_events *other_cpuc = &per_cpu(cpu_hw_events, i);
+
+		nb = other_cpuc->amd_nb;
+		l2i = other_cpuc->amd_l2i;
+
+		if (WARN_ON_ONCE(!(nb && l2i)))
 			continue;
 
 		if (nb->id == nb_id) {
-			cpuc->kfree_on_online[0] = cpuc->amd_nb;
-			cpuc->amd_nb = nb;
-			break;
+			if (!cpuc->kfree_on_online[0]) {
+				cpuc->kfree_on_online[0] = cpuc->amd_nb;
+				cpuc->amd_nb = nb;
+			}
+
+			if (l2i->id == l2_id) {
+				cpuc->kfree_on_online[1] = cpuc->amd_l2i;
+				cpuc->amd_l2i = l2i;
+				break;
+			}
 		}
 	}
 
 	cpuc->amd_nb->id = nb_id;
 	cpuc->amd_nb->refcnt++;
+
+	cpuc->amd_l2i->id = l2_id;
+	cpuc->amd_l2i->refcnt++;
 }
 
 static void amd_pmu_cpu_dead(int cpu)
@@ -532,6 +615,15 @@ static void amd_pmu_cpu_dead(int cpu)
 
 		cpuhw->amd_nb = NULL;
 	}
+
+	if (cpuhw->amd_l2i) {
+		struct amd_shared_regs *l2i = cpuhw->amd_l2i;
+
+		if (l2i->id == -1 || --l2i->refcnt == 0)
+			kfree(l2i);
+
+		cpuhw->amd_l2i = NULL;
+	}
 }
 
 static struct event_constraint *
@@ -550,8 +642,12 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
 				      struct perf_event *event)
 {
-	if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (amd_has_nb(cpuc) && amd_is_nb_event(hwc))
 		amd_put_shared_event_constraints(cpuc->amd_nb, event);
+	else if (amd_has_l2i(cpuc) && amd_is_perfctr_l2i_event(event))
+		amd_put_shared_event_constraints(cpuc->amd_l2i, event);
 }
 
 PMU_FORMAT_ATTR(event,	"config:0-7,32-35");
@@ -718,6 +814,25 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
 	}
 }
 
+static struct event_constraint amd_f16_PMC30 = EVENT_CONSTRAINT(0, 0x0F, 0);
+
+static struct event_constraint amd_L2IPMC = EVENT_CONSTRAINT(0, 0xF00, 0);
+
+static struct event_constraint *
+amd_get_event_constraints_f16h(struct cpu_hw_events *cpuc,
+			       struct perf_event    *event)
+{
+	if (amd_is_perfctr_l2i_event(event))
+		return amd_get_shared_event_constraints(cpuc, cpuc->amd_l2i,
+				event, amd_l2i_event_constraint);
+
+	if (amd_is_perfctr_nb_event(event))
+		return amd_get_shared_event_constraints(cpuc, cpuc->amd_nb,
+				event, amd_nb_event_constraint);
+
+	return &amd_f16_PMC30;
+}
+
 static ssize_t amd_event_sysfs_show(char *page, u64 config)
 {
 	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
@@ -762,6 +877,9 @@ static int setup_event_constraints(void)
 {
 	if (boot_cpu_data.x86 == 0x15)
 		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
+	else if (boot_cpu_data.x86 == 0x16)
+		x86_pmu.get_event_constraints = amd_get_event_constraints_f16h;
+
 	return 0;
 }
 
@@ -807,6 +925,20 @@ static int setup_perfctr_nb(void)
 	return 0;
 }
 
+static int setup_perfctr_l2i(void)
+{
+	if (!cpu_has_perfctr_l2i)
+		return -ENODEV;
+
+	x86_pmu.num_counters += AMD64_NUM_COUNTERS_L2I;
+
+	amd_l2i_event_constraint = &amd_L2IPMC;
+
+	printk(KERN_INFO "perf: AMD L2I performance counters detected\n");
+
+	return 0;
+}
+
 __init int amd_pmu_init(void)
 {
 	/* Performance-monitoring supported from K7 and later: */
@@ -818,6 +950,7 @@ __init int amd_pmu_init(void)
 	setup_event_constraints();
 	setup_perfctr_core();
 	setup_perfctr_nb();
+	setup_perfctr_l2i();
 
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
-- 
1.7.9.5


