Message-ID: <20150721095524.GK19282@twins.programming.kicks-ass.net>
Date:	Tue, 21 Jul 2015 11:55:24 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Kan Liang <kan.liang@...el.com>
Cc:	mingo@...hat.com, acme@...nel.org, eranian@...gle.com,
	ak@...ux.intel.com, mark.rutland@....com, adrian.hunter@...el.com,
	dsahern@...il.com, jolsa@...nel.org, namhyung@...nel.org,
	linux-kernel@...r.kernel.org, Andy Lutomirski <luto@...nel.org>
Subject: Re: [PATCH RFC V2 1/1] x86, perf: Add a freq pmu driver

On Mon, Jul 20, 2015 at 11:49:06AM -0400, Kan Liang wrote:
> +static void freq_event_update(struct perf_event *event)
> +{
> +	u64 prev;
> +	u64 now;
> +
> +	/* Assume counters are 64bit */
> +	now = freq_read_counter(event);
> +	prev = local64_xchg(&event->hw.prev_count, now);
> +	local64_add(now - prev, &event->count);
> +}

That really wants to be a cmpxchg loop; especially since you want to be
able to combine these events with hardware sampling events.

In which case the update from NMI context can interfere with an update
from whatever other context.
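
Something like the below (untested sketch; it is essentially what
msr_event_update() in the reworked patch further down does):

static void freq_event_update(struct perf_event *event)
{
	u64 prev, now;

again:
	prev = local64_read(&event->hw.prev_count);
	now = freq_read_counter(event);

	/* An NMI may have updated prev_count under us; retry. */
	if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
		goto again;

	local64_add(now - prev, &event->count);
}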

> +static void freq_event_stop(struct perf_event *event, int flags)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +
> +	/* mark event as deactivated and stopped */
> +	if (!(hwc->state & PERF_HES_STOPPED))
> +		hwc->state |= PERF_HES_STOPPED;

This is pointless, nobody will clear the bit.

> +	/* check if update of sw counter is necessary */
> +	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
> +		freq_event_update(event);
> +		hwc->state |= PERF_HES_UPTODATE;
> +	}

This should be an unconditional update.
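
That is, simply (this is what the version below ends up with):

static void freq_event_stop(struct perf_event *event, int flags)
{
	/* Unconditionally fold the current counter value into ->count. */
	freq_event_update(event);
}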

> +}
> +
> +static void freq_event_del(struct perf_event *event, int flags)
> +{
> +	freq_event_stop(event, PERF_EF_UPDATE);
> +}
> +
> +static int freq_event_add(struct perf_event *event, int flags)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +
> +	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

And that again is pointless.

> +	if (flags & PERF_EF_START)
> +		freq_event_start(event, flags);
> +
> +	return 0;
> +}

Other than that, I did a big rename to MSR, and added SMI_COUNT.

This seems to work fine for per-task events:

$ perf stat -e '{ref-cycles,msr/tsc/}' -e '{msr/aperf/,msr/mperf/}' -e 'msr/smi/' -- ls > /dev/null

 Performance counter stats for 'ls':

         8,657,404      ref-cycles
         8,943,754      msr/tsc/
         3,784,505      msr/aperf/
         8,651,964      msr/mperf/
                 0      msr/smi/

       0.004307333 seconds time elapsed

---
Subject: x86, perf: Add an MSR pmu driver
From: Andy Lutomirski <luto@...nel.org>
Date: Mon, 20 Jul 2015 11:49:06 -0400

This patch adds an MSR PMU to support free-running MSR counters, such as
the time and frequency related counters TSC, IA32_APERF, IA32_MPERF and
IA32_PPERF, as well as SMI_COUNT.

The events are exposed in sysfs for use by perf stat and other tools.
The files are under /sys/devices/msr/events/.
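
For example (hypothetical session; the exact event list depends on the
CPU: aperf/mperf require X86_FEATURE_APERFMPERF, pperf and smi are model
specific):

  $ ls /sys/devices/msr/events/
  aperf  mperf  smi  tsc
  $ cat /sys/devices/msr/events/aperf
  event=0x01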

Cc: adrian.hunter@...el.com
Cc: dsahern@...il.com
Cc: jolsa@...nel.org
Cc: namhyung@...nel.org
Cc: mingo@...hat.com
Cc: acme@...nel.org
Cc: eranian@...gle.com
Cc: ak@...ux.intel.com
Cc: mark.rutland@....com
Signed-off-by: Andy Lutomirski <luto@...nel.org>
Signed-off-by: Kan Liang <kan.liang@...el.com>
[peterz: s/freq/msr/, added SMI_COUNT, fixed bugs]
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
Link: http://lkml.kernel.org/r/1437407346-31186-1-git-send-email-kan.liang@intel.com
---
 arch/x86/kernel/cpu/Makefile         |    2 
 arch/x86/kernel/cpu/perf_event_msr.c |  242 +++++++++++++++++++++++++++++++++++
 2 files changed, 244 insertions(+)
 create mode 100644 arch/x86/kernel/cpu/perf_event_msr.c

--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -46,6 +46,8 @@ obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+
 					   perf_event_intel_uncore_snb.o \
 					   perf_event_intel_uncore_snbep.o \
 					   perf_event_intel_uncore_nhmex.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_msr.o
+obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_msr.o
 endif
 
 
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_msr.c
@@ -0,0 +1,242 @@
+#include <linux/perf_event.h>
+
+enum perf_msr_id {
+	PERF_MSR_TSC			= 0,
+	PERF_MSR_APERF			= 1,
+	PERF_MSR_MPERF			= 2,
+	PERF_MSR_PPERF			= 3,
+	PERF_MSR_SMI			= 4,
+
+	PERF_MSR_EVENT_MAX,
+};
+
+struct perf_msr {
+	int	id;
+	u64	msr;
+};
+
+static struct perf_msr msr[] = {
+	{ PERF_MSR_TSC, 0 },
+	{ PERF_MSR_APERF, MSR_IA32_APERF },
+	{ PERF_MSR_MPERF, MSR_IA32_MPERF },
+	{ PERF_MSR_PPERF, MSR_PPERF },
+	{ PERF_MSR_SMI, MSR_SMI_COUNT },
+};
+
+PMU_EVENT_ATTR_STRING(tsc,   evattr_tsc,   "event=0x00");
+PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
+PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
+PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
+PMU_EVENT_ATTR_STRING(smi,   evattr_smi,   "event=0x04");
+
+static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
+	&evattr_tsc.attr.attr,
+};
+
+static struct attribute_group events_attr_group = {
+	.name = "events",
+	.attrs = events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+static struct attribute *format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+static struct attribute_group format_attr_group = {
+	.name = "format",
+	.attrs = format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&events_attr_group,
+	&format_attr_group,
+	NULL,
+};
+
+static int msr_event_init(struct perf_event *event)
+{
+	u64 cfg = event->attr.config;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (cfg >= PERF_MSR_EVENT_MAX)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest  ||
+	    event->attr.sample_period) /* no sampling */
+		return -EINVAL;
+
+	event->hw.idx = -1;
+	event->hw.event_base = msr[cfg].msr;
+	event->hw.config = cfg;
+
+	return 0;
+}
+
+static inline u64 msr_read_counter(struct perf_event *event)
+{
+	u64 now;
+
+	if (event->hw.event_base)
+		rdmsrl(event->hw.event_base, now);
+	else
+		now = rdtsc();
+
+	return now;
+}
+static void msr_event_update(struct perf_event *event)
+{
+	u64 prev, now;
+	s64 delta;
+
+	/* Careful, an NMI might modify the previous event value. */
+again:
+	prev = local64_read(&event->hw.prev_count);
+	now = msr_read_counter(event);
+
+	if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
+		goto again;
+
+	delta = now - prev;
+	if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {
+		delta <<= 32;
+		delta >>= 32; /* sign extend */
+	}
+	local64_add(delta, &event->count);
+}
+
+static void msr_event_start(struct perf_event *event, int flags)
+{
+	u64 now;
+
+	now = msr_read_counter(event);
+	local64_set(&event->hw.prev_count, now);
+}
+
+static void msr_event_stop(struct perf_event *event, int flags)
+{
+	msr_event_update(event);
+}
+
+static void msr_event_del(struct perf_event *event, int flags)
+{
+	msr_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int msr_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		msr_event_start(event, flags);
+
+	return 0;
+}
+
+static struct pmu pmu_msr = {
+	.task_ctx_nr	= perf_sw_context,
+	.attr_groups	= attr_groups,
+	.event_init	= msr_event_init,
+	.add		= msr_event_add,
+	.del		= msr_event_del,
+	.start		= msr_event_start,
+	.stop		= msr_event_stop,
+	.read		= msr_event_update,
+	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static int __init intel_msr_init(int idx)
+{
+	if (boot_cpu_data.x86 != 6)
+		return 0;
+
+	switch (boot_cpu_data.x86_model) {
+	case 30: /* 45nm Nehalem    */
+	case 26: /* 45nm Nehalem-EP */
+	case 46: /* 45nm Nehalem-EX */
+
+	case 37: /* 32nm Westmere    */
+	case 44: /* 32nm Westmere-EP */
+	case 47: /* 32nm Westmere-EX */
+
+	case 42: /* 32nm SandyBridge         */
+	case 45: /* 32nm SandyBridge-E/EN/EP */
+
+	case 58: /* 22nm IvyBridge       */
+	case 62: /* 22nm IvyBridge-EP/EX */
+
+	case 60: /* 22nm Haswell Core */
+	case 63: /* 22nm Haswell Server */
+	case 69: /* 22nm Haswell ULT */
+	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+	case 61: /* 14nm Broadwell Core-M */
+	case 86: /* 14nm Broadwell Xeon D */
+	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+	case 79: /* 14nm Broadwell Server */
+		events_attrs[idx++] = &evattr_smi.attr.attr;
+		break;
+
+	case 78: /* 14nm Skylake Mobile */
+	case 94: /* 14nm Skylake Desktop */
+		events_attrs[idx++] = &evattr_pperf.attr.attr;
+		events_attrs[idx++] = &evattr_smi.attr.attr;
+		break;
+
+	case 55: /* 22nm Atom "Silvermont"                */
+	case 76: /* 14nm Atom "Airmont"                   */
+	case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+		events_attrs[idx++] = &evattr_smi.attr.attr;
+		break;
+	}
+
+	events_attrs[idx] = NULL;
+
+	return 0;
+}
+
+static int __init amd_msr_init(int idx)
+{
+	return 0;
+}
+
+static int __init msr_init(void)
+{
+	int err;
+	int idx = 1;
+
+	if (boot_cpu_has(X86_FEATURE_APERFMPERF)) {
+		events_attrs[idx++] = &evattr_aperf.attr.attr;
+		events_attrs[idx++] = &evattr_mperf.attr.attr;
+		events_attrs[idx] = NULL;
+	}
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_INTEL:
+		err = intel_msr_init(idx);
+		break;
+
+	case X86_VENDOR_AMD:
+		err = amd_msr_init(idx);
+		break;
+
+	default:
+		err = -ENOTSUPP;
+	}
+
+	if (err != 0) {
+		pr_cont("no msr PMU driver.\n");
+		return 0;
+	}
+
+	perf_pmu_register(&pmu_msr, "msr", -1);
+
+	return 0;
+}
+device_initcall(msr_init);