lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 6 Aug 2015 16:46:32 -0300
From:	Arnaldo Carvalho de Melo <acme@...nel.org>
To:	Peter Zijlstra <peterz@...radead.org>,
	Max Filippov <jcmvbkbc@...il.com>
Cc:	linux-xtensa@...ux-xtensa.org, linux-kernel@...r.kernel.org,
	Chris Zankel <chris@...kel.net>,
	Marc Gauthier <marc@...ence.com>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Paul Mackerras <paulus@...ba.org>,
	Ingo Molnar <mingo@...hat.com>
Subject: Re: [PATCH v2 07/13] xtensa: implement counting and sampling perf
 events

Em Sat, Jul 18, 2015 at 11:30:10AM +0300, Max Filippov escreveu:
> Xtensa Performance Monitor Module has up to eight 32-bit wide performance
> counters. Each counter may be enabled independently and can count any
> single type of hardware performance events. Event counting may be enabled
> and disabled globally (per PMM).
> Each counter has a status register with bits indicating whether the counter
> has overflowed, and may be programmed to raise a profiling IRQ on overflow.
> This IRQ is used to rewind counters and allow for counting more than 2^32
> samples for counting events and to report samples for sampling events.
> 
> For more details see Tensilica Debug User's Guide, chapter 8
> "Performance monitor module".

Has this gone in via PeterZ? I added the tools/ bits to my perf/core
branch; they will go in with the next pull request.

- Arnaldo
 
> Cc: Peter Zijlstra <a.p.zijlstra@...llo.nl>
> Cc: Paul Mackerras <paulus@...ba.org>
> Cc: Ingo Molnar <mingo@...hat.com>
> Cc: Arnaldo Carvalho de Melo <acme@...nel.org>
> Signed-off-by: Max Filippov <jcmvbkbc@...il.com>
> ---
> Changes v1->v2:
> - use -EINVAL instead of -ENOENT for invalid PMU event configurations.
> 
>  arch/xtensa/Kconfig             |  10 +
>  arch/xtensa/kernel/Makefile     |   1 +
>  arch/xtensa/kernel/perf_event.c | 450 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 461 insertions(+)
>  create mode 100644 arch/xtensa/kernel/perf_event.c
> 
> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
> index 3c57934..0e92885 100644
> --- a/arch/xtensa/Kconfig
> +++ b/arch/xtensa/Kconfig
> @@ -126,6 +126,16 @@ config XTENSA_VARIANT_MMU
>  	  Build a Conventional Kernel with full MMU support,
>  	  ie: it supports a TLB with auto-loading, page protection.
>  
> +config XTENSA_VARIANT_HAVE_PERF_EVENTS
> +	bool "Core variant has Performance Monitor Module"
> +	depends on XTENSA_VARIANT_CUSTOM
> +	default n
> +	help
> +	  Enable if core variant has Performance Monitor Module with
> +	  External Registers Interface.
> +
> +	  If unsure, say N.
> +
>  config XTENSA_UNALIGNED_USER
>  	bool "Unaligned memory access in use space"
>  	help
> diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
> index d3a0f0f..547a757 100644
> --- a/arch/xtensa/kernel/Makefile
> +++ b/arch/xtensa/kernel/Makefile
> @@ -13,6 +13,7 @@ obj-$(CONFIG_PCI) += pci.o
>  obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o
>  obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
>  obj-$(CONFIG_SMP) += smp.o mxhead.o
> +obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o
>  
>  AFLAGS_head.o += -mtext-section-literals
>  
> diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
> new file mode 100644
> index 0000000..b44df3c
> --- /dev/null
> +++ b/arch/xtensa/kernel/perf_event.c
> @@ -0,0 +1,450 @@
> +/*
> + * Xtensa Performance Monitor Module driver
> + * See Tensilica Debug User's Guide for PMU registers documentation.
> + *
> + * Copyright (C) 2015 Cadence Design Systems Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/interrupt.h>
> +#include <linux/irqdomain.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/perf_event.h>
> +#include <linux/platform_device.h>
> +
> +#include <asm/processor.h>
> +#include <asm/stacktrace.h>
> +
> +/* Global control/status for all perf counters */
> +#define XTENSA_PMU_PMG			0x1000
> +/* Perf counter values */
> +#define XTENSA_PMU_PM(i)		(0x1080 + (i) * 4)
> +/* Perf counter control registers */
> +#define XTENSA_PMU_PMCTRL(i)		(0x1100 + (i) * 4)
> +/* Perf counter status registers */
> +#define XTENSA_PMU_PMSTAT(i)		(0x1180 + (i) * 4)
> +
> +#define XTENSA_PMU_PMG_PMEN		0x1
> +
> +#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
> +#define XTENSA_PMU_COUNTER_MAX		0x7fffffff
> +
> +#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
> +#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
> +#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
> +#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
> +#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
> +#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
> +#define XTENSA_PMU_PMCTRL_MASK		0xffff0000
> +
> +#define XTENSA_PMU_MASK(select, mask) \
> +	(((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
> +	 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
> +	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
> +	 XTENSA_PMU_PMCTRL_INTEN)
> +
> +#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
> +#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010
> +
> +struct xtensa_pmu_events {
> +	/* Array of events currently on this core */
> +	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
> +	/* Bitmap of used hardware counters */
> +	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
> +};
> +static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
> +
> +static const u32 xtensa_hw_ctl[] = {
> +	[PERF_COUNT_HW_CPU_CYCLES]		= XTENSA_PMU_MASK(0, 0x1),
> +	[PERF_COUNT_HW_INSTRUCTIONS]		= XTENSA_PMU_MASK(2, 0xffff),
> +	[PERF_COUNT_HW_CACHE_REFERENCES]	= XTENSA_PMU_MASK(10, 0x1),
> +	[PERF_COUNT_HW_CACHE_MISSES]		= XTENSA_PMU_MASK(12, 0x1),
> +	/* Taken and non-taken branches + taken loop ends */
> +	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XTENSA_PMU_MASK(2, 0x490),
> +	/* Instruction-related + other global stall cycles */
> +	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XTENSA_PMU_MASK(4, 0x1ff),
> +	/* Data-related global stall cycles */
> +	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= XTENSA_PMU_MASK(3, 0x1ff),
> +};
> +
> +#define C(_x) PERF_COUNT_HW_CACHE_##_x
> +
> +static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
> +	[C(L1D)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(10, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(10, 0x2),
> +		},
> +		[C(OP_WRITE)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(11, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(11, 0x2),
> +		},
> +	},
> +	[C(L1I)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(8, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(8, 0x2),
> +		},
> +	},
> +	[C(DTLB)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(9, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(9, 0x8),
> +		},
> +	},
> +	[C(ITLB)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(7, 0x1),
> +			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(7, 0x8),
> +		},
> +	},
> +};
> +
> +static int xtensa_pmu_cache_event(u64 config)
> +{
> +	unsigned int cache_type, cache_op, cache_result;
> +	int ret;
> +
> +	cache_type = (config >>  0) & 0xff;
> +	cache_op = (config >>  8) & 0xff;
> +	cache_result = (config >> 16) & 0xff;
> +
> +	if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
> +	    cache_op >= C(OP_MAX) ||
> +	    cache_result >= C(RESULT_MAX))
> +		return -EINVAL;
> +
> +	ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
> +
> +	if (ret == 0)
> +		return -EINVAL;
> +
> +	return ret;
> +}
> +
> +static inline uint32_t xtensa_pmu_read_counter(int idx)
> +{
> +	return get_er(XTENSA_PMU_PM(idx));
> +}
> +
> +static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
> +{
> +	set_er(v, XTENSA_PMU_PM(idx));
> +}
> +
> +static void xtensa_perf_event_update(struct perf_event *event,
> +				     struct hw_perf_event *hwc, int idx)
> +{
> +	uint64_t prev_raw_count, new_raw_count;
> +	int64_t delta;
> +
> +	do {
> +		prev_raw_count = local64_read(&hwc->prev_count);
> +		new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
> +	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
> +				 new_raw_count) != prev_raw_count);
> +
> +	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
> +
> +	local64_add(delta, &event->count);
> +	local64_sub(delta, &hwc->period_left);
> +}
> +
> +static bool xtensa_perf_event_set_period(struct perf_event *event,
> +					 struct hw_perf_event *hwc, int idx)
> +{
> +	bool rc = false;
> +	s64 left;
> +
> +	if (!is_sampling_event(event)) {
> +		left = XTENSA_PMU_COUNTER_MAX;
> +	} else {
> +		s64 period = hwc->sample_period;
> +
> +		left = local64_read(&hwc->period_left);
> +		if (left <= -period) {
> +			left = period;
> +			local64_set(&hwc->period_left, left);
> +			hwc->last_period = period;
> +			rc = true;
> +		} else if (left <= 0) {
> +			left += period;
> +			local64_set(&hwc->period_left, left);
> +			hwc->last_period = period;
> +			rc = true;
> +		}
> +		if (left > XTENSA_PMU_COUNTER_MAX)
> +			left = XTENSA_PMU_COUNTER_MAX;
> +	}
> +
> +	local64_set(&hwc->prev_count, -left);
> +	xtensa_pmu_write_counter(idx, -left);
> +	perf_event_update_userpage(event);
> +
> +	return rc;
> +}
> +
> +static void xtensa_pmu_enable(struct pmu *pmu)
> +{
> +	set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
> +}
> +
> +static void xtensa_pmu_disable(struct pmu *pmu)
> +{
> +	set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
> +}
> +
> +static int xtensa_pmu_event_init(struct perf_event *event)
> +{
> +	int ret;
> +
> +	switch (event->attr.type) {
> +	case PERF_TYPE_HARDWARE:
> +		if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
> +		    xtensa_hw_ctl[event->attr.config] == 0)
> +			return -EINVAL;
> +		event->hw.config = xtensa_hw_ctl[event->attr.config];
> +		return 0;
> +
> +	case PERF_TYPE_HW_CACHE:
> +		ret = xtensa_pmu_cache_event(event->attr.config);
> +		if (ret < 0)
> +			return ret;
> +		event->hw.config = ret;
> +		return 0;
> +
> +	case PERF_TYPE_RAW:
> +		/* Not 'previous counter' select */
> +		if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
> +		    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
> +			return -EINVAL;
> +		event->hw.config = (event->attr.config &
> +				    (XTENSA_PMU_PMCTRL_KRNLCNT |
> +				     XTENSA_PMU_PMCTRL_TRACELEVEL |
> +				     XTENSA_PMU_PMCTRL_SELECT |
> +				     XTENSA_PMU_PMCTRL_MASK)) |
> +			XTENSA_PMU_PMCTRL_INTEN;
> +		return 0;
> +
> +	default:
> +		return -ENOENT;
> +	}
> +}
> +
> +/*
> + * Starts/Stops a counter present on the PMU. The PMI handler
> + * should stop the counter when perf_event_overflow() returns
> + * !0. ->start() will be used to continue.
> + */
> +static void xtensa_pmu_start(struct perf_event *event, int flags)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	int idx = hwc->idx;
> +
> +	if (WARN_ON_ONCE(idx == -1))
> +		return;
> +
> +	if (flags & PERF_EF_RELOAD) {
> +		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
> +		xtensa_perf_event_set_period(event, hwc, idx);
> +	}
> +
> +	hwc->state = 0;
> +
> +	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
> +}
> +
> +static void xtensa_pmu_stop(struct perf_event *event, int flags)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	int idx = hwc->idx;
> +
> +	if (!(hwc->state & PERF_HES_STOPPED)) {
> +		set_er(0, XTENSA_PMU_PMCTRL(idx));
> +		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
> +		       XTENSA_PMU_PMSTAT(idx));
> +		hwc->state |= PERF_HES_STOPPED;
> +	}
> +
> +	if ((flags & PERF_EF_UPDATE) &&
> +	    !(event->hw.state & PERF_HES_UPTODATE)) {
> +		xtensa_perf_event_update(event, &event->hw, idx);
> +		event->hw.state |= PERF_HES_UPTODATE;
> +	}
> +}
> +
> +/*
> + * Adds/Removes a counter to/from the PMU, can be done inside
> + * a transaction, see the ->*_txn() methods.
> + */
> +static int xtensa_pmu_add(struct perf_event *event, int flags)
> +{
> +	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
> +	struct hw_perf_event *hwc = &event->hw;
> +	int idx = hwc->idx;
> +
> +	if (__test_and_set_bit(idx, ev->used_mask)) {
> +		idx = find_first_zero_bit(ev->used_mask,
> +					  XCHAL_NUM_PERF_COUNTERS);
> +		if (idx == XCHAL_NUM_PERF_COUNTERS)
> +			return -EAGAIN;
> +
> +		__set_bit(idx, ev->used_mask);
> +		hwc->idx = idx;
> +	}
> +	ev->event[idx] = event;
> +
> +	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
> +
> +	if (flags & PERF_EF_START)
> +		xtensa_pmu_start(event, PERF_EF_RELOAD);
> +
> +	perf_event_update_userpage(event);
> +	return 0;
> +}
> +
> +static void xtensa_pmu_del(struct perf_event *event, int flags)
> +{
> +	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
> +
> +	xtensa_pmu_stop(event, PERF_EF_UPDATE);
> +	__clear_bit(event->hw.idx, ev->used_mask);
> +	perf_event_update_userpage(event);
> +}
> +
> +static void xtensa_pmu_read(struct perf_event *event)
> +{
> +	xtensa_perf_event_update(event, &event->hw, event->hw.idx);
> +}
> +
> +static int callchain_trace(struct stackframe *frame, void *data)
> +{
> +	struct perf_callchain_entry *entry = data;
> +
> +	perf_callchain_store(entry, frame->pc);
> +	return 0;
> +}
> +
> +void perf_callchain_kernel(struct perf_callchain_entry *entry,
> +			   struct pt_regs *regs)
> +{
> +	xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
> +				callchain_trace, NULL, entry);
> +}
> +
> +void perf_callchain_user(struct perf_callchain_entry *entry,
> +			 struct pt_regs *regs)
> +{
> +	xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
> +			      callchain_trace, entry);
> +}
> +
> +void perf_event_print_debug(void)
> +{
> +	unsigned long flags;
> +	unsigned i;
> +
> +	local_irq_save(flags);
> +	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
> +		get_er(XTENSA_PMU_PMG));
> +	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
> +		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
> +			i, get_er(XTENSA_PMU_PM(i)),
> +			i, get_er(XTENSA_PMU_PMCTRL(i)),
> +			i, get_er(XTENSA_PMU_PMSTAT(i)));
> +	local_irq_restore(flags);
> +}
> +
> +static irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
> +{
> +	irqreturn_t rc = IRQ_NONE;
> +	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
> +	unsigned i;
> +
> +	for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
> +	     i < XCHAL_NUM_PERF_COUNTERS;
> +	     i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) {
> +		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
> +		struct perf_event *event = ev->event[i];
> +		struct hw_perf_event *hwc = &event->hw;
> +		u64 last_period;
> +
> +		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
> +			continue;
> +
> +		set_er(v, XTENSA_PMU_PMSTAT(i));
> +		xtensa_perf_event_update(event, hwc, i);
> +		last_period = hwc->last_period;
> +		if (xtensa_perf_event_set_period(event, hwc, i)) {
> +			struct perf_sample_data data;
> +			struct pt_regs *regs = get_irq_regs();
> +
> +			perf_sample_data_init(&data, 0, last_period);
> +			if (perf_event_overflow(event, &data, regs))
> +				xtensa_pmu_stop(event, 0);
> +		}
> +
> +		rc = IRQ_HANDLED;
> +	}
> +	return rc;
> +}
> +
> +static struct pmu xtensa_pmu = {
> +	.pmu_enable = xtensa_pmu_enable,
> +	.pmu_disable = xtensa_pmu_disable,
> +	.event_init = xtensa_pmu_event_init,
> +	.add = xtensa_pmu_add,
> +	.del = xtensa_pmu_del,
> +	.start = xtensa_pmu_start,
> +	.stop = xtensa_pmu_stop,
> +	.read = xtensa_pmu_read,
> +};
> +
> +static void xtensa_pmu_setup(void)
> +{
> +	unsigned i;
> +
> +	set_er(0, XTENSA_PMU_PMG);
> +	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
> +		set_er(0, XTENSA_PMU_PMCTRL(i));
> +		set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
> +	}
> +}
> +
> +static int xtensa_pmu_notifier(struct notifier_block *self,
> +			       unsigned long action, void *data)
> +{
> +	switch (action & ~CPU_TASKS_FROZEN) {
> +	case CPU_STARTING:
> +		xtensa_pmu_setup();
> +		break;
> +
> +	default:
> +		break;
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
> +static int __init xtensa_pmu_init(void)
> +{
> +	int ret;
> +	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
> +
> +	perf_cpu_notifier(xtensa_pmu_notifier);
> +	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
> +			  "pmu", NULL);
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
> +	if (ret)
> +		free_irq(irq, NULL);
> +
> +	return ret;
> +}
> +early_initcall(xtensa_pmu_init);
> -- 
> 1.8.1.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ