Message-ID: <AANLkTik9adcY_pH52c_Y0iRjitDKZOu5zyPusfjycuiH@mail.gmail.com>
Date: Thu, 13 Jan 2011 18:31:03 +0100
From: Stephane Eranian <eranian@...gle.com>
To: Lin Ming <ming.m.lin@...el.com>
Cc: Peter Zijlstra <a.p.zijlstra@...llo.nl>,
Ingo Molnar <mingo@...e.hu>, Andi Kleen <andi@...stfloor.org>,
lkml <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 2/7] perf-events: Add support for supplementary event
registers v4
Hi,
I'd like to suggest that the OFFCORE_RESPONSE extra MSR encoding
be put into a dedicated field in perf_event_attr instead of the upper
32 bits of attr->config.
There may not be enough space there to encode it for future processors.
In fact, given that the Sandy Bridge PMU spec is now available, we
have a first example of this (see Vol3b figure 30.29). OFFCORE_RESPONSE
needs 38 bits. So, instead of having NHM/WSM use attr->config and SNB
use another field, I think it would make sense to have that in a new u64 field
for all processors. Although OFFCORE_RESPONSE remains
a model-specific feature, I think it would help user tools and libraries if we
used a dedicated field.
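
For illustration only, here is a minimal userspace sketch of the two
encodings being compared. The mask value and the dedicated field name are
made up for the example and are not part of any existing or proposed ABI:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);

	/*
	 * Encoding used by this patch on NHM/WSM: event 0xb7, umask 0x01
	 * (OFFCORE_RESPONSE_0), with the response mask in the unused upper
	 * 32 bits of attr.config.  The 0x00ff mask is a made-up example.
	 */
	attr.config = ((__u64)0x00ff << 32) | 0x01b7;

	/*
	 * The alternative suggested above would look roughly like this;
	 * "offcore_rsp" is a placeholder name, not an existing field:
	 *
	 *	attr.config      = 0x01b7;
	 *	attr.offcore_rsp = 0x00ff;
	 */

	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0) == -1;
}

With a dedicated field, NHM/WSM and SNB could share the same user-visible
layout even though the width of the extra MSR differs between them.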
On Mon, Dec 27, 2010 at 4:36 PM, Lin Ming <ming.m.lin@...el.com> wrote:
> From: Andi Kleen <ak@...ux.intel.com>
>
> Intel Nehalem/Westmere have a special OFFCORE_RESPONSE event
> that can be used to monitor any offcore accesses from a core.
> This is a very useful event for various tunings, and it's
> also needed to implement the generic LLC-* events correctly.
>
> Unfortunately this event requires programming a mask in a separate
> register. And worse, this separate register is per core, not per
> CPU thread.
>
> This patch:
> - Teaches perf_events that OFFCORE_RESPONSE needs extra parameters.
> The extra parameters are passed by user space in the unused upper
> 32 bits of the config word.
> - Adds support to the Intel perf_event core to schedule per-core
> resources. This adds fairly generic infrastructure that
> can also be used for other per-core resources.
> The basic code is patterned after the similar AMD northbridge
> constraints code.
>
> Thanks to Stephane Eranian who pointed out some problems
> in the original version and suggested improvements.
>
> Cc: eranian@...gle.com
> Signed-off-by: Andi Kleen <ak@...ux.intel.com>
> ---
> arch/x86/kernel/cpu/perf_event.c | 70 +++++++++++
> arch/x86/kernel/cpu/perf_event_intel.c | 198 ++++++++++++++++++++++++++++++++
> include/linux/perf_event.h | 2 +
> 3 files changed, 270 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 0a360d1..df971ea 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -93,6 +93,8 @@ struct amd_nb {
> struct event_constraint event_constraints[X86_PMC_IDX_MAX];
> };
>
> +struct intel_percore;
> +
> #define MAX_LBR_ENTRIES 16
>
> struct cpu_hw_events {
> @@ -128,6 +130,13 @@ struct cpu_hw_events {
> struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
>
> /*
> + * Intel percore register state.
> + * Coordinate shared resources between HT threads.
> + */
> + int percore_used; /* Used by this CPU? */
> + struct intel_percore *per_core;
> +
> + /*
> * AMD specific bits
> */
> struct amd_nb *amd_nb;
> @@ -175,6 +184,32 @@ struct cpu_hw_events {
> #define for_each_event_constraint(e, c) \
> for ((e) = (c); (e)->weight; (e)++)
>
> +/*
> + * Extra registers for specific events.
> + * Some events need large masks and require external MSRs.
> + * Define a mapping to these extra registers.
> + * The actual contents are still encoded in unused parts of the
> + * original config u64.
> + */
> +struct extra_reg {
> + unsigned int event;
> + unsigned int msr;
> + unsigned int extra_shift;
> + u64 config_mask;
> + u64 valid_mask;
> +};
> +
> +#define EVENT_EXTRA_REG(e, ms, m, vm, es) { \
> + .event = (e), \
> + .msr = (ms), \
> + .config_mask = (m), \
> + .valid_mask = (vm), \
> + .extra_shift = (es), \
> + }
> +#define INTEL_EVENT_EXTRA_REG(event, msr, vm, es) \
> + EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, es)
> +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, 0)
> +
> union perf_capabilities {
> struct {
> u64 lbr_format : 6;
> @@ -219,6 +254,7 @@ struct x86_pmu {
> void (*put_event_constraints)(struct cpu_hw_events *cpuc,
> struct perf_event *event);
> struct event_constraint *event_constraints;
> + struct event_constraint *percore_constraints;
> void (*quirks)(void);
> int perfctr_second_write;
>
> @@ -247,6 +283,11 @@ struct x86_pmu {
> */
> unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
> int lbr_nr; /* hardware stack size */
> +
> + /*
> + * Extra registers for events
> + */
> + struct extra_reg *extra_regs;
> };
>
> static struct x86_pmu x86_pmu __read_mostly;
> @@ -321,6 +362,33 @@ again:
> return new_raw_count;
> }
>
> +/*
> + * Find and validate any extra registers to set up.
> + */
> +static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
> +{
> + struct extra_reg *er;
> + u64 extra;
> +
> + event->hw.extra_reg = 0;
> + event->hw.extra_config = 0;
> +
> + if (!x86_pmu.extra_regs)
> + return 0;
> +
> + for (er = x86_pmu.extra_regs; er->msr; er++) {
> + if (er->event != (config & er->config_mask))
> + continue;
> + event->hw.extra_reg = er->msr;
> + extra = config >> er->extra_shift;
> + if (extra & ~er->valid_mask)
> + return -EINVAL;
> + event->hw.extra_config = extra;
> + break;
> + }
> + return 0;
> +}
> +
> static atomic_t active_events;
> static DEFINE_MUTEX(pmc_reserve_mutex);
>
> @@ -918,6 +986,8 @@ static void x86_pmu_enable(struct pmu *pmu)
> static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
> u64 enable_mask)
> {
> + if (hwc->extra_reg)
> + wrmsrl(hwc->extra_reg, hwc->extra_config);
> wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
> }
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 24e390e..32569d4 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1,5 +1,27 @@
> #ifdef CONFIG_CPU_SUP_INTEL
>
> +#define MAX_EXTRA_REGS 2
> +
> +/*
> + * Per register state.
> + */
> +struct er_account {
> + int ref; /* reference count */
> + unsigned int extra_reg; /* extra MSR number */
> + u64 extra_config; /* extra MSR config */
> +};
> +
> +/*
> + * Per core state
> + * This is used to coordinate shared registers for HT threads.
> + */
> +struct intel_percore {
> + raw_spinlock_t lock; /* protect structure */
> + struct er_account regs[MAX_EXTRA_REGS];
> + int refcnt; /* number of threads */
> + unsigned core_id;
> +};
> +
> /*
> * Intel PerfMon, used on Core and later.
> */
> @@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
> EVENT_CONSTRAINT_END
> };
>
> +static struct extra_reg intel_nehalem_extra_regs[] =
> +{
> + INTEL_EVENT_EXTRA_REG(0xb7, 0x1a6, 0xffff, 32), /* OFFCORE_RESPONSE_0 */
> + EVENT_EXTRA_END
> +};
> +
> +static struct event_constraint intel_nehalem_percore_constraints[] =
> +{
> + INTEL_EVENT_CONSTRAINT(0xb7, 0),
> + EVENT_CONSTRAINT_END
> +};
> +
> static struct event_constraint intel_westmere_event_constraints[] =
> {
> FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> @@ -76,6 +110,20 @@ static struct event_constraint intel_westmere_event_constraints[] =
> EVENT_CONSTRAINT_END
> };
>
> +static struct extra_reg intel_westmere_extra_regs[] =
> +{
> + INTEL_EVENT_EXTRA_REG(0xb7, 0x1a6, 0xffff, 32), /* OFFCORE_RESPONSE_0 */
> + INTEL_EVENT_EXTRA_REG(0xbb, 0x1a7, 0xffff, 32), /* OFFCORE_RESPONSE_1 */
> + EVENT_EXTRA_END
> +};
> +
> +static struct event_constraint intel_westmere_percore_constraints[] =
> +{
> + INTEL_EVENT_CONSTRAINT(0xb7, 0),
> + INTEL_EVENT_CONSTRAINT(0xbb, 0),
> + EVENT_CONSTRAINT_END
> +};
> +
> static struct event_constraint intel_gen_event_constraints[] =
> {
> FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> @@ -794,6 +842,66 @@ intel_bts_constraints(struct perf_event *event)
> }
>
> static struct event_constraint *
> +intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> + unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
> + struct event_constraint *c;
> + struct intel_percore *pc;
> + struct er_account *era;
> + int i;
> + int free_slot;
> + int found;
> +
> + if (!x86_pmu.percore_constraints)
> + return NULL;
> +
> + for (c = x86_pmu.percore_constraints; c->cmask; c++) {
> + if (e != c->code)
> + continue;
> +
> + /*
> + * Allocate resource per core.
> + */
> + c = NULL;
> + pc = cpuc->per_core;
> + if (!pc)
> + break;
> + c = &emptyconstraint;
> + raw_spin_lock(&pc->lock);
> + free_slot = -1;
> + found = 0;
> + for (i = 0; i < MAX_EXTRA_REGS; i++) {
> + era = &pc->regs[i];
> + if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
> + /* Allow sharing same config */
> + if (hwc->extra_config == era->extra_config) {
> + era->ref++;
> + cpuc->percore_used = 1;
> + c = NULL;
> + }
> + /* else conflict */
> + found = 1;
> + break;
> + } else if (era->ref == 0 && free_slot == -1)
> + free_slot = i;
> + }
> + if (!found && free_slot != -1) {
> + era = &pc->regs[free_slot];
> + era->ref = 1;
> + era->extra_reg = hwc->extra_reg;
> + era->extra_config = hwc->extra_config;
> + cpuc->percore_used = 1;
> + c = NULL;
> + }
> + raw_spin_unlock(&pc->lock);
> + return c;
> + }
> +
> + return NULL;
> +}
> +
> +static struct event_constraint *
> intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
> {
> struct event_constraint *c;
> @@ -806,9 +914,50 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
> if (c)
> return c;
>
> + c = intel_percore_constraints(cpuc, event);
> + if (c)
> + return c;
> +
> return x86_get_event_constraints(cpuc, event);
> }
>
> +static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
> + struct perf_event *event)
> +{
> + struct extra_reg *er;
> + struct intel_percore *pc;
> + struct er_account *era;
> + struct hw_perf_event *hwc = &event->hw;
> + int i, allref;
> +
> + if (!cpuc->percore_used)
> + return;
> +
> + for (er = x86_pmu.extra_regs; er->msr; er++) {
> + if (er->event != (hwc->config & er->config_mask))
> + continue;
> +
> + pc = cpuc->per_core;
> + raw_spin_lock(&pc->lock);
> + for (i = 0; i < MAX_EXTRA_REGS; i++) {
> + era = &pc->regs[i];
> + if (era->ref > 0 &&
> + era->extra_config == hwc->extra_config &&
> + era->extra_reg == er->msr) {
> + era->ref--;
> + break;
> + }
> + }
> + allref = 0;
> + for (i = 0; i < MAX_EXTRA_REGS; i++)
> + allref += pc->regs[i].ref;
> + if (allref == 0)
> + cpuc->percore_used = 0;
> + raw_spin_unlock(&pc->lock);
> + break;
> + }
> +}
> +
> static int intel_pmu_hw_config(struct perf_event *event)
> {
> int ret = x86_pmu_hw_config(event);
> @@ -880,11 +1029,43 @@ static __initconst const struct x86_pmu core_pmu = {
> */
> .max_period = (1ULL << 31) - 1,
> .get_event_constraints = intel_get_event_constraints,
> + .put_event_constraints = intel_put_event_constraints,
> .event_constraints = intel_core_event_constraints,
> };
>
> +static int intel_pmu_cpu_prepare(int cpu)
> +{
> + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
> +
> + cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
> + GFP_KERNEL, cpu_to_node(cpu));
> + if (!cpuc->per_core)
> + return NOTIFY_BAD;
> +
> + raw_spin_lock_init(&cpuc->per_core->lock);
> + cpuc->per_core->core_id = -1;
> + return NOTIFY_OK;
> +}
> +
> static void intel_pmu_cpu_starting(int cpu)
> {
> + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
> + int core_id = topology_core_id(cpu);
> + int i;
> +
> + for_each_online_cpu(i) {
> + struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
> +
> + if (pc && pc->core_id == core_id) {
> + kfree(cpuc->per_core);
> + cpuc->per_core = pc;
> + break;
> + }
> + }
> +
> + cpuc->per_core->core_id = core_id;
> + cpuc->per_core->refcnt++;
> +
> init_debug_store_on_cpu(cpu);
> /*
> * Deal with CPUs that don't clear their LBRs on power-up.
> @@ -894,6 +1075,15 @@ static void intel_pmu_cpu_starting(int cpu)
>
> static void intel_pmu_cpu_dying(int cpu)
> {
> + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
> + struct intel_percore *pc = cpuc->per_core;
> +
> + if (pc) {
> + if (pc->core_id == -1 || --pc->refcnt == 0)
> + kfree(pc);
> + cpuc->per_core = NULL;
> + }
> +
> fini_debug_store_on_cpu(cpu);
> }
>
> @@ -918,7 +1108,9 @@ static __initconst const struct x86_pmu intel_pmu = {
> */
> .max_period = (1ULL << 31) - 1,
> .get_event_constraints = intel_get_event_constraints,
> + .put_event_constraints = intel_put_event_constraints,
>
> + .cpu_prepare = intel_pmu_cpu_prepare,
> .cpu_starting = intel_pmu_cpu_starting,
> .cpu_dying = intel_pmu_cpu_dying,
> };
> @@ -1036,7 +1228,10 @@ static __init int intel_pmu_init(void)
> intel_pmu_lbr_init_nhm();
>
> x86_pmu.event_constraints = intel_nehalem_event_constraints;
> + x86_pmu.percore_constraints =
> + intel_nehalem_percore_constraints;
> x86_pmu.enable_all = intel_pmu_nhm_enable_all;
> + x86_pmu.extra_regs = intel_nehalem_extra_regs;
> pr_cont("Nehalem events, ");
> break;
>
> @@ -1058,7 +1253,10 @@ static __init int intel_pmu_init(void)
> intel_pmu_lbr_init_nhm();
>
> x86_pmu.event_constraints = intel_westmere_event_constraints;
> + x86_pmu.percore_constraints =
> + intel_westmere_percore_constraints;
> x86_pmu.enable_all = intel_pmu_nhm_enable_all;
> + x86_pmu.extra_regs = intel_westmere_extra_regs;
> pr_cont("Westmere events, ");
> break;
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index dda5b0a..d24d9ab 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -539,6 +539,8 @@ struct hw_perf_event {
> unsigned long event_base;
> int idx;
> int last_cpu;
> + unsigned int extra_reg;
> + u64 extra_config;
> };
> struct { /* software */
> struct hrtimer hrtimer;
> --
> 1.7.3
>