Implement support for Intel LBR stacks that support FREEZE_LBRS_ON_PMI. We do not (yet?) support the LBR config register because that is SMT wide and would also put undue restraints on the PEBS users. Signed-off-by: Peter Zijlstra LKML-Reference: --- arch/x86/kernel/cpu/perf_event.c | 18 ++ arch/x86/kernel/cpu/perf_event_intel.c | 13 + arch/x86/kernel/cpu/perf_event_intel_lbr.c | 228 +++++++++++++++++++++++++++++ include/linux/perf_event.h | 11 + 4 files changed, 270 insertions(+) Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c +++ linux-2.6/arch/x86/kernel/cpu/perf_event.c @@ -48,6 +48,8 @@ struct amd_nb { struct event_constraint event_constraints[X86_PMC_IDX_MAX]; }; +#define MAX_LBR_ENTRIES 16 + struct cpu_hw_events { /* * Generic x86 PMC bits @@ -70,6 +72,14 @@ struct cpu_hw_events { u64 pebs_enabled; /* + * Intel LBR bits + */ + int lbr_users; + void *lbr_context; + struct perf_branch_stack lbr_stack; + struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; + + /* * AMD specific bits */ struct amd_nb *amd_nb; @@ -154,6 +164,13 @@ struct x86_pmu { int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; + + /* + * Intel LBR + */ + unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ + int lbr_nr; /* hardware stack size */ + int lbr_format; /* hardware format */ }; static struct x86_pmu x86_pmu __read_mostly; @@ -1238,6 +1255,7 @@ undo: #include "perf_event_amd.c" #include "perf_event_p6.c" +#include "perf_event_intel_lbr.c" #include "perf_event_intel_ds.c" #include "perf_event_intel.c" Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c +++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c @@ -480,6 +480,7 @@ static void intel_pmu_disable_all(void) intel_pmu_disable_bts(); intel_pmu_pebs_disable_all(); + intel_pmu_lbr_disable_all(); } static void intel_pmu_enable_all(void) @@ -499,6 +500,7 @@ static void intel_pmu_enable_all(void) } intel_pmu_pebs_enable_all(); + intel_pmu_lbr_enable_all(); } static inline u64 intel_pmu_get_status(void) @@ -675,6 +677,8 @@ again: inc_irq_stat(apic_perf_irqs); ack = status; + intel_pmu_lbr_read(); + /* * PEBS overflow sets bit 62 in the global status register */ @@ -847,6 +851,8 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + intel_pmu_lbr_init_core(); + x86_pmu.event_constraints = intel_core2_event_constraints; pr_cont("Core2 events, "); break; @@ -856,13 +862,18 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + intel_pmu_lbr_init_nhm(); + x86_pmu.event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; + case 28: /* Atom */ memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + intel_pmu_lbr_init_atom(); + x86_pmu.event_constraints = intel_gen_event_constraints; pr_cont("Atom events, "); break; @@ -872,6 +883,8 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + intel_pmu_lbr_init_nhm(); + x86_pmu.event_constraints = intel_westmere_event_constraints; pr_cont("Westmere events, "); break; Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c =================================================================== --- /dev/null +++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -0,0 +1,228 @@ +#ifdef CONFIG_CPU_SUP_INTEL + +enum { + LBR_FORMAT_32 = 0x00, + LBR_FORMAT_LIP = 0x01, + LBR_FORMAT_EIP = 0x02, + LBR_FORMAT_EIP_FLAGS = 0x03, +}; + +/* + * We only support LBR implementations that have FREEZE_LBRS_ON_PMI + * otherwise it becomes near impossible to get a reliable stack. + */ + +#define X86_DEBUGCTL_LBR (1 << 0) +#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI (1 << 11) + +static void __intel_pmu_lbr_enable(void) +{ + u64 debugctl; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI); + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); +} + +static void __intel_pmu_lbr_disable(void) +{ + u64 debugctl; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI); + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); +} + +static void intel_pmu_lbr_reset_32(void) +{ + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) + wrmsrl(x86_pmu.lbr_from + i, 0); +} + +static void intel_pmu_lbr_reset_64(void) +{ + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + wrmsrl(x86_pmu.lbr_from + i, 0); + wrmsrl(x86_pmu.lbr_to + i, 0); + } +} + +static void intel_pmu_lbr_reset(void) +{ + if (x86_pmu.lbr_format == LBR_FORMAT_32) + intel_pmu_lbr_reset_32(); + else + intel_pmu_lbr_reset_64(); +} + +static void intel_pmu_lbr_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + WARN_ON(cpuc->enabled); + + /* + * Reset the LBR stack if this is the first LBR user or + * we changed task context so as to avoid data leaks. + */ + + if (!cpuc->lbr_users || + (event->ctx->task && cpuc->lbr_context != event->ctx)) { + intel_pmu_lbr_reset(); + cpuc->lbr_context = event->ctx; + } + + cpuc->lbr_users++; +} + +static void intel_pmu_lbr_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + cpuc->lbr_users--; + + BUG_ON(cpuc->lbr_users < 0); + WARN_ON(cpuc->enabled); +} + +static void intel_pmu_lbr_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->lbr_users) + __intel_pmu_lbr_enable(); +} + +static void intel_pmu_lbr_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->lbr_users) + __intel_pmu_lbr_disable(); +} + +static inline u64 intel_pmu_lbr_tos(void) +{ + u64 tos; + + rdmsrl(x86_pmu.lbr_tos, tos); + + return tos; +} + +static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) +{ + unsigned long mask = x86_pmu.lbr_nr - 1; + u64 tos = intel_pmu_lbr_tos(); + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++, tos--) { + unsigned long lbr_idx = (tos - i) & mask; + union { + struct { + u32 from; + u32 to; + }; + u64 lbr; + } msr_lastbranch; + + rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); + + cpuc->lbr_entries[i].from = msr_lastbranch.from; + cpuc->lbr_entries[i].to = msr_lastbranch.to; + cpuc->lbr_entries[i].flags = 0; + } + cpuc->lbr_stack.nr = i; +} + +#define LBR_FROM_FLAG_MISPRED (1ULL << 63) + +/* + * Due to lack of segmentation in Linux the effective address (offset) + * is the same as the linear address, allowing us to merge the LIP and EIP + * LBR formats. + */ +static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) +{ + unsigned long mask = x86_pmu.lbr_nr - 1; + u64 tos = intel_pmu_lbr_tos(); + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++, tos--) { + unsigned long lbr_idx = (tos - i) & mask; + u64 from, to, flags = 0; + + rdmsrl(x86_pmu.lbr_from + lbr_idx, from); + rdmsrl(x86_pmu.lbr_to + lbr_idx, to); + + if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) { + flags = !!(from & LBR_FROM_FLAG_MISPRED); + from = (u64)((((s64)from) << 1) >> 1); + } + + cpuc->lbr_entries[i].from = from; + cpuc->lbr_entries[i].to = to; + cpuc->lbr_entries[i].flags = flags; + } + cpuc->lbr_stack.nr = i; +} + +static void intel_pmu_lbr_read(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!cpuc->lbr_users) + return; + + if (x86_pmu.lbr_format == LBR_FORMAT_32) + intel_pmu_lbr_read_32(cpuc); + else + intel_pmu_lbr_read_64(cpuc); +} + +static int intel_pmu_lbr_format(void) +{ + u64 capabilities; + + rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); + return capabilities & 0x1f; +} + +static void intel_pmu_lbr_init_core(void) +{ + x86_pmu.lbr_format = intel_pmu_lbr_format(); + x86_pmu.lbr_nr = 4; + x86_pmu.lbr_tos = 0x01c9; + x86_pmu.lbr_from = 0x40; + x86_pmu.lbr_to = 0x60; +} + +static void intel_pmu_lbr_init_nhm(void) +{ + x86_pmu.lbr_format = intel_pmu_lbr_format(); + x86_pmu.lbr_nr = 16; + x86_pmu.lbr_tos = 0x01c9; + x86_pmu.lbr_from = 0x680; + x86_pmu.lbr_to = 0x6c0; +} + +static void intel_pmu_lbr_init_atom(void) +{ + x86_pmu.lbr_format = intel_pmu_lbr_format(); + x86_pmu.lbr_nr = 8; + x86_pmu.lbr_tos = 0x01c9; + x86_pmu.lbr_from = 0x40; + x86_pmu.lbr_to = 0x60; +} + +#endif /* CONFIG_CPU_SUP_INTEL */ Index: linux-2.6/include/linux/perf_event.h =================================================================== --- linux-2.6.orig/include/linux/perf_event.h +++ linux-2.6/include/linux/perf_event.h @@ -467,6 +467,17 @@ struct perf_raw_record { void *data; }; +struct perf_branch_entry { + __u64 from; + __u64 to; + __u64 flags; +}; + +struct perf_branch_stack { + __u64 nr; + struct perf_branch_entry entries[0]; +}; + struct task_struct; /** -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/