[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4F2238FA.2090509@linux.vnet.ibm.com>
Date: Fri, 27 Jan 2012 11:11:14 +0530
From: Anshuman Khandual <khandual@...ux.vnet.ibm.com>
To: Stephane Eranian <eranian@...gle.com>
CC: linux-kernel@...r.kernel.org, peterz@...radead.org, mingo@...e.hu,
acme@...radead.org, robert.richter@....com, ming.m.lin@...el.com,
andi@...stfloor.org, asharma@...com, ravitillo@....gov,
vweaver1@...s.utk.edu
Subject: Re: [PATCH 05/13] perf_events: add LBR mappings for PERF_SAMPLE_BRANCH
filters (v3)
On Monday 09 January 2012 10:19 PM, Stephane Eranian wrote:
> This patch adds the mappings from the generic PERF_SAMPLE_BRANCH_*
> filters to the actual Intel X86 LBR filters, whenever they exist.
>
> Signed-off-by: Stephane Eranian <eranian@...gle.com>
Reviewed-by: Anshuman Khandual <khandual@...ux.vnet.ibm.com>
> ---
> arch/x86/kernel/cpu/perf_event.h | 2 +
> arch/x86/kernel/cpu/perf_event_intel.c | 2 +-
> arch/x86/kernel/cpu/perf_event_intel_lbr.c | 99 +++++++++++++++++++++++++++-
> 3 files changed, 100 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index 4535ada..776fb5a 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -535,6 +535,8 @@ void intel_pmu_lbr_init_nhm(void);
>
> void intel_pmu_lbr_init_atom(void);
>
> +void intel_pmu_lbr_init_snb(void);
> +
> int p4_pmu_init(void);
>
> int p6_pmu_init(void);
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 97f7bb5..b0db016 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1757,7 +1757,7 @@ __init int intel_pmu_init(void)
> memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
> sizeof(hw_cache_event_ids));
>
> - intel_pmu_lbr_init_nhm();
> + intel_pmu_lbr_init_snb();
>
> x86_pmu.event_constraints = intel_snb_event_constraints;
> x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> index e14431f..8a1eb6c 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> @@ -14,6 +14,47 @@ enum {
> };
>
> /*
> + * Intel LBR_SELECT bits
> + * Intel Vol3a, April 2011, Section 16.7 Table 16-10
> + *
> + * Hardware branch filter (not available on all CPUs)
> + */
> +#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
> +#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
> +#define LBR_JCC_BIT 2 /* do not capture conditional branches */
> +#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
> +#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
> +#define LBR_RETURN_BIT 5 /* do not capture near returns */
> +#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
> +#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
> +#define LBR_FAR_BIT 8 /* do not capture far branches */
> +
> +#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
> +#define LBR_USER (1 << LBR_USER_BIT)
> +#define LBR_JCC (1 << LBR_JCC_BIT)
> +#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
> +#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
> +#define LBR_RETURN (1 << LBR_RETURN_BIT)
> +#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
> +#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
> +#define LBR_FAR (1 << LBR_FAR_BIT)
> +
> +#define LBR_PLM (LBR_KERNEL | LBR_USER)
> +
> +#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */
> +
> +#define LBR_ANY \
> + (LBR_JCC |\
> + LBR_REL_CALL |\
> + LBR_IND_CALL |\
> + LBR_RETURN |\
> + LBR_REL_JMP |\
> + LBR_IND_JMP |\
> + LBR_FAR)
> +
> +#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
> +
> +/*
> * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
> * otherwise it becomes near impossible to get a reliable stack.
> */
> @@ -153,8 +194,6 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
> cpuc->lbr_stack.nr = i;
> }
>
> -#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
> -
> /*
> * Due to lack of segmentation in Linux the effective address (offset)
> * is the same as the linear address, allowing us to merge the LIP and EIP
> @@ -202,26 +241,82 @@ void intel_pmu_lbr_read(void)
> intel_pmu_lbr_read_64(cpuc);
> }
>
> +/*
> + * Map interface branch filters onto LBR filters
> + */
> +static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
> + [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
> + [PERF_SAMPLE_BRANCH_USER] = LBR_USER,
> + [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
> + [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
> + | LBR_IND_JMP | LBR_FAR,
> + /*
> + * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
> + */
> + [PERF_SAMPLE_BRANCH_ANY_CALL] =
> + LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
> + /*
> + * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
> + */
> + [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
> +};
> +
> +static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
> + [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
> + [PERF_SAMPLE_BRANCH_USER] = LBR_USER,
> + [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
> + [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
> + [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL
> + | LBR_FAR,
> + [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL,
> +};
> +
> +/* core */
> void intel_pmu_lbr_init_core(void)
> {
> x86_pmu.lbr_nr = 4;
> x86_pmu.lbr_tos = MSR_LBR_TOS;
> x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
> x86_pmu.lbr_to = MSR_LBR_CORE_TO;
> +
> + pr_cont("4-deep LBR, ");
> }
>
> +/* nehalem/westmere */
> void intel_pmu_lbr_init_nhm(void)
> {
> x86_pmu.lbr_nr = 16;
> x86_pmu.lbr_tos = MSR_LBR_TOS;
> x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
> x86_pmu.lbr_to = MSR_LBR_NHM_TO;
> +
> + x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
> + x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
> +
> + pr_cont("16-deep LBR, ");
> }
>
> +/* sandy bridge */
> +void intel_pmu_lbr_init_snb(void)
> +{
> + x86_pmu.lbr_nr = 16;
> + x86_pmu.lbr_tos = MSR_LBR_TOS;
> + x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
> + x86_pmu.lbr_to = MSR_LBR_NHM_TO;
> +
> + x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
> + x86_pmu.lbr_sel_map = snb_lbr_sel_map;
> +
> + pr_cont("16-deep LBR, ");
> +}
> +
> +/* atom */
> void intel_pmu_lbr_init_atom(void)
> {
> x86_pmu.lbr_nr = 8;
> x86_pmu.lbr_tos = MSR_LBR_TOS;
> x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
> x86_pmu.lbr_to = MSR_LBR_CORE_TO;
> +
> + pr_cont("8-deep LBR, ");
> }
--
Linux Technology Centre
IBM Systems and Technology Group
Bangalore India
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists