lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d7dd633b-e28a-155a-a8e2-0e5a83b4eead@linux.ibm.com>
Date:   Wed, 24 Mar 2021 10:05:23 +0530
From:   Madhavan Srinivasan <maddy@...ux.ibm.com>
To:     Athira Rajeev <atrajeev@...ux.vnet.ibm.com>,
        linuxppc-dev@...ts.ozlabs.org, linux-kernel@...r.kernel.org,
        linux-perf-users@...r.kernel.org, mpe@...erman.id.au,
        acme@...nel.org, jolsa@...nel.org
Cc:     ravi.bangoria@...ux.ibm.com, kjain@...ux.ibm.com,
        kan.liang@...ux.intel.com, peterz@...radead.org
Subject: Re: [PATCH V2 1/5] powerpc/perf: Expose processor pipeline stage
 cycles using PERF_SAMPLE_WEIGHT_STRUCT


On 3/22/21 8:27 PM, Athira Rajeev wrote:
> Performance Monitoring Unit (PMU) registers in powerpc provide
> information on cycles elapsed between different stages in the
> pipeline. This can be used for application tuning. On ISA v3.1
> platform, this information is exposed by sampling registers.
> Patch adds kernel support to capture two of the cycle counters
> as part of perf sample using the sample type:
> PERF_SAMPLE_WEIGHT_STRUCT.
>
> The power PMU function 'get_mem_weight' currently uses 64 bit weight
> field of perf_sample_data to capture memory latency. But following the
> introduction of PERF_SAMPLE_WEIGHT_TYPE, weight field could contain
> 64-bit or 32-bit value depending on the architecture support for
> PERF_SAMPLE_WEIGHT_STRUCT. This patch uses WEIGHT_STRUCT to expose the
> pipeline stage cycles info. Hence update the ppmu functions to work for
> 64-bit and 32-bit weight values.
>
> If the sample type is PERF_SAMPLE_WEIGHT, use the 64-bit weight field.
> If the sample type is PERF_SAMPLE_WEIGHT_STRUCT, memory subsystem
> latency is stored in the low 32 bits of perf_sample_weight structure.
> Also for CPU_FTR_ARCH_31, capture the two cycle counter information in
> two 16 bit fields of perf_sample_weight structure.

Changes look fine to me.

Reviewed-by: Madhavan Srinivasan <maddy@...ux.ibm.com>


> Signed-off-by: Athira Rajeev <atrajeev@...ux.vnet.ibm.com>
> ---
>   arch/powerpc/include/asm/perf_event_server.h |  2 +-
>   arch/powerpc/perf/core-book3s.c              |  4 ++--
>   arch/powerpc/perf/isa207-common.c            | 29 +++++++++++++++++++++++++---
>   arch/powerpc/perf/isa207-common.h            |  6 +++++-
>   4 files changed, 34 insertions(+), 7 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
> index 00e7e671bb4b..112cf092d7b3 100644
> --- a/arch/powerpc/include/asm/perf_event_server.h
> +++ b/arch/powerpc/include/asm/perf_event_server.h
> @@ -43,7 +43,7 @@ struct power_pmu {
>   				u64 alt[]);
>   	void		(*get_mem_data_src)(union perf_mem_data_src *dsrc,
>   				u32 flags, struct pt_regs *regs);
> -	void		(*get_mem_weight)(u64 *weight);
> +	void		(*get_mem_weight)(u64 *weight, u64 type);
>   	unsigned long	group_constraint_mask;
>   	unsigned long	group_constraint_val;
>   	u64             (*bhrb_filter_map)(u64 branch_sample_type);
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 766f064f00fb..6936763246bd 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -2206,9 +2206,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
>   						ppmu->get_mem_data_src)
>   			ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
>   
> -		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
> +		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
>   						ppmu->get_mem_weight)
> -			ppmu->get_mem_weight(&data.weight.full);
> +			ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
>   
>   		if (perf_event_overflow(event, &data, regs))
>   			power_pmu_stop(event, 0);
> diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
> index e4f577da33d8..5dcbdbd54598 100644
> --- a/arch/powerpc/perf/isa207-common.c
> +++ b/arch/powerpc/perf/isa207-common.c
> @@ -284,8 +284,10 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
>   	}
>   }
>   
> -void isa207_get_mem_weight(u64 *weight)
> +void isa207_get_mem_weight(u64 *weight, u64 type)
>   {
> +	union perf_sample_weight *weight_fields;
> +	u64 weight_lat;
>   	u64 mmcra = mfspr(SPRN_MMCRA);
>   	u64 exp = MMCRA_THR_CTR_EXP(mmcra);
>   	u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
> @@ -296,9 +298,30 @@ void isa207_get_mem_weight(u64 *weight)
>   		mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);
>   
>   	if (val == 0 || val == 7)
> -		*weight = 0;
> +		weight_lat = 0;
>   	else
> -		*weight = mantissa << (2 * exp);
> +		weight_lat = mantissa << (2 * exp);
> +
> +	/*
> +	 * Use 64 bit weight field (full) if sample type is
> +	 * WEIGHT.
> +	 *
> +	 * if sample type is WEIGHT_STRUCT:
> +	 * - store memory latency in the lower 32 bits.
> +	 * - For ISA v3.1, use remaining two 16 bit fields of
> +	 *   perf_sample_weight to store cycle counter values
> +	 *   from sier2.
> +	 */
> +	weight_fields = (union perf_sample_weight *)weight;
> +	if (type & PERF_SAMPLE_WEIGHT)
> +		weight_fields->full = weight_lat;
> +	else {
> +		weight_fields->var1_dw = (u32)weight_lat;
> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +			weight_fields->var2_w = P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2));
> +			weight_fields->var3_w = P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2));
> +		}
> +	}
>   }
>   
>   int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1)
> diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
> index 1af0e8c97ac7..fc30d43c4d0c 100644
> --- a/arch/powerpc/perf/isa207-common.h
> +++ b/arch/powerpc/perf/isa207-common.h
> @@ -265,6 +265,10 @@
>   #define ISA207_SIER_DATA_SRC_SHIFT	53
>   #define ISA207_SIER_DATA_SRC_MASK	(0x7ull << ISA207_SIER_DATA_SRC_SHIFT)
>   
> +/* Bits in SIER2/SIER3 for Power10 */
> +#define P10_SIER2_FINISH_CYC(sier2)	(((sier2) >> (63 - 37)) & 0x7fful)
> +#define P10_SIER2_DISPATCH_CYC(sier2)	(((sier2) >> (63 - 13)) & 0x7fful)
> +
>   #define P(a, b)				PERF_MEM_S(a, b)
>   #define PH(a, b)			(P(LVL, HIT) | P(a, b))
>   #define PM(a, b)			(P(LVL, MISS) | P(a, b))
> @@ -278,6 +282,6 @@ int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
>   					const unsigned int ev_alt[][MAX_ALT]);
>   void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
>   							struct pt_regs *regs);
> -void isa207_get_mem_weight(u64 *weight);
> +void isa207_get_mem_weight(u64 *weight, u64 type);
>   
>   #endif

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ