Date:   Mon, 8 Apr 2019 12:06:31 -0400
From:   "Liang, Kan" <kan.liang@...ux.intel.com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     acme@...nel.org, mingo@...hat.com, linux-kernel@...r.kernel.org,
        tglx@...utronix.de, jolsa@...nel.org, eranian@...gle.com,
        alexander.shishkin@...ux.intel.com, ak@...ux.intel.com
Subject: Re: [PATCH V5 00/12] perf: Add Icelake support (kernel only, except
 Topdown)


On 4/8/2019 11:41 AM, Peter Zijlstra wrote:
> 
> I currently have something like the below on top, is that correct?

Yes, it's correct.

> 
> If so, I'll fold it back in.

Thanks. It's really appreciated.
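
One aside for the archives, since the raw 0x00c0/0x2ca constants are now
spelled via X86_CONFIG(): the macro only packs the named fields into the
raw config value, so the encodings themselves are unchanged. A tiny
stand-alone sketch (my own cut-down union with just the event/umask
fields, not the kernel's full x86_pmu_config definition):

/* sketch.c - illustrative only; a cut-down stand-in for the kernel's
 * union x86_pmu_config, keeping just the fields used in this patch. */
#include <stdint.h>
#include <stdio.h>

union pmu_config_sketch {
	struct {
		uint64_t event:8;	/* event select, bits 0-7 */
		uint64_t umask:8;	/* unit mask,   bits 8-15 */
	} bits;
	uint64_t value;
};

/* Same idea as X86_CONFIG(): build the raw config from named fields. */
#define CONFIG_SKETCH(...) \
	(((union pmu_config_sketch){ .bits = { __VA_ARGS__ } }).value)

int main(void)
{
	/* Prints 0xc0 and 0x2ca, matching the constants replaced below. */
	printf("%#llx\n", (unsigned long long)CONFIG_SKETCH(.event = 0xc0));
	printf("%#llx\n",
	       (unsigned long long)CONFIG_SKETCH(.event = 0xca, .umask = 0x02));
	return 0;
}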

Kan

> 
> 
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -563,16 +563,17 @@ int x86_pmu_hw_config(struct perf_event
>   	/* sample_regs_user never support XMM registers */
>   	if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS))
>   		return -EINVAL;
> +
>   	/*
>   	 * Besides the general purpose registers, XMM registers may
>   	 * be collected in PEBS on some platforms, e.g. Icelake
>   	 */
>   	if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) {
> -		if (!is_sampling_event(event) ||
> -		    !event->attr.precise_ip ||
> -		    x86_pmu.pebs_no_xmm_regs)
> +		if (x86_pmu.pebs_no_xmm_regs)
>   			return -EINVAL;
>   
> +		if (!event->attr.precise_ip)
> +			return -EINVAL;
>   	}
>   
>   	return x86_setup_perfctr(event);
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3428,7 +3428,7 @@ icl_get_event_constraints(struct cpu_hw_
>   	 * Force instruction:ppp in Fixed counter 0
>   	 */
>   	if ((event->attr.precise_ip == 3) &&
> -	    ((event->hw.config & X86_RAW_EVENT_MASK) == 0x00c0))
> +	    (event->hw.config == X86_CONFIG(.event=0xc0)))
>   		return &fixed_counter0_constraint;
>   
>   	return hsw_get_event_constraints(cpuc, idx, event);
> @@ -4810,7 +4810,7 @@ __init int intel_pmu_init(void)
>   			hsw_format_attr : nhm_format_attr;
>   		extra_attr = merge_attr(extra_attr, skl_format_attr);
>   		x86_pmu.cpu_events = get_icl_events_attrs();
> -		x86_pmu.force_gpr_event = 0x2ca;
> +		x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
>   		x86_pmu.lbr_pt_coexist = true;
>   		intel_pmu_pebs_data_source_skl(false);
>   		pr_cont("Icelake events, ");
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -853,13 +853,13 @@ struct event_constraint intel_icl_pebs_e
>   	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
>   	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL),	/* SLOTS */
>   
> -	INTEL_PLD_CONSTRAINT(0x1cd, 0xff),		/* MEM_TRANS_RETIRED.LOAD_LATENCY */
> -	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),  /* MEM_INST_RETIRED.LOAD */
> -	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),  /* MEM_INST_RETIRED.STORE */
> +	INTEL_PLD_CONSTRAINT(0x1cd, 0xff),			/* MEM_TRANS_RETIRED.LOAD_LATENCY */
> +	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),	/* MEM_INST_RETIRED.LOAD */
> +	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),	/* MEM_INST_RETIRED.STORE */
>   
>   	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
>   
> -	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), 	     /* MEM_INST_RETIRED.* */
> +	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),		/* MEM_INST_RETIRED.* */
>   
>   	/*
>   	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
> @@ -963,40 +963,42 @@ static u64 pebs_update_adaptive_cfg(stru
>   	u64 pebs_data_cfg = 0;
>   	bool gprs, tsx_weight;
>   
> -	if ((sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) ||
> -	    attr->precise_ip < 2) {
> +	if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
> +	    attr->precise_ip > 1)
> +		return pebs_data_cfg;
>   
> -		if (sample_type & PERF_PEBS_MEMINFO_TYPE)
> -			pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
> +	if (sample_type & PERF_PEBS_MEMINFO_TYPE)
> +		pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
>   
> +	/*
> +	 * We need GPRs when:
> +	 * + user requested them
> +	 * + precise_ip < 2 for the non event IP
> +	 * + For RTM TSX weight we need GPRs for the abort code.
> +	 */
> +	gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
> +	       (attr->sample_regs_intr & PEBS_GPRS_REGS);
> +
> +	tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
> +		     ((attr->config & INTEL_ARCH_EVENT_MASK) ==
> +		      x86_pmu.rtm_abort_event);
> +
> +	if (gprs || (attr->precise_ip < 2) || tsx_weight)
> +		pebs_data_cfg |= PEBS_DATACFG_GPRS;
> +
> +	if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
> +	    (attr->sample_regs_intr & PEBS_XMM_REGS))
> +		pebs_data_cfg |= PEBS_DATACFG_XMMS;
> +
> +	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
>   		/*
> -		 * Cases we need the registers:
> -		 * + user requested registers
> -		 * + precise_ip < 2 for the non event IP
> -		 * + For RTM TSX weight we need GPRs too for the abort
> -		 * code. But we don't want to force GPRs for all other
> -		 * weights.  So add it only collectfor the RTM abort event.
> +		 * For now always log all LBRs. Could configure this
> +		 * later.
>   		 */
> -		gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
> -			      (attr->sample_regs_intr & 0xffffffff);
> -		tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
> -			     ((attr->config & 0xffff) == x86_pmu.force_gpr_event);
> -		if (gprs || (attr->precise_ip < 2) || tsx_weight)
> -			pebs_data_cfg |= PEBS_DATACFG_GPRS;
> -
> -		if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
> -		    (attr->sample_regs_intr >> 32))
> -			pebs_data_cfg |= PEBS_DATACFG_XMMS;
> -
> -		if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
> -			/*
> -			 * For now always log all LBRs. Could configure this
> -			 * later.
> -			 */
> -			pebs_data_cfg |= PEBS_DATACFG_LBRS |
> -				((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
> -		}
> +		pebs_data_cfg |= PEBS_DATACFG_LBRS |
> +			((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
>   	}
> +
>   	return pebs_data_cfg;
>   }
>   
> @@ -1022,13 +1024,8 @@ pebs_update_state(bool needed_cb, struct
>   	}
>   
>   	/*
> -	 * The PEBS record doesn't shrink on the del. Because to get
> -	 * an accurate config needs to go through all the existing pebs events.
> -	 * It's not necessary.
> -	 * There is no harmful for a bigger PEBS record, except little
> -	 * performance impacts.
> -	 * Also, for most cases, the same pebs config is applied for all
> -	 * pebs events.
> +	 * The PEBS record doesn't shrink on pmu::del(). Doing so would require
> +	 * iterating all remaining PEBS events to reconstruct the config.
>   	 */
>   	if (x86_pmu.intel_cap.pebs_baseline && add) {
>   		u64 pebs_data_cfg;
> @@ -1076,8 +1073,7 @@ void intel_pmu_pebs_enable(struct perf_e
>   
>   	cpuc->pebs_enabled |= 1ULL << hwc->idx;
>   
> -	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
> -	    (x86_pmu.version < 5))
> +	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
>   		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
>   	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
>   		cpuc->pebs_enabled |= 1ULL << 63;
> @@ -1766,8 +1762,7 @@ static void intel_pmu_drain_pebs_core(st
>   			       setup_pebs_fixed_sample_data);
>   }
>   
> -static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc,
> -						 int size)
> +static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
>   {
>   	struct perf_event *event;
>   	int bit;
> @@ -1826,8 +1821,7 @@ static void intel_pmu_drain_pebs_nhm(str
>   
>   		/* PEBS v3 has more accurate status bits */
>   		if (x86_pmu.intel_cap.pebs_format >= 3) {
> -			for_each_set_bit(bit, (unsigned long *)&pebs_status,
> -					 size)
> +			for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
>   				counts[bit]++;
>   
>   			continue;
> @@ -1866,8 +1860,7 @@ static void intel_pmu_drain_pebs_nhm(str
>   		 * If collision happened, the record will be dropped.
>   		 */
>   		if (p->status != (1ULL << bit)) {
> -			for_each_set_bit(i, (unsigned long *)&pebs_status,
> -					 x86_pmu.max_pebs_events)
> +			for_each_set_bit(i, (unsigned long *)&pebs_status, size)
>   				error[i]++;
>   			continue;
>   		}
> @@ -1875,7 +1868,7 @@ static void intel_pmu_drain_pebs_nhm(str
>   		counts[bit]++;
>   	}
>   
> -	for (bit = 0; bit < size; bit++) {
> +	for_each_set_bit(bit, (unsigned long *)&mask, size) {
>   		if ((counts[bit] == 0) && (error[bit] == 0))
>   			continue;
>   
> @@ -1939,7 +1932,7 @@ static void intel_pmu_drain_pebs_icl(str
>   			counts[bit]++;
>   	}
>   
> -	for (bit = 0; bit < size; bit++) {
> +	for_each_set_bit(bit, (unsigned long *)mask, size) {
>   		if (counts[bit] == 0)
>   			continue;
>   
> @@ -1980,6 +1973,9 @@ void __init intel_ds_init(void)
>   		char *pebs_qual = "";
>   		int format = x86_pmu.intel_cap.pebs_format;
>   
> +		if (format < 4)
> +			x86_pmu.intel_cap.pebs_baseline = 0;
> +
>   		switch (format) {
>   		case 0:
>   			pr_cont("PEBS fmt0%c, ", pebs_type);
> @@ -2042,8 +2038,6 @@ void __init intel_ds_init(void)
>   			pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
>   			x86_pmu.pebs = 0;
>   		}
> -		if (format != 4)
> -			x86_pmu.intel_cap.pebs_baseline = 0;
>   	}
>   }
>   
> --- a/arch/x86/events/perf_event.h
> +++ b/arch/x86/events/perf_event.h
> @@ -303,8 +303,8 @@ struct cpu_hw_events {
>   	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
>   
>   /*
> - * Only works for Intel events, which has 'small' event codes.
> - * Need to fix the rang compare for 'big' event codes, e.g AMD64_EVENTSEL_EVENT
> + * The constraint_match() function only works for 'simple' event codes
> + * and not for extended (AMD64_EVENTSEL_EVENT) event codes.
>    */
>   #define EVENT_CONSTRAINT_RANGE(c, e, n, m) \
>   	__EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0)
> @@ -672,12 +672,12 @@ struct x86_pmu {
>   			pebs_no_xmm_regs	:1;
>   	int		pebs_record_size;
>   	int		pebs_buffer_size;
> +	int		max_pebs_events;
>   	void		(*drain_pebs)(struct pt_regs *regs);
>   	struct event_constraint *pebs_constraints;
>   	void		(*pebs_aliases)(struct perf_event *event);
> -	int 		max_pebs_events;
>   	unsigned long	large_pebs_flags;
> -	u64		force_gpr_event;
> +	u64		rtm_abort_event;
>   
>   	/*
>   	 * Intel LBR
> 
