Message-ID: <e8b5086e-735f-51ea-f413-3b2d456c0e32@linux.intel.com>
Date:   Mon, 29 Aug 2022 11:55:12 -0400
From:   "Liang, Kan" <kan.liang@...ux.intel.com>
To:     Peter Zijlstra <peterz@...radead.org>, x86@...nel.org,
        eranian@...gle.com, ravi.bangoria@....com
Cc:     linux-kernel@...r.kernel.org, acme@...nel.org,
        mark.rutland@....com, alexander.shishkin@...ux.intel.com,
        jolsa@...nel.org, namhyung@...nel.org
Subject: Re: [PATCH v2 9/9] perf/x86/intel: Optimize short PEBS counters



On 2022-08-29 6:10 a.m., Peter Zijlstra wrote:
> XXX: crazy idea; really not sure this is worth the extra complexity
> 
> It is possible to have the counter programmed to a value smaller than
> the sampling period.

I'm not quite sure how the above case can be triggered.

In most cases, pmc_prev_left[idx] should be the same as
hwc->period_left.

For the left < 2 or the limit_period case, I think perf usually
programs a larger value, so pmc_prev_left[idx] > hwc->period_left.

It looks like the only case which triggers pmc_prev_left[idx] <
hwc->period_left is left > max_period. I don't think it's common for a
user to set a period larger than the HW counter limit. Even if they set
a huge period, the PEBS overhead should not be an issue, since it may
take days to trigger a sample.
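
For reference, the programming logic I have in mind is roughly the
tail end of x86_perf_event_set_period() (a simplified sketch from
memory, not the exact upstream code):

	s64 left = local64_read(&hwc->period_left);

	/* ... left <= 0 is handled by adding the sample period back ... */

	/* Quirk: some CPUs don't like a reload value of just 1 */
	if (unlikely(left < 2))
		left = 2;

	/* Clamp to the HW counter width */
	if (left > x86_pmu.max_period)
		left = x86_pmu.max_period;

	/* x86_pmu.limit_period(), if set, may only bump 'left' further up */

	this_cpu_write(pmc_prev_left[hwc->idx], left);
	wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);

The left < 2 quirk and limit_period only ever make the programmed
value larger, so the max_period clamp looks like the only path where
pmc_prev_left[idx] ends up smaller than hwc->period_left. As a rough
time scale: with 48-bit counters max_period is about 2^47 events, so
even at a few GHz a single overflow already takes well over ten hours.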

If so, it may not be worth introducing this extra complexity just to
handle such a rare case.

Thanks,
Kan

> In that case, the code suppresses the sample,
> recalculates the remaining events and reprograms the counter.
> 
> This should also work for PEBS counters (and it does); however
> triggering a full PEBS assist and parsing the event from the DS is
> more overhead than is required.
> 
> As such, detect this case and temporarily suppress PEBS. This will
> then trigger a regular PMI for the counter which will reprogram the
> event and re-enable PEBS once the target period is in reach.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> ---
>  arch/x86/events/intel/core.c |   80 ++++++++++++++++++++++++++++++++++++++-----
>  arch/x86/events/perf_event.h |    9 ++++
>  2 files changed, 81 insertions(+), 8 deletions(-)
> 
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -2722,12 +2722,7 @@ static void intel_pmu_enable_fixed(struc
>  
>  	intel_set_masks(event, idx);
>  
> -	/*
> -	 * Enable IRQ generation (0x8), if not PEBS,
> -	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
> -	 * if requested:
> -	 */
> -	if (!event->attr.precise_ip)
> +	if (hwc->config & ARCH_PERFMON_EVENTSEL_INT)
>  		bits |= 0x8;
>  	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
>  		bits |= 0x2;
> @@ -2816,12 +2811,75 @@ int intel_pmu_save_and_restart(struct pe
>  	return static_call(x86_pmu_set_period)(event);
>  }
>  
> +static void intel_pmu_update_config(struct perf_event *event)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	u64 config = hwc->config;
> +
> +	if (hwc->idx >= INTEL_PMC_IDX_FIXED) { /* PEBS is limited to real PMCs */
> +		u64 mask = 0xf, bits = 0;
> +
> +		if (config & ARCH_PERFMON_EVENTSEL_INT)
> +			bits |= 0x8;
> +		if (config & ARCH_PERFMON_EVENTSEL_USR)
> +			bits |= 0x2;
> +		if (config & ARCH_PERFMON_EVENTSEL_OS)
> +			bits |= 0x1;
> +
> +		bits <<= (hwc->idx * 4);
> +		mask <<= (hwc->idx * 4);
> +
> +		config = this_cpu_read(intel_fixed_ctrl);
> +		config &= ~mask;
> +		config |= bits;
> +		this_cpu_write(intel_fixed_ctrl, config);
> +	}
> +
> +	wrmsrl(hwc->config_base, config);
> +}
> +
> +static void intel_pmu_handle_short_pebs(struct perf_event *event)
> +{
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +	struct hw_perf_event *hwc = &event->hw;
> +
> +	/* if the event is not enabled; intel_pmu_pebs_enable() DTRT */
> +	if (!test_bit(hwc->idx, cpuc->active_mask))
> +		return;
> +
> +	WARN_ON_ONCE(cpuc->enabled);
> +
> +	if (intel_pmu_is_short_pebs(event)) {
> +
> +		/* stripped down intel_pmu_pebs_disable() */
> +		cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
> +		hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
> +
> +		intel_pmu_update_config(event);
> +
> +	} else if (!(cpuc->pebs_enabled & (1ULL << hwc->idx))) {
> +
> +		/* stripped down intel_pmu_pebs_enable() */
> +		hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
> +		cpuc->pebs_enabled |= (1ULL << hwc->idx);
> +
> +		intel_pmu_update_config(event);
> +	}
> +}
> +
>  static int intel_pmu_set_period(struct perf_event *event)
>  {
> +	int ret;
> +
>  	if (unlikely(is_topdown_count(event)))
>  		return static_call(intel_pmu_set_topdown_event_period)(event);
>  
> -	return x86_perf_event_set_period(event);
> +	ret = x86_perf_event_set_period(event);
> +
> +	if (event->attr.precise_ip)
> +		intel_pmu_handle_short_pebs(event);
> +
> +	return ret;
>  }
>  
>  static u64 intel_pmu_update(struct perf_event *event)
> @@ -2975,6 +3033,9 @@ static int handle_pmi_common(struct pt_r
>  		 * MSR_IA32_PEBS_ENABLE is not updated. Because the
>  		 * cpuc->enabled has been forced to 0 in PMI.
>  		 * Update the MSR if pebs_enabled is changed.
> +		 *
> +		 * Also; short counters temporarily disable PEBS, see
> +		 * intel_pmu_set_period().
>  		 */
>  		if (pebs_enabled != cpuc->pebs_enabled)
>  			wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
> @@ -3856,7 +3917,10 @@ static int intel_pmu_hw_config(struct pe
>  		if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
>  			return -EINVAL;
>  
> -		if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
> +		if (!(event->attr.freq ||
> +		      (event->attr.wakeup_events && !event->attr.watermark) ||
> +		      event->attr.sample_period > x86_pmu.max_period)) {
> +
>  			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
>  			if (!(event->attr.sample_type &
>  			      ~intel_pmu_large_pebs_flags(event))) {
> --- a/arch/x86/events/perf_event.h
> +++ b/arch/x86/events/perf_event.h
> @@ -1063,6 +1063,15 @@ static inline bool x86_pmu_has_lbr_calls
>  DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
>  DECLARE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
>  
> +static inline bool intel_pmu_is_short_pebs(struct perf_event *event)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	s64 counter = this_cpu_read(pmc_prev_left[hwc->idx]);
> +	s64 left = local64_read(&hwc->period_left);
> +
> +	return counter < left;
> +}
> +
>  int x86_perf_event_set_period(struct perf_event *event);
>  
>  /*
> 
> 
