Message-ID: <20250314102031.GL19344@noisy.programming.kicks-ass.net>
Date: Fri, 14 Mar 2025 11:20:31 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: kan.liang@...ux.intel.com
Cc: mingo@...nel.org, acme@...nel.org, namhyung@...nel.org,
	irogers@...gle.com, adrian.hunter@...el.com, ak@...ux.intel.com,
	linux-kernel@...r.kernel.org, eranian@...gle.com,
	thomas.falcon@...el.com
Subject: Re: [PATCH V2 3/3] perf/x86/intel: Support auto counter reload

On Thu, Oct 10, 2024 at 12:28:44PM -0700, kan.liang@...ux.intel.com wrote:

> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 726ef13c2c81..d3bdc7d18d3f 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -2851,6 +2851,54 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
>  	cpuc->fixed_ctrl_val |= bits;
>  }
>  
> +static void intel_pmu_config_acr(int idx, u64 mask, u32 reload)
> +{
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +	int msr_b, msr_c;
> +
> +	if (!mask && !cpuc->acr_cfg_b[idx])
> +		return;
> +
> +	if (idx < INTEL_PMC_IDX_FIXED) {
> +		msr_b = MSR_IA32_PMC_V6_GP0_CFG_B;
> +		msr_c = MSR_IA32_PMC_V6_GP0_CFG_C;
> +	} else {
> +		msr_b = MSR_IA32_PMC_V6_FX0_CFG_B;
> +		msr_c = MSR_IA32_PMC_V6_FX0_CFG_C;
> +		idx -= INTEL_PMC_IDX_FIXED;
> +	}
> +
> +	if (cpuc->acr_cfg_b[idx] != mask) {
> +		wrmsrl(msr_b + x86_pmu.addr_offset(idx, false), mask);
> +		cpuc->acr_cfg_b[idx] = mask;
> +	}
> +	/* Only need to update the reload value when there is a valid config value. */
> +	if (mask && cpuc->acr_cfg_c[idx] != reload) {
> +		wrmsrl(msr_c + x86_pmu.addr_offset(idx, false), reload);
> +		cpuc->acr_cfg_c[idx] = reload;
> +	}
> +}
> +
> +static void intel_pmu_enable_acr(struct perf_event *event)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +
> +	/* The PMU doesn't support ACR */
> +	if (!hybrid(event->pmu, acr_cntr_mask64))
> +		return;

In which case we shouldn't have been able to create such an event,
right?

But you need this because you do the cleanup here. Hmm, perhaps use a
static_call() to elide the whole call if the hardware doesn't support
this?

> +
> +	if (!is_acr_event_group(event) || !event->attr.config2) {
> +		/*
> +		 * The disable doesn't clear the ACR CFG register.
> +		 * Check and clear the ACR CFG register.
> +		 */
> +		intel_pmu_config_acr(hwc->idx, 0, 0);
> +		return;
> +	}
> +
> +	intel_pmu_config_acr(hwc->idx, hwc->config1, -hwc->sample_period);
> +}
> +
>  static void intel_pmu_enable_event(struct perf_event *event)
>  {
>  	u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
> @@ -2866,8 +2914,11 @@ static void intel_pmu_enable_event(struct perf_event *event)
>  			enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
>  		intel_set_masks(event, idx);
>  		__x86_pmu_enable_event(hwc, enable_mask);
> +		intel_pmu_enable_acr(event);
>  		break;
>  	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
> +		intel_pmu_enable_acr(event);
> +		fallthrough;
>  	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
>  		intel_pmu_enable_fixed(event);
>  		break;

This order is somewhat inconsistent. The GP events get enable,acr, while
the FIXED ones get acr,enable.
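E.g. make both paths configure ACR before enabling -- or the other way
around, but the same in both (sketch only, GP path shown):

		intel_pmu_enable_acr(event);
		intel_set_masks(event, idx);
		__x86_pmu_enable_event(hwc, enable_mask);
		break;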

> @@ -3687,6 +3738,12 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
>  		c2->weight = hweight64(c2->idxmsk64);
>  	}
>  
> +	if (is_acr_event_group(event)) {
> +		c2 = dyn_constraint(cpuc, c2, idx);
> +		c2->idxmsk64 &= event->hw.dyn_mask;
> +		c2->weight = hweight64(c2->idxmsk64);
> +	}
> +
>  	return c2;
>  }
>  
> @@ -3945,6 +4002,78 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
>  	return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
>  }
>  
> +static bool intel_pmu_is_acr_group(struct perf_event *event)
> +{
> +	if (!hybrid(event->pmu, acr_cntr_mask64))
> +		return false;

Why the above check? Aaaah, because the function does two distinct
things. Perhaps split?
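Something like (names made up, sketch only):

static bool intel_pmu_has_acr(struct pmu *pmu)
{
	return !!hybrid(pmu, acr_cntr_mask64);
}

static bool intel_pmu_is_acr_group(struct perf_event *event)
{
	/* either the group leader has the ACR flag set, or acr_mask (config2) is set */
	return is_acr_event_group(event) || event->attr.config2;
}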

> +	/* The group leader has the ACR flag set */
> +	if (is_acr_event_group(event))
> +		return true;
> +
> +	/* The acr_mask is set */
> +	if (event->attr.config2)
> +		return true;
> +
> +	return false;
> +}
> +
> +static int intel_pmu_acr_check_reloadable_event(struct perf_event *event)
> +{
> +	struct perf_event *sibling, *leader = event->group_leader;
> +	int num = 0;
> +
> +	/*
> +	 * The acr_mask(config2) indicates the event can be reloaded by
> +	 * other events. Apply the acr_cntr_mask.
> +	 */
> +	if (leader->attr.config2) {
> +		leader->hw.dyn_mask = hybrid(leader->pmu, acr_cntr_mask64);
> +		num++;
> +	} else
> +		leader->hw.dyn_mask = ~0ULL;

Here..

> +	for_each_sibling_event(sibling, leader) {
> +		if (sibling->attr.config2) {
> +			sibling->hw.dyn_mask = hybrid(sibling->pmu, acr_cntr_mask64);
> +			num++;
> +		} else
> +			sibling->hw.dyn_mask = ~0ULL;

.. here ..

> +	}
> +
> +	if (event->attr.config2) {
> +		event->hw.dyn_mask = hybrid(event->pmu, acr_cntr_mask64);
> +		num++;
> +	} else
> +		event->hw.dyn_mask = ~0ULL;

.. and here, the else branch lost its {}.

Also, you managed to write the exact same logic 3 times, surely that can
be a helper function?
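E.g. (untested, name made up):

static int intel_pmu_acr_init_dyn_mask(struct perf_event *event)
{
	if (event->attr.config2) {
		event->hw.dyn_mask = hybrid(event->pmu, acr_cntr_mask64);
		return 1;
	}
	event->hw.dyn_mask = ~0ULL;
	return 0;
}

	num += intel_pmu_acr_init_dyn_mask(leader);
	for_each_sibling_event(sibling, leader)
		num += intel_pmu_acr_init_dyn_mask(sibling);
	num += intel_pmu_acr_init_dyn_mask(event);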

> +
> +	if (num > hweight64(hybrid(event->pmu, acr_cntr_mask64)))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +/*
> + * Update the dyn_mask of each event to guarantee the event is scheduled
> + * to the counters which be able to cause a reload.
> + */
> +static void intel_pmu_set_acr_dyn_mask(struct perf_event *event, int idx,
> +				       struct perf_event *last)
> +{
> +	struct perf_event *sibling, *leader = event->group_leader;
> +	u64 mask = hybrid(event->pmu, acr_cntr_cause_mask);
> +
> +	/* The event set in the acr_mask(config2) can causes a reload. */
> +	if (test_bit(idx, (unsigned long *)&leader->attr.config2))
> +		event->hw.dyn_mask &= mask;
> +	for_each_sibling_event(sibling, leader) {
> +		if (test_bit(idx, (unsigned long *)&sibling->attr.config2))
> +			event->hw.dyn_mask &= mask;
> +	}
> +	if (test_bit(idx, (unsigned long *)&last->attr.config2))
> +		event->hw.dyn_mask &= mask;
> +}
> +
>  static int intel_pmu_hw_config(struct perf_event *event)
>  {
>  	int ret = x86_pmu_hw_config(event);
> @@ -4056,6 +4185,50 @@ static int intel_pmu_hw_config(struct perf_event *event)
>  		event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
>  	}
>  
> +	if (intel_pmu_is_acr_group(event)) {

This is where you want to check two things:

 - does the hardware support ACR; if not, fail
 - is the event ACR; if so, check its constraints

The current code obscures this distinction.
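That is, roughly (reusing the split suggested above, sketch only):

	if (intel_pmu_is_acr_group(event)) {
		/* the event wants ACR but the hardware has no ACR counters */
		if (!intel_pmu_has_acr(event->pmu))
			return -EINVAL;

		/* ... the existing ACR constraint checks ... */
	}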

> +		struct perf_event *sibling, *leader = event->group_leader;
> +		int event_idx = 0;
> +
> +		/* Not support perf metrics */
> +		if (is_metric_event(event))
> +			return -EINVAL;
> +
> +		/* Not support freq mode */
> +		if (event->attr.freq)
> +			return -EINVAL;
> +
> +		/* PDist is not supported */
> +		if (event->attr.config2 && event->attr.precise_ip > 2)
> +			return -EINVAL;
> +
> +		/* The reload value cannot exceeds the max period */
> +		if (event->attr.sample_period > x86_pmu.max_period)
> +			return -EINVAL;
> +		/*
> +		 * It's hard to know whether the event is the last one of
> +		 * the group. Reconfigure the dyn_mask of each X86 event
> +		 * every time when add a new event.
> +		 * It's impossible to verify whether the bits of
> +		 * the event->attr.config2 exceeds the group. But it's
> +		 * harmless, because the invalid bits are ignored. See
> +		 * intel_pmu_update_acr_mask(). The n - n0 guarantees that
> +		 * only the bits in the group is used.
> +		 *
> +		 * Check whether the reloadable counters is enough and
> +		 * initialize the dyn_mask.
> +		 */
> +		if (intel_pmu_acr_check_reloadable_event(event))
> +			return -EINVAL;
> +
> +		/* Reconfigure the dyn_mask for each event */
> +		intel_pmu_set_acr_dyn_mask(leader, event_idx++, event);
> +		for_each_sibling_event(sibling, leader)
> +			intel_pmu_set_acr_dyn_mask(sibling, event_idx++, event);
> +		intel_pmu_set_acr_dyn_mask(event, event_idx, event);
> +
> +		leader->hw.flags |= PERF_X86_EVENT_ACR;
> +	}
> +
>  	if ((event->attr.type == PERF_TYPE_HARDWARE) ||
>  	    (event->attr.type == PERF_TYPE_HW_CACHE))
>  		return 0;
> @@ -4159,6 +4332,49 @@ static int intel_pmu_hw_config(struct perf_event *event)
>  	return 0;
>  }
>  
> +static void intel_pmu_update_acr_mask(struct cpu_hw_events *cpuc, int n, int *assign)
> +{
> +	struct perf_event *event;
> +	int n0, i, off;
> +
> +	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
> +		n0 = cpuc->n_events - cpuc->n_txn;
> +	else
> +		n0 = cpuc->n_events;
> +
> +	for (i = n0; i < n; i++) {
> +		event = cpuc->event_list[i];
> +		event->hw.config1 = 0;
> +
> +		/* Convert the group index into the counter index */
> +		for_each_set_bit(off, (unsigned long *)&event->attr.config2, n - n0)
> +			set_bit(assign[n0 + off], (unsigned long *)&event->hw.config1);

Is an atomic set_bit() required here?
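If not, the non-atomic variant will do, e.g.:

		for_each_set_bit(off, (unsigned long *)&event->attr.config2, n - n0)
			__set_bit(assign[n0 + off], (unsigned long *)&event->hw.config1);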

> +	}
> +}
> +
> +static int intel_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
> +{
> +	struct perf_event *event;
> +	int ret = x86_schedule_events(cpuc, n, assign);
> +
> +	if (ret)
> +		return ret;
> +
> +	if (cpuc->is_fake)
> +		return ret;
> +
> +	event = cpuc->event_list[n - 1];

ISTR seeing this pattern somewhere before and arguing it was all sorts of
broken. Why is it sane to only look at the last event here?
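If the question is 'does this set of events contain an ACR group', then
perhaps scan the list instead of relying on the last entry (untested):

	int i;

	for (i = 0; i < n; i++) {
		if (is_acr_event_group(cpuc->event_list[i])) {
			intel_pmu_update_acr_mask(cpuc, n, assign);
			break;
		}
	}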

> +	/*
> +	 * The acr_mask(config2) is the event-enabling order.
> +	 * Update it to the counter order after the counters are assigned.
> +	 */
> +	if (event && is_acr_event_group(event))
> +		intel_pmu_update_acr_mask(cpuc, n, assign);
> +
> +	return 0;
> +}
> +
> +
>  /*
>   * Currently, the only caller of this function is the atomic_switch_perf_msrs().
>   * The host perf context helps to prepare the values of the real hardware for
> @@ -5305,7 +5521,7 @@ static __initconst const struct x86_pmu intel_pmu = {
>  	.set_period		= intel_pmu_set_period,
>  	.update			= intel_pmu_update,
>  	.hw_config		= intel_pmu_hw_config,
> -	.schedule_events	= x86_schedule_events,
> +	.schedule_events	= intel_pmu_schedule_events,
>  	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
>  	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
>  	.fixedctr		= MSR_ARCH_PERFMON_FIXED_CTR0,

How about only setting that function if the PMU actually supports ACR?
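That is, keep .schedule_events = x86_schedule_events here and only override
it at init time when ACR counters are actually present, e.g. (sketch,
assuming x86_pmu grows the acr_cntr_mask64 field the hybrid() use implies):

	if (x86_pmu.acr_cntr_mask64)
		x86_pmu.schedule_events = intel_pmu_schedule_events;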
