[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <c804f4bf-3257-4e6a-b95b-ce6768854521@linaro.org>
Date: Tue, 15 Jul 2025 13:39:02 +0100
From: James Clark <james.clark@...aro.org>
To: Will Deacon <will@...nel.org>
Cc: Catalin Marinas <catalin.marinas@....com>,
Mark Rutland <mark.rutland@....com>, Jonathan Corbet <corbet@....net>,
Marc Zyngier <maz@...nel.org>, Oliver Upton <oliver.upton@...ux.dev>,
Joey Gouly <joey.gouly@....com>, Suzuki K Poulose <suzuki.poulose@....com>,
Zenghui Yu <yuzenghui@...wei.com>, Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>, Arnaldo Carvalho de Melo <acme@...nel.org>,
Namhyung Kim <namhyung@...nel.org>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>, Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>, leo.yan@....com,
linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org, linux-doc@...r.kernel.org,
kvmarm@...ts.linux.dev
Subject: Re: [PATCH v3 03/10] perf: arm_spe: Add support for FEAT_SPE_EFT
extended filtering
On 14/07/2025 2:46 pm, Will Deacon wrote:
> On Thu, Jun 05, 2025 at 11:49:01AM +0100, James Clark wrote:
>> FEAT_SPE_EFT (optional from Armv9.4) adds mask bits for the existing
>> load, store and branch filters. It also adds two new filter bits for
>> SIMD and floating point with their own associated mask bits. The current
>> filters only allow OR filtering on samples that are load OR store etc,
>> and the new mask bits allow setting part of the filter to an AND, for
>> example filtering samples that are store AND SIMD. With mask bits set to
>> 0, the OR behavior is preserved, so the unless any masks are explicitly
>> set old filters will behave the same.
>>
>> Add them all and make them behave the same way as existing format bits,
>> hidden and return EOPNOTSUPP if set when the feature doesn't exist.
>>
>> Reviewed-by: Leo Yan <leo.yan@....com>
>> Tested-by: Leo Yan <leo.yan@....com>
>> Signed-off-by: James Clark <james.clark@...aro.org>
>> ---
>> drivers/perf/arm_spe_pmu.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 64 insertions(+)
>>
>> diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
>> index d9f6d229dce8..9309b846f642 100644
>> --- a/drivers/perf/arm_spe_pmu.c
>> +++ b/drivers/perf/arm_spe_pmu.c
>> @@ -86,6 +86,7 @@ struct arm_spe_pmu {
>> #define SPE_PMU_FEAT_ERND (1UL << 5)
>> #define SPE_PMU_FEAT_INV_FILT_EVT (1UL << 6)
>> #define SPE_PMU_FEAT_DISCARD (1UL << 7)
>> +#define SPE_PMU_FEAT_EFT (1UL << 8)
>> #define SPE_PMU_FEAT_DEV_PROBED (1UL << 63)
>> u64 features;
>>
>> @@ -197,6 +198,27 @@ static const struct attribute_group arm_spe_pmu_cap_group = {
>> #define ATTR_CFG_FLD_discard_CFG config /* PMBLIMITR_EL1.FM = DISCARD */
>> #define ATTR_CFG_FLD_discard_LO 35
>> #define ATTR_CFG_FLD_discard_HI 35
>> +#define ATTR_CFG_FLD_branch_filter_mask_CFG config /* PMSFCR_EL1.Bm */
>> +#define ATTR_CFG_FLD_branch_filter_mask_LO 36
>> +#define ATTR_CFG_FLD_branch_filter_mask_HI 36
>> +#define ATTR_CFG_FLD_load_filter_mask_CFG config /* PMSFCR_EL1.LDm */
>> +#define ATTR_CFG_FLD_load_filter_mask_LO 37
>> +#define ATTR_CFG_FLD_load_filter_mask_HI 37
>> +#define ATTR_CFG_FLD_store_filter_mask_CFG config /* PMSFCR_EL1.STm */
>> +#define ATTR_CFG_FLD_store_filter_mask_LO 38
>> +#define ATTR_CFG_FLD_store_filter_mask_HI 38
>> +#define ATTR_CFG_FLD_simd_filter_CFG config /* PMSFCR_EL1.SIMD */
>> +#define ATTR_CFG_FLD_simd_filter_LO 39
>> +#define ATTR_CFG_FLD_simd_filter_HI 39
>> +#define ATTR_CFG_FLD_simd_filter_mask_CFG config /* PMSFCR_EL1.SIMDm */
>> +#define ATTR_CFG_FLD_simd_filter_mask_LO 40
>> +#define ATTR_CFG_FLD_simd_filter_mask_HI 40
>> +#define ATTR_CFG_FLD_float_filter_CFG config /* PMSFCR_EL1.FP */
>> +#define ATTR_CFG_FLD_float_filter_LO 41
>> +#define ATTR_CFG_FLD_float_filter_HI 41
>> +#define ATTR_CFG_FLD_float_filter_mask_CFG config /* PMSFCR_EL1.FPm */
>> +#define ATTR_CFG_FLD_float_filter_mask_LO 42
>> +#define ATTR_CFG_FLD_float_filter_mask_HI 42
>>
>> #define ATTR_CFG_FLD_event_filter_CFG config1 /* PMSEVFR_EL1 */
>> #define ATTR_CFG_FLD_event_filter_LO 0
>> @@ -215,8 +237,15 @@ GEN_PMU_FORMAT_ATTR(pa_enable);
>> GEN_PMU_FORMAT_ATTR(pct_enable);
>> GEN_PMU_FORMAT_ATTR(jitter);
>> GEN_PMU_FORMAT_ATTR(branch_filter);
>> +GEN_PMU_FORMAT_ATTR(branch_filter_mask);
>> GEN_PMU_FORMAT_ATTR(load_filter);
>> +GEN_PMU_FORMAT_ATTR(load_filter_mask);
>> GEN_PMU_FORMAT_ATTR(store_filter);
>> +GEN_PMU_FORMAT_ATTR(store_filter_mask);
>> +GEN_PMU_FORMAT_ATTR(simd_filter);
>> +GEN_PMU_FORMAT_ATTR(simd_filter_mask);
>> +GEN_PMU_FORMAT_ATTR(float_filter);
>> +GEN_PMU_FORMAT_ATTR(float_filter_mask);
>> GEN_PMU_FORMAT_ATTR(event_filter);
>> GEN_PMU_FORMAT_ATTR(inv_event_filter);
>> GEN_PMU_FORMAT_ATTR(min_latency);
>> @@ -228,8 +257,15 @@ static struct attribute *arm_spe_pmu_formats_attr[] = {
>> &format_attr_pct_enable.attr,
>> &format_attr_jitter.attr,
>> &format_attr_branch_filter.attr,
>> + &format_attr_branch_filter_mask.attr,
>> &format_attr_load_filter.attr,
>> + &format_attr_load_filter_mask.attr,
>> &format_attr_store_filter.attr,
>> + &format_attr_store_filter_mask.attr,
>> + &format_attr_simd_filter.attr,
>> + &format_attr_simd_filter_mask.attr,
>> + &format_attr_float_filter.attr,
>> + &format_attr_float_filter_mask.attr,
>> &format_attr_event_filter.attr,
>> &format_attr_inv_event_filter.attr,
>> &format_attr_min_latency.attr,
>> @@ -250,6 +286,16 @@ static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj,
>> if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT))
>> return 0;
>>
>> + if ((attr == &format_attr_branch_filter_mask.attr ||
>> + attr == &format_attr_load_filter_mask.attr ||
>> + attr == &format_attr_store_filter_mask.attr ||
>> + attr == &format_attr_simd_filter.attr ||
>> + attr == &format_attr_simd_filter_mask.attr ||
>> + attr == &format_attr_float_filter.attr ||
>> + attr == &format_attr_float_filter_mask.attr) &&
>> + !(spe_pmu->features & SPE_PMU_FEAT_EFT))
>> + return 0;
>> +
>> return attr->mode;
>> }
>>
>> @@ -341,8 +387,15 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
>> u64 reg = 0;
>>
>> reg |= FIELD_PREP(PMSFCR_EL1_LD, ATTR_CFG_GET_FLD(attr, load_filter));
>> + reg |= FIELD_PREP(PMSFCR_EL1_LDm, ATTR_CFG_GET_FLD(attr, load_filter_mask));
>> reg |= FIELD_PREP(PMSFCR_EL1_ST, ATTR_CFG_GET_FLD(attr, store_filter));
>> + reg |= FIELD_PREP(PMSFCR_EL1_STm, ATTR_CFG_GET_FLD(attr, store_filter_mask));
>> reg |= FIELD_PREP(PMSFCR_EL1_B, ATTR_CFG_GET_FLD(attr, branch_filter));
>> + reg |= FIELD_PREP(PMSFCR_EL1_Bm, ATTR_CFG_GET_FLD(attr, branch_filter_mask));
>> + reg |= FIELD_PREP(PMSFCR_EL1_SIMD, ATTR_CFG_GET_FLD(attr, simd_filter));
>> + reg |= FIELD_PREP(PMSFCR_EL1_SIMDm, ATTR_CFG_GET_FLD(attr, simd_filter_mask));
>> + reg |= FIELD_PREP(PMSFCR_EL1_FP, ATTR_CFG_GET_FLD(attr, float_filter));
>> + reg |= FIELD_PREP(PMSFCR_EL1_FPm, ATTR_CFG_GET_FLD(attr, float_filter_mask));
>>
>> if (reg)
>> reg |= PMSFCR_EL1_FT;
>> @@ -716,6 +769,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
>> u64 reg;
>> struct perf_event_attr *attr = &event->attr;
>> struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
>> + const u64 feat_spe_eft_bits = PMSFCR_EL1_LDm | PMSFCR_EL1_STm |
>> + PMSFCR_EL1_Bm | PMSFCR_EL1_SIMD |
>> + PMSFCR_EL1_SIMDm | PMSFCR_EL1_FP |
>> + PMSFCR_EL1_FPm;
>
> I'm not sure this constant is adding much, especially as its defined
> quite a long way from its single user.
>
>>
>> /* This is, of course, deeply driver-specific */
>> if (attr->type != event->pmu->type)
>> @@ -761,6 +818,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
>> !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
>> return -EOPNOTSUPP;
>>
>> + if ((reg & feat_spe_eft_bits) &&
>> + !(spe_pmu->features & SPE_PMU_FEAT_EFT))
>> + return -EOPNOTSUPP;
>
> You could probably just spell out the entire thing tbh. It's verbose,
> but it's also pretty clear and it means we have everything in one place:
>
> if ((FIELD_GET(PMSFCR_EL1_LDm, reg) ||
> FIELD_GET(PMSFCR_EL1_STm, reg) ||
> FIELD_GET(PMSFCR_EL1_Bm, reg) ||
> FIELD_GET(PMSFCR_EL1_SIMD, reg) ||
> FIELD_GET(PMSFCR_EL1_SIMDm, reg) ||
> FIELD_GET(PMSFCR_EL1_FP, reg) ||
> FIELD_GET(PMSFCR_EL1_FPm, reg)) &&
> !(spe_pmu->features & SPE_PMU_FEAT_EFT))
> return -EOPNOTSUPP;
>
> Will
Ack
Powered by blists - more mailing lists