lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <53eddf53-a610-4420-9021-658fdf31aebe@arm.com>
Date: Fri, 12 Sep 2025 16:55:16 +0100
From: Ben Horgan <ben.horgan@....com>
To: James Morse <james.morse@....com>, linux-kernel@...r.kernel.org,
 linux-arm-kernel@...ts.infradead.org, linux-acpi@...r.kernel.org
Cc: D Scott Phillips OS <scott@...amperecomputing.com>,
 carl@...amperecomputing.com, lcherian@...vell.com,
 bobo.shaobowang@...wei.com, tan.shaopeng@...itsu.com,
 baolin.wang@...ux.alibaba.com, Jamie Iles <quic_jiles@...cinc.com>,
 Xin Hao <xhao@...ux.alibaba.com>, peternewman@...gle.com,
 dfustini@...libre.com, amitsinght@...vell.com,
 David Hildenbrand <david@...hat.com>, Dave Martin <dave.martin@....com>,
 Koba Ko <kobak@...dia.com>, Shanker Donthineni <sdonthineni@...dia.com>,
 fenghuay@...dia.com, baisheng.gao@...soc.com,
 Jonathan Cameron <jonathan.cameron@...wei.com>, Rob Herring
 <robh@...nel.org>, Rohit Mathew <rohit.mathew@....com>,
 Rafael Wysocki <rafael@...nel.org>, Len Brown <lenb@...nel.org>,
 Lorenzo Pieralisi <lpieralisi@...nel.org>, Hanjun Guo
 <guohanjun@...wei.com>, Sudeep Holla <sudeep.holla@....com>,
 Catalin Marinas <catalin.marinas@....com>, Will Deacon <will@...nel.org>,
 Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
 Danilo Krummrich <dakr@...nel.org>
Subject: Re: [PATCH v2 24/29] arm_mpam: Track bandwidth counter state for
 overflow and power management

Hi James,

On 9/10/25 21:43, James Morse wrote:
> Bandwidth counters need to run continuously to correctly reflect the
> bandwidth.
> 
> The value read may be lower than the previous value read in the case
> of overflow and when the hardware is reset due to CPU hotplug.
> 
> Add struct mbwu_state to track the bandwidth counter to allow overflow
> and power management to be handled.
> 
> Signed-off-by: James Morse <james.morse@....com>
> ---
> Changes since v1:
>  * Fixed lock/unlock typo.
> ---
>  drivers/resctrl/mpam_devices.c  | 154 +++++++++++++++++++++++++++++++-
>  drivers/resctrl/mpam_internal.h |  23 +++++
>  2 files changed, 175 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
> index 1543c33c5d6a..eeb62ed94520 100644
> --- a/drivers/resctrl/mpam_devices.c
> +++ b/drivers/resctrl/mpam_devices.c
> @@ -918,6 +918,7 @@ static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>  	*ctl_val |= MSMON_CFG_x_CTL_MATCH_PARTID;
>  
>  	*flt_val = FIELD_PREP(MSMON_CFG_x_FLT_PARTID, ctx->partid);
> +
>  	if (m->ctx->match_pmg) {
>  		*ctl_val |= MSMON_CFG_x_CTL_MATCH_PMG;
>  		*flt_val |= FIELD_PREP(MSMON_CFG_x_FLT_PMG, ctx->pmg);
> @@ -972,6 +973,7 @@ static void clean_msmon_ctl_val(u32 *cur_ctl)
>  static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>  				     u32 flt_val)
>  {
> +	struct msmon_mbwu_state *mbwu_state;
>  	struct mpam_msc *msc = m->ris->vmsc->msc;
>  
>  	/*
> @@ -990,20 +992,32 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>  		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
>  		mpam_write_monsel_reg(msc, MBWU, 0);
>  		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
> +
> +		mbwu_state = &m->ris->mbwu_state[m->ctx->mon];
> +		if (mbwu_state)
> +			mbwu_state->prev_val = 0;

What's the if condition doing here?

The below could make more sense but I don't think you can get here if
the allocation fails.

if (m->ris->mbwu_state)

> +
>  		break;
>  	default:
>  		return;
>  	}
>  }
>  
> +static u64 mpam_msmon_overflow_val(struct mpam_msc_ris *ris)
> +{
> +	/* TODO: scaling, and long counters */
> +	return GENMASK_ULL(30, 0);
> +}
> +
>  /* Call with MSC lock held */
>  static void __ris_msmon_read(void *arg)
>  {
> -	u64 now;
>  	bool nrdy = false;
>  	struct mon_read *m = arg;
> +	u64 now, overflow_val = 0;
>  	struct mon_cfg *ctx = m->ctx;
>  	struct mpam_msc_ris *ris = m->ris;
> +	struct msmon_mbwu_state *mbwu_state;
>  	struct mpam_props *rprops = &ris->props;
>  	struct mpam_msc *msc = m->ris->vmsc->msc;
>  	u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt;
> @@ -1031,11 +1045,30 @@ static void __ris_msmon_read(void *arg)
>  		now = mpam_read_monsel_reg(msc, CSU);
>  		if (mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, rprops))
>  			nrdy = now & MSMON___NRDY;
> +		now = FIELD_GET(MSMON___VALUE, now);
>  		break;
>  	case mpam_feat_msmon_mbwu:
>  		now = mpam_read_monsel_reg(msc, MBWU);
>  		if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
>  			nrdy = now & MSMON___NRDY;
> +		now = FIELD_GET(MSMON___VALUE, now);
> +
> +		if (nrdy)
> +			break;
> +
> +		mbwu_state = &ris->mbwu_state[ctx->mon];
> +		if (!mbwu_state)
> +			break;
> +
> +		/* Add any pre-overflow value to the mbwu_state->val */
> +		if (mbwu_state->prev_val > now)
> +			overflow_val = mpam_msmon_overflow_val(ris) - mbwu_state->prev_val;
> +
> +		mbwu_state->prev_val = now;
> +		mbwu_state->correction += overflow_val;
> +
> +		/* Include bandwidth consumed before the last hardware reset */
> +		now += mbwu_state->correction;
>  		break;
>  	default:
>  		m->err = -EINVAL;
> @@ -1048,7 +1081,6 @@ static void __ris_msmon_read(void *arg)
>  		return;
>  	}
>  
> -	now = FIELD_GET(MSMON___VALUE, now);
>  	*m->val += now;
>  }
>  
> @@ -1261,6 +1293,67 @@ static int mpam_reprogram_ris(void *_arg)
>  	return 0;
>  }
>  
> +/* Call with MSC lock held */
> +static int mpam_restore_mbwu_state(void *_ris)
> +{
> +	int i;
> +	struct mon_read mwbu_arg;
> +	struct mpam_msc_ris *ris = _ris;
> +
> +	for (i = 0; i < ris->props.num_mbwu_mon; i++) {
> +		if (ris->mbwu_state[i].enabled) {
> +			mwbu_arg.ris = ris;
> +			mwbu_arg.ctx = &ris->mbwu_state[i].cfg;
> +			mwbu_arg.type = mpam_feat_msmon_mbwu;
> +
> +			__ris_msmon_read(&mwbu_arg);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/* Call with MSC lock and held */
> +static int mpam_save_mbwu_state(void *arg)
> +{
> +	int i;
> +	u64 val;
> +	struct mon_cfg *cfg;
> +	u32 cur_flt, cur_ctl, mon_sel;
> +	struct mpam_msc_ris *ris = arg;
> +	struct msmon_mbwu_state *mbwu_state;
> +	struct mpam_msc *msc = ris->vmsc->msc;
> +
> +	for (i = 0; i < ris->props.num_mbwu_mon; i++) {
> +		mbwu_state = &ris->mbwu_state[i];
> +		cfg = &mbwu_state->cfg;
> +
> +		if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc)))
> +			return -EIO;
> +
> +		mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, i) |
> +			  FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx);
> +		mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel);
> +
> +		cur_flt = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
> +		cur_ctl = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
> +		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, 0);
> +
> +		val = mpam_read_monsel_reg(msc, MBWU);
> +		mpam_write_monsel_reg(msc, MBWU, 0);
> +
> +		cfg->mon = i;
> +		cfg->pmg = FIELD_GET(MSMON_CFG_x_FLT_PMG, cur_flt);
> +		cfg->match_pmg = FIELD_GET(MSMON_CFG_x_CTL_MATCH_PMG, cur_ctl);
> +		cfg->partid = FIELD_GET(MSMON_CFG_x_FLT_PARTID, cur_flt);
> +		mbwu_state->correction += val;
> +		mbwu_state->enabled = FIELD_GET(MSMON_CFG_x_CTL_EN, cur_ctl);
> +		mpam_mon_sel_unlock(msc);
> +	}
> +
> +	return 0;
> +}
> +
>  static void mpam_init_reset_cfg(struct mpam_config *reset_cfg)
>  {
>  	memset(reset_cfg, 0, sizeof(*reset_cfg));
> @@ -1335,6 +1428,9 @@ static void mpam_reset_msc(struct mpam_msc *msc, bool online)
>  		 * for non-zero partid may be lost while the CPUs are offline.
>  		 */
>  		ris->in_reset_state = online;
> +
> +		if (mpam_is_enabled() && !online)
> +			mpam_touch_msc(msc, &mpam_save_mbwu_state, ris);
>  	}
>  }
>  
> @@ -1369,6 +1465,9 @@ static void mpam_reprogram_msc(struct mpam_msc *msc)
>  			mpam_reprogram_ris_partid(ris, partid, cfg);
>  		}
>  		ris->in_reset_state = reset;
> +
> +		if (mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props))
> +			mpam_touch_msc(msc, &mpam_restore_mbwu_state, ris);
>  	}
>  }
>  
> @@ -2091,11 +2190,33 @@ static void mpam_unregister_irqs(void)
>  
>  static void __destroy_component_cfg(struct mpam_component *comp)
>  {
> +	struct mpam_msc *msc;
> +	struct mpam_vmsc *vmsc;
> +	struct mpam_msc_ris *ris;
> +
> +	lockdep_assert_held(&mpam_list_lock);
> +
>  	add_to_garbage(comp->cfg);
> +	list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
> +		msc = vmsc->msc;
> +
> +		if (mpam_mon_sel_lock(msc)) {
> +			list_for_each_entry(ris, &vmsc->ris, vmsc_list)
> +				add_to_garbage(ris->mbwu_state);
> +			mpam_mon_sel_unlock(msc);
> +		}
> +	}
>  }
>  
>  static int __allocate_component_cfg(struct mpam_component *comp)
>  {
> +	int err = 0;
> +	struct mpam_msc *msc;
> +	struct mpam_vmsc *vmsc;
> +	struct mpam_msc_ris *ris;
> +	struct msmon_mbwu_state *mbwu_state;
> +
> +	lockdep_assert_held(&mpam_list_lock);
>  	mpam_assert_partid_sizes_fixed();
>  
>  	if (comp->cfg)
> @@ -2106,6 +2227,35 @@ static int __allocate_component_cfg(struct mpam_component *comp)
>  		return -ENOMEM;
>  	init_garbage(comp->cfg);
>  
> +	list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
> +		if (!vmsc->props.num_mbwu_mon)
> +			continue;
> +
> +		msc = vmsc->msc;
> +		list_for_each_entry(ris, &vmsc->ris, vmsc_list) {
> +			if (!ris->props.num_mbwu_mon)
> +				continue;
> +
> +			mbwu_state = kcalloc(ris->props.num_mbwu_mon,
> +					     sizeof(*ris->mbwu_state),
> +					     GFP_KERNEL);
> +			if (!mbwu_state) {
> +				__destroy_component_cfg(comp);
> +				err = -ENOMEM;
> +				break;
> +			}
> +
> +			if (mpam_mon_sel_lock(msc)) {
> +				init_garbage(mbwu_state);
> +				ris->mbwu_state = mbwu_state;
> +				mpam_mon_sel_unlock(msc);
> +			}

The if statement is confusing now that mpam_mon_sel_lock()
unconditionally returns true.

> +		}
> +
> +		if (err)
> +			break;
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
> index bb01e7dbde40..725c2aefa8a2 100644
> --- a/drivers/resctrl/mpam_internal.h
> +++ b/drivers/resctrl/mpam_internal.h
> @@ -212,6 +212,26 @@ struct mon_cfg {
>  	enum mon_filter_options opts;
>  };
>  
> +/*
> + * Changes to enabled and cfg are protected by the msc->lock.
> + * Changes to prev_val and correction are protected by the msc's mon_sel_lock.
> + */
> +struct msmon_mbwu_state {
> +	bool		enabled;
> +	struct mon_cfg	cfg;
> +
> +	/* The value last read from the hardware. Used to detect overflow. */
> +	u64		prev_val;
> +
> +	/*
> +	 * The value to add to the new reading to account for power management,
> +	 * and shifts to trigger the overflow interrupt.
> +	 */
> +	u64		correction;
> +
> +	struct mpam_garbage	garbage;
> +};
> +
>  struct mpam_class {
>  	/* mpam_components in this class */
>  	struct list_head	components;
> @@ -304,6 +324,9 @@ struct mpam_msc_ris {
>  	/* parent: */
>  	struct mpam_vmsc	*vmsc;
>  
> +	/* msmon mbwu configuration is preserved over reset */
> +	struct msmon_mbwu_state	*mbwu_state;
> +
>  	struct mpam_garbage	garbage;
>  };
>  

Thanks,

Ben


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ