[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <550037d8-b311-42b6-85ab-66aea8900c95@amd.com>
Date: Tue, 15 Apr 2025 11:41:28 -0500
From: "Moger, Babu" <babu.moger@....com>
To: Reinette Chatre <reinette.chatre@...el.com>, tony.luck@...el.com,
peternewman@...gle.com
Cc: corbet@....net, tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
dave.hansen@...ux.intel.com, x86@...nel.org, hpa@...or.com,
paulmck@...nel.org, akpm@...ux-foundation.org, thuth@...hat.com,
rostedt@...dmis.org, ardb@...nel.org, gregkh@...uxfoundation.org,
daniel.sneddon@...ux.intel.com, jpoimboe@...nel.org,
alexandre.chartre@...cle.com, pawan.kumar.gupta@...ux.intel.com,
thomas.lendacky@....com, perry.yuan@....com, seanjc@...gle.com,
kai.huang@...el.com, xiaoyao.li@...el.com, kan.liang@...ux.intel.com,
xin3.li@...el.com, ebiggers@...gle.com, xin@...or.com,
sohil.mehta@...el.com, andrew.cooper3@...rix.com, mario.limonciello@....com,
linux-doc@...r.kernel.org, linux-kernel@...r.kernel.org,
maciej.wieczor-retman@...el.com, eranian@...gle.com
Subject: Re: [PATCH v12 17/26] x86/resctrl: Add the support for reading ABMC
counters
Hi Reinette,
On 4/11/25 16:21, Reinette Chatre wrote:
> Hi Babu,
>
> On 4/3/25 5:18 PM, Babu Moger wrote:
>> Software can read the assignable counters using the QM_EVTSEL and QM_CTR
>> register pair.
>>
>> QM_EVTSEL Register definition:
>> =======================================================
>> Bits Mnemonic Description
>> =======================================================
>> 63:44 -- Reserved
>> 43:32 RMID Resource Monitoring Identifier
>> 31 ExtEvtID Extended Event Identifier
>> 30:8 -- Reserved
>> 7:0 EvtID Event Identifier
>> =======================================================
>>
>> The contents of a specific counter can be read by setting the following
>> fields in QM_EVTSEL.ExtendedEvtID = 1, QM_EVTSEL.EvtID = L3CacheABMC (=1)
>> and setting [RMID] to the desired counter ID. Reading QM_CTR will then
>> return the contents of the specified counter. The E bit will be set if the
>> counter configuration was invalid, or if an invalid counter ID was set
>
> Would an invalid counter configuration be possible at this point? I expect
> that an invalid counter configuration would not allow the counter to be
> configured in the first place.
Ideally that is true. We should not hit this case. Added the text for
completeness.
>
>> in the QM_EVTSEL[RMID] field.
>>
>> Link: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/40332.pdf
>> Signed-off-by: Babu Moger <babu.moger@....com>
>> ---
>> v12: New patch to support extended event mode when ABMC is enabled.
>> ---
>> arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 4 +-
>> arch/x86/kernel/cpu/resctrl/internal.h | 7 +++
>> arch/x86/kernel/cpu/resctrl/monitor.c | 69 ++++++++++++++++-------
>> include/linux/resctrl.h | 9 +--
>> 4 files changed, 63 insertions(+), 26 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
>> index 2225c40b8888..da78389c6ac7 100644
>> --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
>> +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
>> @@ -636,6 +636,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
>> rr->r = r;
>> rr->d = d;
>> rr->first = first;
>> + rr->cntr_id = mbm_cntr_get(r, d, rdtgrp, evtid);
>> rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
>> if (IS_ERR(rr->arch_mon_ctx)) {
>> rr->err = -EINVAL;
>> @@ -661,13 +662,14 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
>> int rdtgroup_mondata_show(struct seq_file *m, void *arg)
>> {
>> struct kernfs_open_file *of = m->private;
>> + enum resctrl_event_id evtid;
>> struct rdt_domain_hdr *hdr;
>> struct rmid_read rr = {0};
>> struct rdt_mon_domain *d;
>> - u32 resid, evtid, domid;
>> struct rdtgroup *rdtgrp;
>> struct rdt_resource *r;
>> union mon_data_bits md;
>> + u32 resid, domid;
>> int ret = 0;
>>
>
> Why make this change?
Yes. Not required.
>
>> rdtgrp = rdtgroup_kn_lock_live(of->kn);
>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index fbb045aec7e5..b7d1a59f09f8 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -38,6 +38,12 @@
>> /* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */
>> #define ABMC_ENABLE_BIT 0
>>
>> +/*
>> + * ABMC Qos Event Identifiers.
>> + */
>> +#define ABMC_EXTENDED_EVT_ID BIT(31)
>> +#define ABMC_EVT_ID 1
>> +
>> /**
>> * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
>> * aren't marked nohz_full
>> @@ -156,6 +162,7 @@ struct rmid_read {
>> struct rdt_mon_domain *d;
>> enum resctrl_event_id evtid;
>> bool first;
>> + int cntr_id;
>> struct cacheinfo *ci;
>> int err;
>> u64 val;
>
> This does not look necessary (more below)
ok.
>
>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index 5e7970fd0a97..58476c065921 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -269,8 +269,8 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do
>> }
>>
>> void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
>> - u32 unused, u32 rmid,
>> - enum resctrl_event_id eventid)
>> + u32 unused, u32 rmid, enum resctrl_event_id eventid,
>> + int cntr_id)
>> {
>> struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
>> int cpu = cpumask_any(&d->hdr.cpu_mask);
>> @@ -281,7 +281,15 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
>> if (am) {
>> memset(am, 0, sizeof(*am));
>>
>> - prmid = logical_rmid_to_physical_rmid(cpu, rmid);
>> + if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
>> + resctrl_is_mbm_event(eventid)) {
>> + if (cntr_id < 0)
>> + return;
>> + prmid = cntr_id;
>> + eventid = ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID;
>
> hmmm ... this is not a valid enum resctrl_event_id.
Yes. I may have to introduce the new function __cntr_id_read_phys() to
address this.
>
> (before venturing into alternatives we need to study Tony's new RMID series
> because he made some changes to the enum that may support this work)
I looked into his series little bit.
https://lore.kernel.org/lkml/20250407234032.241215-1-tony.luck@intel.com/
I see he is refactoring the the events to support the new event types that
he is adding. It feels like his changes may not drastically affect the
changes I am doing here except some code conflicts between both the series.
>
>
>> + } else {
>> + prmid = logical_rmid_to_physical_rmid(cpu, rmid);
>> + }
>> /* Record any initial, non-zero count value. */
>> __rmid_read_phys(prmid, eventid, &am->prev_msr);
>> }
>> @@ -313,12 +321,13 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
>> }
>>
>> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
>> - u32 unused, u32 rmid, enum resctrl_event_id eventid,
>> - u64 *val, void *ignored)
>> + u32 unused, u32 rmid, int cntr_id,
>> + enum resctrl_event_id eventid, u64 *val, void *ignored)
>> {
>> struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
>> struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
>> int cpu = cpumask_any(&d->hdr.cpu_mask);
>> + enum resctrl_event_id peventid;
>> struct arch_mbm_state *am;
>> u64 msr_val, chunks;
>> u32 prmid;
>> @@ -326,8 +335,19 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
>>
>> resctrl_arch_rmid_read_context_check();
>>
>> - prmid = logical_rmid_to_physical_rmid(cpu, rmid);
>> - ret = __rmid_read_phys(prmid, eventid, &msr_val);
>> + if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
>> + resctrl_is_mbm_event(eventid)) {
>> + if (cntr_id < 0)
>> + return cntr_id;
>> +
>> + prmid = cntr_id;
>> + peventid = ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID;
>
> same
>
Sure. I may have to introduce the new function __cntr_id_read_phys() to
address this.
>> + } else {
>> + prmid = logical_rmid_to_physical_rmid(cpu, rmid);
>> + peventid = eventid;
>> + }
>> +
>> + ret = __rmid_read_phys(prmid, peventid, &msr_val);
>> if (ret)
>> return ret;
>>
>> @@ -392,7 +412,7 @@ void __check_limbo(struct rdt_mon_domain *d, bool force_free)
>> break;
>>
>> entry = __rmid_entry(idx);
>> - if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
>> + if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid, -1,
>> QOS_L3_OCCUP_EVENT_ID, &val,
>> arch_mon_ctx)) {
>> rmid_dirty = true;
>> @@ -599,7 +619,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
>> u64 tval = 0;
>>
>> if (rr->first) {
>> - resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
>> + resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid, rr->cntr_id);
>> m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
>> if (m)
>> memset(m, 0, sizeof(struct mbm_state));
>> @@ -610,7 +630,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
>> /* Reading a single domain, must be on a CPU in that domain. */
>> if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
>> return -EINVAL;
>> - rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
>> + rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->cntr_id,
>> rr->evtid, &tval, rr->arch_mon_ctx);
>> if (rr->err)
>> return rr->err;
>> @@ -635,7 +655,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
>> list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
>> if (d->ci->id != rr->ci->id)
>> continue;
>> - err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
>> + err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, rr->cntr_id,
>> rr->evtid, &tval, rr->arch_mon_ctx);
>> if (!err) {
>> rr->val += tval;
>> @@ -703,8 +723,8 @@ void mon_event_count(void *info)
>>
>> if (rdtgrp->type == RDTCTRL_GROUP) {
>> list_for_each_entry(entry, head, mon.crdtgrp_list) {
>> - if (__mon_event_count(entry->closid, entry->mon.rmid,
>> - rr) == 0)
>> + rr->cntr_id = mbm_cntr_get(rr->r, rr->d, entry, rr->evtid);
>> + if (__mon_event_count(entry->closid, entry->mon.rmid, rr) == 0)
>> ret = 0;
>> }
>> }
>> @@ -835,13 +855,15 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
>> }
>>
>> static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d,
>> - u32 closid, u32 rmid, enum resctrl_event_id evtid)
>> + u32 closid, u32 rmid, int cntr_id,
>> + enum resctrl_event_id evtid)
>
> Would it not be simpler to provide resource group as argument (remove closid, rmid, and
> cntr_id) and determine cntr_id from known data to provide cntr_id as argument to
> __mon_event_count(), removing the need for a new member in struct rmid_read?
Yes. We can do that.
>
>> {
>> struct rmid_read rr = {0};
>>
>> rr.r = r;
>> rr.d = d;
>> rr.evtid = evtid;
>> + rr.cntr_id = cntr_id;
>> rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
>> if (IS_ERR(rr.arch_mon_ctx)) {
>> pr_warn_ratelimited("Failed to allocate monitor context: %ld",
>> @@ -862,17 +884,22 @@ static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *
>> }
>>
>> static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
>> - u32 closid, u32 rmid)
>> + struct rdtgroup *rdtgrp, u32 closid, u32 rmid)
>
> This looks redundant to provide both the resource group and two of its members as parameters.
> Looks like this can just be resource group and then remove closid and rmid?
Yes. We can do that.
>
>> {
>> + int cntr_id;
>> /*
>> * This is protected from concurrent reads from user as both
>> * the user and overflow handler hold the global mutex.
>> */
>> - if (resctrl_arch_is_mbm_total_enabled())
>> - mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID);
>> + if (resctrl_arch_is_mbm_total_enabled()) {
>> + cntr_id = mbm_cntr_get(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);
>> + mbm_update_one_event(r, d, closid, rmid, cntr_id, QOS_L3_MBM_TOTAL_EVENT_ID);
>
> With similar change to mbm_update_one_event() where it takes resource group as parameter
> it is not needed to compute counter ID here.
>
> This patch could be split. One patch can replace the closid/rmid in mbm_update()
> and mbm_update_one_event() with the resource group. Following patches can build on that.
Sure. We can do that.
>
>> + }
>>
>> - if (resctrl_arch_is_mbm_local_enabled())
>> - mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID);
>> + if (resctrl_arch_is_mbm_local_enabled()) {
>> + cntr_id = mbm_cntr_get(r, d, rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID);
>> + mbm_update_one_event(r, d, closid, rmid, cntr_id, QOS_L3_MBM_LOCAL_EVENT_ID);
>> + }
>> }
>>
>> /*
>> @@ -945,11 +972,11 @@ void mbm_handle_overflow(struct work_struct *work)
>> d = container_of(work, struct rdt_mon_domain, mbm_over.work);
>>
>> list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
>> - mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);
>> + mbm_update(r, d, prgrp, prgrp->closid, prgrp->mon.rmid);
>
> providing both the resource group and two of its members really looks
> redundant.
Will take care of thato.
>
>>
>> head = &prgrp->mon.crdtgrp_list;
>> list_for_each_entry(crgrp, head, mon.crdtgrp_list)
>> - mbm_update(r, d, crgrp->closid, crgrp->mon.rmid);
>> + mbm_update(r, d, crgrp, crgrp->closid, crgrp->mon.rmid);
>
> same
>
Sure.
>>
>> if (is_mba_sc(NULL))
>> update_mba_bw(prgrp, d);
>> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
>> index 60270606f1b8..107cb14a0db2 100644
>> --- a/include/linux/resctrl.h
>> +++ b/include/linux/resctrl.h
>> @@ -466,8 +466,9 @@ void resctrl_offline_cpu(unsigned int cpu);
>> * 0 on success, or -EIO, -EINVAL etc on error.
>> */
>> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
>> - u32 closid, u32 rmid, enum resctrl_event_id eventid,
>> - u64 *val, void *arch_mon_ctx);
>> + u32 closid, u32 rmid, int cntr_id,
>> + enum resctrl_event_id eventid, u64 *val,
>> + void *arch_mon_ctx);
>>
>> /**
>> * resctrl_arch_rmid_read_context_check() - warn about invalid contexts
>> @@ -513,8 +514,8 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
>> * This can be called from any CPU.
>> */
>> void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
>> - u32 closid, u32 rmid,
>> - enum resctrl_event_id eventid);
>> + u32 closid, u32 rmid, enum resctrl_event_id eventid,
>> + int cntr_id);
>>
>> /**
>> * resctrl_arch_reset_rmid_all() - Reset all private state associated with
>
> When changing the interface the associated kernel doc should also be updated.
>
Sure.
--
Thanks
Babu Moger
Powered by blists - more mailing lists