[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <802aac6d-b261-45b3-9a36-3532a1d8f519@amd.com>
Date: Fri, 7 Feb 2025 15:10:18 -0600
From: "Moger, Babu" <bmoger@....com>
To: Reinette Chatre <reinette.chatre@...el.com>,
Babu Moger <babu.moger@....com>, corbet@....net, tglx@...utronix.de,
mingo@...hat.com, bp@...en8.de, dave.hansen@...ux.intel.com,
tony.luck@...el.com, peternewman@...gle.com
Cc: x86@...nel.org, hpa@...or.com, paulmck@...nel.org,
akpm@...ux-foundation.org, thuth@...hat.com, rostedt@...dmis.org,
xiongwei.song@...driver.com, pawan.kumar.gupta@...ux.intel.com,
daniel.sneddon@...ux.intel.com, jpoimboe@...nel.org, perry.yuan@....com,
sandipan.das@....com, kai.huang@...el.com, xiaoyao.li@...el.com,
seanjc@...gle.com, xin3.li@...el.com, andrew.cooper3@...rix.com,
ebiggers@...gle.com, mario.limonciello@....com, james.morse@....com,
tan.shaopeng@...itsu.com, linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org, maciej.wieczor-retman@...el.com,
eranian@...gle.com
Subject: Re: [PATCH v11 15/23] x86/resctrl: Add the functionality to assigm
MBM events
Hi Reinette,
On 2/5/2025 7:05 PM, Reinette Chatre wrote:
> Hi Babu,
>
> subject: "assigm" -> "assign"
Sure.
>
> On 1/22/25 12:20 PM, Babu Moger wrote:
>> The mbm_cntr_assign mode offers several counters that can be assigned
>
> This "several counters" contradicts the "very small number of assignable
> counters" used in earlier patch to justify how counters are managed.
How about:
The mbm_cntr_assign mode offers "num_mbm_cntrs" counters, each of which
can be assigned to an RMID, event pair and monitors the bandwidth as long
as it is assigned.
>
>> to an RMID, event pair and monitor the bandwidth as long as it is
>> assigned.
>>
>> Add the functionality to allocate and assign the counters to RMID, event
>> pair in the domain.
>>
>> If all counters are in use, the kernel will show an error message: "Out
>> of MBM assignable counters" when a new assignment is requested. Exit on
>> the first failure when assigning counters across all the domains.
>> Report the error in /sys/fs/resctrl/info/last_cmd_status.
>>
>> Signed-off-by: Babu Moger <babu.moger@....com>
>> ---
>
> ..
>
>> ---
>> arch/x86/kernel/cpu/resctrl/internal.h | 2 +
>> arch/x86/kernel/cpu/resctrl/monitor.c | 105 +++++++++++++++++++++++++
>> 2 files changed, 107 insertions(+)
>>
>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index 161d3feb567c..547d8a4c8aba 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -727,4 +727,6 @@ u32 resctrl_arch_mon_event_config_get(struct rdt_mon_domain *d,
>> int resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
>> enum resctrl_event_id evtid, u32 rmid, u32 closid,
>> u32 cntr_id, bool assign);
>> +int resctrl_assign_cntr_event(struct rdt_resource *r, struct rdt_mon_domain *d,
>> + struct rdtgroup *rdtgrp, enum resctrl_event_id evtid);
>> #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index ef836bb69b9b..127c4000a81a 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -1413,3 +1413,108 @@ int resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
>>
>> return 0;
>> }
>> +
>> +/*
>> + * Configure the counter for the event, RMID pair for the domain. Reset the
>> + * non-architectural state to clear all the event counters.
>> + */
>> +static int resctrl_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
>> + enum resctrl_event_id evtid, u32 rmid, u32 closid,
>> + u32 cntr_id, bool assign)
>> +{
>> + struct mbm_state *m;
>> + int ret;
>> +
>> + ret = resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign);
>> + if (ret)
>> + return ret;
>> +
>> + m = get_mbm_state(d, closid, rmid, evtid);
>> + if (m)
>> + memset(m, 0, sizeof(struct mbm_state));
>> +
>> + return ret;
>> +}
>> +
>> +static int mbm_cntr_get(struct rdt_resource *r, struct rdt_mon_domain *d,
>> + struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
>> +{
>> + int cntr_id;
>> +
>> + for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
>> + if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
>> + d->cntr_cfg[cntr_id].evtid == evtid)
>> + return cntr_id;
>> + }
>> +
>> + return -ENOENT;
>> +}
>> +
>> +static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d,
>> + struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
>> +{
>> + int cntr_id;
>> +
>> + for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
>> + if (!d->cntr_cfg[cntr_id].rdtgrp) {
>> + d->cntr_cfg[cntr_id].rdtgrp = rdtgrp;
>> + d->cntr_cfg[cntr_id].evtid = evtid;
>> + return cntr_id;
>> + }
>> + }
>> +
>> + return -ENOSPC;
>> +}
>> +
>> +static void mbm_cntr_free(struct rdt_mon_domain *d, int cntr_id)
>> +{
>> + memset(&d->cntr_cfg[cntr_id], 0, sizeof(struct mbm_cntr_cfg));
>> +}
>> +
>> +/*
>> + * Allocate a fresh counter and configure the event if not assigned already
>> + * else return success.
>
> I find this confusing. I think the "else return success" can just be dropped.
Sure.
>
>> + */
>> +static int resctrl_alloc_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
>> + struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
>> +{
>> + int cntr_id, ret = 0;
>> +
>> + if (mbm_cntr_get(r, d, rdtgrp, evtid) == -ENOENT) {
>
> This can be simplified while reducing a level of indent with:
>
> /* No need to allocate and configure if counter already assigned to this event. */
> if (mbm_cntr_get(r, d, rdtgrp, evtid) >= 0)
> return 0;
Sure.
>
>> + cntr_id = mbm_cntr_alloc(r, d, rdtgrp, evtid);
>> + if (cntr_id < 0) {
>> + rdt_last_cmd_printf("Domain %d is Out of MBM assignable counter\n",
>
> "Domain %d is Out of MBM assignable counter" -> "Domain %d is out of MBM assignable counters"
> or, the message can be something like "Unable to allocate counter in domain %d" to not
Yes. "Unable to allocate counter in domain %d" sounds better.
> assume the error and just return the error directly. resctrl_process_flags() can in turn
> not override the error resulting in -ENOSPC returned to userspace that can be interpreted
> appropriately instead of always returning -EINVAL and requiring user space to check
> last_cmd_status?
Sure.
>
>> + d->hdr.id);
>> + return -ENOSPC;
>
> Please do not override error of a function.
Ok
>
>> + }
>> +
>> + ret = resctrl_config_cntr(r, d, evtid, rdtgrp->mon.rmid, rdtgrp->closid,
>> + cntr_id, true);
>> + if (ret) {
>> + rdt_last_cmd_printf("Assignment failed on domain %d\n", d->hdr.id);
>
> I assume this targets the scenario when user space requests "all" domains to be changed
> and the error message in resctrl_process_flags() will then print "*" instead of the
> actual domain ID. If this is the goal to give more detail to error then the event
> can be displayed also?
Sure. Will change it to:
rdt_last_cmd_printf("Assignment of event %d failed on domain %d\n",
evtid, d->hdr.id);
>
>> + mbm_cntr_free(d, cntr_id);
>> + }
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +/*
>> + * Assign a hardware counter to event @evtid of group @rdtgrp.
>> + * Counter will be assigned to all the domains if @d is NULL else
>> + * the counter will be assigned to @d.
>
> Please use available 80 chars.
Sure.
>
>> + */
>> +int resctrl_assign_cntr_event(struct rdt_resource *r, struct rdt_mon_domain *d,
>> + struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
>> +{
>> + int ret = 0;
>> +
>> + if (!d) {
>> + list_for_each_entry(d, &r->mon_domains, hdr.list)
>> + ret = resctrl_alloc_config_cntr(r, d, rdtgrp, evtid);
>
> This does not "exit on first failure" as the changelog claims. It actually looks like
> as long as the last domain succeeds, while all other domains fail, this request is
> considered successful.
Yes. That is correct. I have to check the return status in each loop
iteration. Will fix it:
list_for_each_entry(d, &r->mon_domains, hdr.list) {
ret = resctrl_alloc_config_cntr(r, d, rdtgrp, evtid);
if (ret)
return ret;
}
>
>> + } else {
>> + ret = resctrl_alloc_config_cntr(r, d, rdtgrp, evtid);
>> + }
>> +
>> + return ret;
>> +}
>
> Reinette
>
Thanks
Babu
Powered by blists - more mailing lists