[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <88b93600-ace7-4a0c-a61f-7f11d3f38b0e@amd.com>
Date: Wed, 18 Sep 2024 11:26:52 -0500
From: "Moger, Babu" <babu.moger@....com>
To: Reinette Chatre <reinette.chatre@...el.com>, corbet@....net,
tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
dave.hansen@...ux.intel.com, x86@...nel.org
Cc: fenghua.yu@...el.com, hpa@...or.com, paulmck@...nel.org,
thuth@...hat.com, xiongwei.song@...driver.com, ardb@...nel.org,
pawan.kumar.gupta@...ux.intel.com, daniel.sneddon@...ux.intel.com,
sandipan.das@....com, kai.huang@...el.com, peterz@...radead.org,
kan.liang@...ux.intel.com, pbonzini@...hat.com, xin3.li@...el.com,
ebiggers@...gle.com, alexandre.chartre@...cle.com, perry.yuan@....com,
tan.shaopeng@...itsu.com, james.morse@....com, tony.luck@...el.com,
maciej.wieczor-retman@...el.com, linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org, peternewman@...gle.com, eranian@...gle.com
Subject: Re: [PATCH 4/7] x86/resctrl: Implement SDCIAE enable/disable
Hi Reinette,
On 9/13/24 15:46, Reinette Chatre wrote:
> Hi Babu,
>
> On 8/16/24 9:16 AM, Babu Moger wrote:
>> SDCIAE feature can be enabled by setting bit 1 in MSR L3_QOS_EXT_CFG.
>> When the state of SDCIAE is changed, it must be changed to the updated
>> value on all logical processors in the QOS Domain. By default, the SDCIAE
>> feature is disabled.
>>
>> Introduce arch handlers to detect and enable/disable the feature.
>>
>> The SDCIAE feature details are available in APM listed below [1].
>> [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming
>> Publication # 24593 Revision 3.41 section 19.4.7 L3 Smart Data Cache
>> Injection Allocation Enforcement (SDCIAE)
>>
>> Signed-off-by: Babu Moger <babu.moger@....com>
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537
>> ---
>> arch/x86/include/asm/msr-index.h | 1 +
>> arch/x86/kernel/cpu/resctrl/internal.h | 12 +++++
>> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 61 ++++++++++++++++++++++++++
>> 3 files changed, 74 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/msr-index.h
>> b/arch/x86/include/asm/msr-index.h
>> index 82c6a4d350e0..c78afed3c21f 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -1181,6 +1181,7 @@
>> /* - AMD: */
>> #define MSR_IA32_MBA_BW_BASE 0xc0000200
>> #define MSR_IA32_SMBA_BW_BASE 0xc0000280
>> +#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff
>> #define MSR_IA32_EVT_CFG_BASE 0xc0000400
>> /* MSR_IA32_VMX_MISC bits */
>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h
>> b/arch/x86/kernel/cpu/resctrl/internal.h
>> index 955999aecfca..ceb0e8e1ed76 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -56,6 +56,9 @@
>> /* Max event bits supported */
>> #define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
>> +/* Setting bit 1 in L3_QOS_EXT_CFG enables the SDCIAE feature. */
>> +#define SDCIAE_ENABLE_BIT 1
>> +
>> /**
>> * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring
>> those that
>> * aren't marked nohz_full
>> @@ -477,6 +480,7 @@ struct rdt_parse_data {
>> * @mbm_cfg_mask: Bandwidth sources that can be tracked when Bandwidth
>> * Monitoring Event Configuration (BMEC) is supported.
>> * @cdp_enabled: CDP state of this resource
>> + * @sdciae_enabled: SDCIAE feature is enabled
>> *
>> * Members of this structure are either private to the architecture
>> * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g.
>> @@ -491,6 +495,7 @@ struct rdt_hw_resource {
>> unsigned int mbm_width;
>> unsigned int mbm_cfg_mask;
>> bool cdp_enabled;
>> + bool sdciae_enabled;
>> };
>> static inline struct rdt_hw_resource *resctrl_to_arch_res(struct
>> rdt_resource *r)
>> @@ -536,6 +541,13 @@ int resctrl_arch_set_cdp_enabled(enum
>> resctrl_res_level l, bool enable);
>> void arch_mon_domain_online(struct rdt_resource *r, struct
>> rdt_mon_domain *d);
>> +static inline bool resctrl_arch_get_sdciae_enabled(enum
>> resctrl_res_level l)
>> +{
>> + return rdt_resources_all[l].sdciae_enabled;
>> +}
>> +
>> +int resctrl_arch_set_sdciae_enabled(enum resctrl_res_level l, bool
>> enable);
>> +
>> /*
>> * To return the common struct rdt_resource, which is contained in struct
>> * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource.
>> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> index d7163b764c62..c62d6183bfe4 100644
>> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> @@ -1789,6 +1789,67 @@ static ssize_t
>> mbm_local_bytes_config_write(struct kernfs_open_file *of,
>> return ret ?: nbytes;
>> }
>> +static void resctrl_sdciae_msrwrite(void *arg)
>> +{
>> + bool *enable = arg;
>> +
>> + if (*enable)
>> + msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT);
>> + else
>> + msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT);
>> +}
>
> (hmmm ... so there is an effort to make the rest of the code not be
> resource specific ... but then the lowest level has L3 MSR hardcoded)
Not very clear on this.
I can separate the patch into two, one arch specific and the other FS
specific.
>
>> +
>> +static int resctrl_sdciae_setup(enum resctrl_res_level l, bool enable)
>> +{
>> + struct rdt_resource *r = &rdt_resources_all[l].r_resctrl;
>> + struct rdt_ctrl_domain *d;
>> +
>> + /* Update L3_QOS_EXT_CFG MSR on all the CPUs in all domains*/
>
> (please note some space issues above)
Sure.
>
>> + list_for_each_entry(d, &r->ctrl_domains, hdr.list)
>> + on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_sdciae_msrwrite,
>> &enable, 1);
>> +
>> + return 0;
>
> It seems that this will be inside the arch specific code while
> resctrl_arch_set_sdciae_enabled() will be called by resctrl fs code. It is
> thus not clear why above returns an int, thus forcing callers to do
> error code handling, when it will always just return 0.
Yes. It is returning 0 right now. Keeps the options open for other arch's
report error. Looks like we heading to make this generic feature.
>
>> +}
>> +
>> +static int resctrl_sdciae_enable(enum resctrl_res_level l)
>> +{
>> + struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
>> + int ret = 0;
>> +
>> + if (!hw_res->sdciae_enabled) {
>> + ret = resctrl_sdciae_setup(l, true);
>> + if (!ret)
>> + hw_res->sdciae_enabled = true;
>> + }
>> +
>> + return ret;
>
> Same here ... this will always return 0, no?
Yes. it is returns 0 always on AMD. Keeps the options open for other
arch's report error.
>
>> +}
>> +
>> +static void resctrl_sdciae_disable(enum resctrl_res_level l)
>> +{
>> + struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
>> +
>> + if (hw_res->sdciae_enabled) {
>> + resctrl_sdciae_setup(l, false);
>> + hw_res->sdciae_enabled = false;
>> + }
>> +}
>> +
>> +int resctrl_arch_set_sdciae_enabled(enum resctrl_res_level l, bool enable)
>> +{
>> + struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
>> +
>> + if (!hw_res->r_resctrl.sdciae_capable)
>> + return -EINVAL;
>> +
>> + if (enable)
>> + return resctrl_sdciae_enable(l);
>> +
>> + resctrl_sdciae_disable(l);
>> +
>> + return 0;
>> +}
>> +
>> /* rdtgroup information files for one cache resource. */
>> static struct rftype res_common_files[] = {
>> {
>
> Reinette
>
--
Thanks
Babu Moger
Powered by blists - more mailing lists