Message-ID: <alpine.DEB.2.10.1707061402500.3798@vshiva-Udesk>
Date: Thu, 6 Jul 2017 14:07:45 -0700 (PDT)
From: Shivappa Vikas <vikas.shivappa@...el.com>
To: Thomas Gleixner <tglx@...utronix.de>
cc: Vikas Shivappa <vikas.shivappa@...ux.intel.com>, x86@...nel.org,
linux-kernel@...r.kernel.org, hpa@...or.com, peterz@...radead.org,
"Shankar, Ravi V" <ravi.v.shankar@...el.com>,
vikas.shivappa@...el.com, tony.luck@...el.com,
"Yu, Fenghua" <fenghua.yu@...el.com>
Subject: Re: [PATCH 07/21] x86/intel_rdt/cqm: Add RDT monitoring initialization

On Sun, 2 Jul 2017, Thomas Gleixner wrote:
> On Mon, 26 Jun 2017, Vikas Shivappa wrote:
>> +/*
>> + * Global boolean for rdt_alloc which is true if any
>> + * resource allocation is enabled.
>> + */
>> +bool rdt_alloc_enabled;
>
> That should be rdt_alloc_capable. It's not enabled at probe time. Probing
> merely detects the capability. That mirrors the capable/enabled bits in the
> rdt resource struct.
>
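Ok, will rename it to reflect that, i.e.:

	/* True if any allocation capability was detected during probe */
	bool rdt_alloc_capable;
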
>> static void
>> mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
>> static void
>> @@ -230,7 +236,7 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
>> return true;
>> }
>>
>> -static void rdt_get_cache_config(int idx, struct rdt_resource *r)
>> +static void rdt_get_cache_alloc_config(int idx, struct rdt_resource *r)
>> {
>> union cpuid_0x10_1_eax eax;
>> union cpuid_0x10_x_edx edx;
>> @@ -422,7 +428,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
>>
>> d->id = id;
>>
>> - if (domain_setup_ctrlval(r, d)) {
>> + if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
>
> This should be done in the name space cleanup patch or in a separate one.
>
>> kfree(d);
>> return;
>> }
>> @@ -513,34 +519,39 @@ static __init void rdt_init_padding(void)
>>
>> static __init bool get_rdt_resources(void)
>> {
>> - bool ret = false;
>> -
>> if (cache_alloc_hsw_probe())
>> - return true;
>> + rdt_alloc_enabled = true;
>>
>> - if (!boot_cpu_has(X86_FEATURE_RDT_A))
>> + if ((!rdt_alloc_enabled && !boot_cpu_has(X86_FEATURE_RDT_A)) &&
>> + !boot_cpu_has(X86_FEATURE_CQM))
>> return false;
>>
>> + if (boot_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
>> + rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
>
> Instead of artificially cramming the CQM bits into this function, it
> would be cleaner to leave that function alone, rename it to
>
> get_rdt_alloc_resources()
>
> and have a new function
>
> get_rdt_mon_resources()
>
> and handle the aggregation at the call site.
>
> rdt_alloc_capable = get_rdt_alloc_resources();
> rdt_mon_capable = get_rdt_mon_resources();
>
> if (!rdt_alloc_capable && !rdt_mon_capable)
> return -ENODEV;
>
> I'd make both variables boolean and have rdt_mon_features as a separate
> one, which carries the actual available feature bits. This is neither a
> hotpath nor are we in a situation where we need to spare the last 4 bytes
> of memory. Clean separation of code and functionality is more important.
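Ok, will separate them. For the monitoring side, roughly this (untested
sketch; it assumes the MBM feature bits from cpufeatures.h are probed the
same way as the LLC occupancy bit):

static __init bool get_rdt_mon_resources(void)
{
	if (boot_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
	/* MBM bits assumed to be probed like the occupancy bit above */
	if (boot_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
		rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
	if (boot_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
		rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);

	/* Capable if at least one monitoring event was enumerated */
	return rdt_mon_features != 0;
}
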
>
>> +/*
>> + * Event IDs are used to program IA32_QM_EVTSEL before reading event
>> + * counter from IA32_QM_CTR
>> + */
>> +#define QOS_L3_OCCUP_EVENT_ID 0x01
>> +#define QOS_L3_MBM_TOTAL_EVENT_ID 0x02
>> +#define QOS_L3_MBM_LOCAL_EVENT_ID 0x03
>> +
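For context, reading an event counter with these ids then boils down to
something like the below (a sketch based on the SDM definitions of
IA32_QM_EVTSEL (0xc8d) and IA32_QM_CTR (0xc8e)):

static u64 __rmid_read(u32 rmid, u32 eventid)
{
	u64 val;

	/* Event id goes in the low word of IA32_QM_EVTSEL, RMID in bits 41:32 */
	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
	rdmsrl(MSR_IA32_QM_CTR, val);

	/* Bit 63 flags an error, bit 62 a not-yet-available count */
	return val;
}
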
>> +/**
>> + * struct mon_evt - Entry in the event list of a resource
>> + * @evtid: event id
>> + * @name: name of the event
>> + */
>> +struct mon_evt {
>> + u32 evtid;
>> + char *name;
>> + struct list_head list;
>> +};
>> +
>> +extern unsigned int intel_cqm_threshold;
>> +extern bool rdt_alloc_enabled;
>> +extern int rdt_mon_features;
>
> Please do not use 'int' for variables which contain bit flags. unsigned int
> is the proper choice here.
>
>> +struct rmid_entry {
>> + u32 rmid;
>> + enum rmid_recycle_state state;
>> + struct list_head list;
>
> Please make it tabular as you did with mon_evt and other structs.
>
>> +};
>> +
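Will fix, i.e.:

struct rmid_entry {
	u32			rmid;
	enum rmid_recycle_state	state;
	struct list_head	list;
};
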
>> +/**
>> + * @rmid_free_lru A least recently used list of free RMIDs
>> + * These RMIDs are guaranteed to have an occupancy less than the
>> + * threshold occupancy
>> + */
>> +static struct list_head rmid_free_lru;
>> +
>> +/**
>> + * @rmid_limbo_lru list of currently unused but (potentially)
>> + * dirty RMIDs.
>> + * This list contains RMIDs that no one is currently using but that
>> + * may have an occupancy value > intel_cqm_threshold. The user can
>> + * change the threshold occupancy value.
>> + */
>> +static struct list_head rmid_limbo_lru;
>> +
>> +/**
>> + * @rmid_entry - The entry in the limbo and free lists.
>> + */
>> +static struct rmid_entry *rmid_ptrs;
>> +
>> +/*
>> + * Global boolean for rdt_monitor which is true if any
>
> Boolean !?!?!
>
>> + * resource monitoring is enabled.
>> + */
>> +int rdt_mon_features;
>> +
>> +/*
>> + * This is the threshold cache occupancy at which we will consider an
>> + * RMID available for re-allocation.
>> + */
>> +unsigned int intel_cqm_threshold;
>> +
>> +static inline struct rmid_entry *__rmid_entry(u32 rmid)
>> +{
>> + struct rmid_entry *entry;
>> +
>> + entry = &rmid_ptrs[rmid];
>> + WARN_ON(entry->rmid != rmid);
>> +
>> + return entry;
>> +}
>> +
>> +static int dom_data_init(struct rdt_resource *r)
>> +{
>> + struct rmid_entry *entry = NULL;
>> + int i = 0, nr_rmids;
>> +
>> + INIT_LIST_HEAD(&rmid_free_lru);
>> + INIT_LIST_HEAD(&rmid_limbo_lru);
>
> You can spare that by declaring the list head with
>
> static LIST_HEAD(rmid_xxx_lru);
>
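Ok, will use:

static LIST_HEAD(rmid_free_lru);
static LIST_HEAD(rmid_limbo_lru);

and drop the two INIT_LIST_HEAD() calls from dom_data_init().
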
>> +
>> + nr_rmids = r->num_rmid;
>> + rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
>> + if (!rmid_ptrs)
>> + return -ENOMEM;
>> +
>> + for (; i < nr_rmids; i++) {
>
> Please initialize i in the for() construct. It's really bad to read,
> because the missing initialization statement makes one look for special
> initialization magic just to figure out that it's simply i = 0.
>
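Will fix (and drop the initialization from the declaration):

	for (i = 0; i < nr_rmids; i++) {
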
>> + entry = &rmid_ptrs[i];
>> + INIT_LIST_HEAD(&entry->list);
>> +
>> + entry->rmid = i;
>> + list_add_tail(&entry->list, &rmid_free_lru);
>> + }
>> +
>> + /*
>> + * RMID 0 is special and is always allocated. It's used for all
>> + * tasks that are not monitored.
>> + */
>> + entry = __rmid_entry(0);
>> + list_del(&entry->list);
>> +
>> + return 0;
>> +}
>> +
>> +static struct mon_evt llc_occupancy_event = {
>> + .name = "llc_occupancy",
>> + .evtid = QOS_L3_OCCUP_EVENT_ID,
>
> Tabular...
>
>> +};
>> +
>> +static void l3_mon_evt_init(struct rdt_resource *r)
>> +{
>> + INIT_LIST_HEAD(&r->evt_list);
>> +
>> + if (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID))
>> + list_add_tail(&llc_occupancy_event.list, &r->evt_list);
>
> What's that list for? Why don't you have that event as a member of the L3
> rdt resource and control it via r->mon_capable/enabled?
Will fix all the comments above. The memory bandwidth events (total and
local) are enumerated under the same L3 resource as the llc_occupancy
event, with resource id 1, i.e. CPUID.(EAX=0FH, ECX=0):EDX.L3[bit 1] = 1.
So all three events are added to the L3 resource struct itself.
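Something like this once MBM is in (sketch; the two MBM mon_evt structs and
their "mbm_*_bytes" names here are illustrative, not final):

/* Names below are illustrative placeholders for the MBM events */
static struct mon_evt mbm_total_event = {
	.name	= "mbm_total_bytes",
	.evtid	= QOS_L3_MBM_TOTAL_EVENT_ID,
};

static struct mon_evt mbm_local_event = {
	.name	= "mbm_local_bytes",
	.evtid	= QOS_L3_MBM_LOCAL_EVENT_ID,
};

static void l3_mon_evt_init(struct rdt_resource *r)
{
	INIT_LIST_HEAD(&r->evt_list);

	if (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID))
		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
	if (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID))
		list_add_tail(&mbm_total_event.list, &r->evt_list);
	if (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID))
		list_add_tail(&mbm_local_event.list, &r->evt_list);
}
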
>
>> +}
>> +
>> +void rdt_get_mon_l3_config(struct rdt_resource *r)
>> +{
>> + int ret;
>> +
>> + r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
>> + r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
>> +
>> + /*
>> + * A reasonable upper limit on the max threshold is the number
>> + * of lines tagged per RMID if all RMIDs have the same number of
>> + * lines tagged in the LLC.
>> + *
>> + * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
>> + */
>> + intel_cqm_threshold = boot_cpu_data.x86_cache_size * 1024 / r->num_rmid;
>> +
>> + /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
>> + intel_cqm_threshold /= r->mon_scale;
>> +
>> + ret = dom_data_init(r);
>> + if (ret)
>> + goto out;
>> +
>> + l3_mon_evt_init(r);
>> +
>> + r->mon_capable = true;
>> + r->mon_enabled = true;
>> +
>> + return;
>> +out:
>> + kfree(rmid_ptrs);
>> + rdt_mon_features = 0;
>
> This is silly. If dom_data_init() fails, then it failed because it was
> unable to allocate rmid_ptrs. .....
>
> Also clearing rdt_mon_features here is conceptually wrong. Make that
> function return int, i.e. the failure value, and clear rdt_mon_capable at
> the call site in case of error.
Ok, will fix and keep a separate rdt_mon_capable.
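Roughly (untested sketch, dropping the pointless kfree() on the error
path as you point out):

int __init rdt_get_mon_l3_config(struct rdt_resource *r)
{
	int ret;

	r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;

	intel_cqm_threshold = boot_cpu_data.x86_cache_size * 1024 / r->num_rmid;
	/* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
	intel_cqm_threshold /= r->mon_scale;

	ret = dom_data_init(r);
	if (ret)
		return ret;

	l3_mon_evt_init(r);

	r->mon_capable = true;
	r->mon_enabled = true;

	return 0;
}

with the caller clearing rdt_mon_capable when this returns an error.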
Thanks,
Vikas
>
> Thanks,
>
> tglx