linux-kernel - Re: [PATCH 07/21] x86/intel_rdt/cqm: Add RDT monitoring initialization

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.DEB.2.10.1707061402500.3798@vshiva-Udesk>
Date:   Thu, 6 Jul 2017 14:07:45 -0700 (PDT)
From:   Shivappa Vikas <vikas.shivappa@...el.com>
To:     Thomas Gleixner <tglx@...utronix.de>
cc:     Vikas Shivappa <vikas.shivappa@...ux.intel.com>, x86@...nel.org,
        linux-kernel@...r.kernel.org, hpa@...or.com, peterz@...radead.org,
        "Shankar, Ravi V" <ravi.v.shankar@...el.com>,
        vikas.shivappa@...el.com, tony.luck@...el.com,
        "Yu, Fenghua" <fenghua.yu@...el.com>
Subject: Re: [PATCH 07/21] x86/intel_rdt/cqm: Add RDT monitoring
 initialization



On Sun, 2 Jul 2017, Thomas Gleixner wrote:

> On Mon, 26 Jun 2017, Vikas Shivappa wrote:
>> +/*
>> + * Global boolean for rdt_alloc which is true if any
>> + * resource allocation is enabled.
>> + */
>> +bool rdt_alloc_enabled;
>
> That should be rdt_alloc_capable. It's not enabled at probe time. Probing
> merily detects the capability. That mirrors the capable/enabled bits in the
> rdt resource struct.
>
>>  static void
>>  mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
>>  static void
>> @@ -230,7 +236,7 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
>>  	return true;
>>  }
>>
>> -static void rdt_get_cache_config(int idx, struct rdt_resource *r)
>> +static void rdt_get_cache_alloc_config(int idx, struct rdt_resource *r)
>>  {
>>  	union cpuid_0x10_1_eax eax;
>>  	union cpuid_0x10_x_edx edx;
>> @@ -422,7 +428,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
>>
>>  	d->id = id;
>>
>> -	if (domain_setup_ctrlval(r, d)) {
>> +	if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
>
> This should be done in the name space cleanup patch or in a separate one.
>
>>  		kfree(d);
>>  		return;
>>  	}
>> @@ -513,34 +519,39 @@ static __init void rdt_init_padding(void)
>>
>>  static __init bool get_rdt_resources(void)
>>  {
>> -	bool ret = false;
>> -
>>  	if (cache_alloc_hsw_probe())
>> -		return true;
>> +		rdt_alloc_enabled = true;
>>
>> -	if (!boot_cpu_has(X86_FEATURE_RDT_A))
>> +	if ((!rdt_alloc_enabled && !boot_cpu_has(X86_FEATURE_RDT_A)) &&
>> +	    !boot_cpu_has(X86_FEATURE_CQM))
>>  		return false;
>>
>> +	if (boot_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
>> +		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
>
> Instead of artificially cramming the CQM bits into this function, it
> would be cleaner to leave that function alone, rename it to
>
>      get_rdt_alloc_resources()
>
> and have a new function
>
>      get_rdt_mon_resources()
>
> and handle the aggregation at the call site.
>
> 	rdt_alloc_capable = get_rdt_alloc_resources();
> 	rdt_mon_capable = get_rdt_mon_resources();
>
> 	if (!rdt_alloc_capable && !rdt_mon_capable)
> 		return -ENODEV;
>
> I'd make both variables boolean and have rdt_mon_features as a separate
> one, which carries the actual available feature bits. This is neither
> hotpath nor are we in a situation where we need to spare the last 4byte of
> memory. Clean separation of code and functionality is more important.
>
>> +/*
>> + * Event IDs are used to program IA32_QM_EVTSEL before reading event
>> + * counter from IA32_QM_CTR
>> + */
>> +#define QOS_L3_OCCUP_EVENT_ID		0x01
>> +#define QOS_L3_MBM_TOTAL_EVENT_ID	0x02
>> +#define QOS_L3_MBM_LOCAL_EVENT_ID	0x03
>> +
>> +/**
>> + * struct mon_evt - Entry in the event list of a resource
>> + * @evtid:		event id
>> + * @name:		name of the event
>> + */
>> +struct mon_evt {
>> +	u32			evtid;
>> +	char			*name;
>> +	struct list_head	list;
>> +};
>> +
>> +extern unsigned int intel_cqm_threshold;
>> +extern bool rdt_alloc_enabled;
>> +extern int rdt_mon_features;
>
> Please do not use 'int' for variables which contain bit flags. unsigned int
> is the proper choice here.
>
>> +struct rmid_entry {
>> +	u32 rmid;
>> +	enum rmid_recycle_state state;
>> +	struct list_head list;
>
> Please make it tabular as you did with mon_evt and other structs.
>
>> +};
>> +
>> +/**
>> + * @rmid_free_lru    A least recently used list of free RMIDs
>> + *     These RMIDs are guaranteed to have an occupancy less than the
>> + *     threshold occupancy
>> + */
>> +static struct list_head	rmid_free_lru;
>> +
>> +/**
>> + * @rmid_limbo_lru       list of currently unused but (potentially)
>> + *     dirty RMIDs.
>> + *     This list contains RMIDs that no one is currently using but that
>> + *     may have a occupancy value > intel_cqm_threshold. User can change
>> + *     the threshold occupancy value.
>> + */
>> +static struct list_head	rmid_limbo_lru;
>> +
>> +/**
>> + * @rmid_entry - The entry in the limbo and free lists.
>> + */
>> +static struct rmid_entry	*rmid_ptrs;
>> +
>> +/*
>> + * Global boolean for rdt_monitor which is true if any
>
> Boolean !?!?!
>
>> + * resource monitoring is enabled.
>> + */
>> +int rdt_mon_features;
>> +
>> +/*
>> + * This is the threshold cache occupancy at which we will consider an
>> + * RMID available for re-allocation.
>> + */
>> +unsigned int intel_cqm_threshold;
>> +
>> +static inline struct rmid_entry *__rmid_entry(u32 rmid)
>> +{
>> +	struct rmid_entry *entry;
>> +
>> +	entry = &rmid_ptrs[rmid];
>> +	WARN_ON(entry->rmid != rmid);
>> +
>> +	return entry;
>> +}
>> +
>> +static int dom_data_init(struct rdt_resource *r)
>> +{
>> +	struct rmid_entry *entry = NULL;
>> +	int i = 0, nr_rmids;
>> +
>> +	INIT_LIST_HEAD(&rmid_free_lru);
>> +	INIT_LIST_HEAD(&rmid_limbo_lru);
>
> You can spare that by declaring the list head with
>
> static LIST_HEAD(rmid_xxx_lru);
>
>> +
>> +	nr_rmids = r->num_rmid;
>> +	rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
>> +	if (!rmid_ptrs)
>> +		return -ENOMEM;
>> +
>> +	for (; i < nr_rmids; i++) {
>
> Please initialize i in the for() construct. It's really bad to read,
> because the missing initialization statement makes one look for a special
> initialization magic just to figure out that it's simply i = 0.
>
>> +		entry = &rmid_ptrs[i];
>> +		INIT_LIST_HEAD(&entry->list);
>> +
>> +		entry->rmid = i;
>> +		list_add_tail(&entry->list, &rmid_free_lru);
>> +	}
>> +
>> +	/*
>> +	 * RMID 0 is special and is always allocated. It's used for all
>> +	 * tasks that are not monitored.
>> +	 */
>> +	entry = __rmid_entry(0);
>> +	list_del(&entry->list);
>> +
>> +	return 0;
>> +}
>> +
>> +static struct mon_evt llc_occupancy_event = {
>> +	.name = "llc_occupancy",
>> +	.evtid = QOS_L3_OCCUP_EVENT_ID,
>
> Tabluar...
>
>> +};
>> +
>> +static void l3_mon_evt_init(struct rdt_resource *r)
>> +{
>> +	INIT_LIST_HEAD(&r->evt_list);
>> +
>> +	if (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID))
>> +		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
>
> What's that list for? Why don't you have that event as a member of the L3
> rdt resource and control it via r->mon_capable/enabled?

Will fix all comments above. The memory bandwidth(total and local) is enumerated 
for L3 resource itself like llc_occupancy event with resource id as 1. 
CPUID.(EAX=0FH, ECX=0):EDX.L3[bit 1] = 1. So all three events are added to the 
L3 resource struct itself..

>
>> +}
>> +
>> +void rdt_get_mon_l3_config(struct rdt_resource *r)
>> +{
>> +	int ret;
>> +
>> +	r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
>> +	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
>> +
>> +	/*
>> +	 * A reasonable upper limit on the max threshold is the number
>> +	 * of lines tagged per RMID if all RMIDs have the same number of
>> +	 * lines tagged in the LLC.
>> +	 *
>> +	 * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
>> +	 */
>> +	intel_cqm_threshold = boot_cpu_data.x86_cache_size * 1024 / r->num_rmid;
>> +
>> +	/* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
>> +	intel_cqm_threshold /= r->mon_scale;
>> +
>> +	ret = dom_data_init(r);
>> +	if (ret)
>> +		goto out;
>> +
>> +	l3_mon_evt_init(r);
>> +
>> +	r->mon_capable = true;
>> +	r->mon_enabled = true;
>> +
>> +	return;
>> +out:
>> +	kfree(rmid_ptrs);
>> +	rdt_mon_features = 0;
>
> This is silly. if dom_data_init() fails, then it failed because it was
> unable to allocate rmid_ptrs. .....
>
> Also clearing rdt_mod_features here is conceptually wrong. Make that
> function return int, i.e. the failure value, and clear rdt_mon_capable at
> the call site in case of error.

Ok will fix and keep a seperate rdt_mon_capable.

Thanks,
Vikas

>
> Thanks,
>
> 	tglx
>
>
>
>
>