Message-ID: <alpine.DEB.2.20.1707020935520.2296@nanos>
Date: Sun, 2 Jul 2017 11:14:01 +0200 (CEST)
From: Thomas Gleixner <tglx@...utronix.de>
To: Vikas Shivappa <vikas.shivappa@...ux.intel.com>
cc: x86@...nel.org, linux-kernel@...r.kernel.org, hpa@...or.com,
peterz@...radead.org, ravi.v.shankar@...el.com,
vikas.shivappa@...el.com, tony.luck@...el.com,
fenghua.yu@...el.com, andi.kleen@...el.com
Subject: Re: [PATCH 07/21] x86/intel_rdt/cqm: Add RDT monitoring
initialization
On Mon, 26 Jun 2017, Vikas Shivappa wrote:
> +/*
> + * Global boolean for rdt_alloc which is true if any
> + * resource allocation is enabled.
> + */
> +bool rdt_alloc_enabled;
That should be rdt_alloc_capable. It's not enabled at probe time. Probing
merely detects the capability. That mirrors the capable/enabled bits in the
rdt resource struct.
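I.e.:

	/*
	 * Set at probe time when the CPU supports any allocation
	 * feature. Whether it actually gets enabled is a separate,
	 * later decision, mirroring the alloc_capable/alloc_enabled
	 * split in struct rdt_resource.
	 */
	bool rdt_alloc_capable;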
> static void
> mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
> static void
> @@ -230,7 +236,7 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
> return true;
> }
>
> -static void rdt_get_cache_config(int idx, struct rdt_resource *r)
> +static void rdt_get_cache_alloc_config(int idx, struct rdt_resource *r)
> {
> union cpuid_0x10_1_eax eax;
> union cpuid_0x10_x_edx edx;
> @@ -422,7 +428,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
>
> d->id = id;
>
> - if (domain_setup_ctrlval(r, d)) {
> + if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
This should be done in the namespace cleanup patch or in a separate one.
> kfree(d);
> return;
> }
> @@ -513,34 +519,39 @@ static __init void rdt_init_padding(void)
>
> static __init bool get_rdt_resources(void)
> {
> - bool ret = false;
> -
> if (cache_alloc_hsw_probe())
> - return true;
> + rdt_alloc_enabled = true;
>
> - if (!boot_cpu_has(X86_FEATURE_RDT_A))
> + if ((!rdt_alloc_enabled && !boot_cpu_has(X86_FEATURE_RDT_A)) &&
> + !boot_cpu_has(X86_FEATURE_CQM))
> return false;
>
> + if (boot_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
> + rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
Instead of artificially cramming the CQM bits into this function, it
would be cleaner to leave that function alone, rename it to
get_rdt_alloc_resources()
and have a new function
get_rdt_mon_resources()
and handle the aggregation at the call site:
	rdt_alloc_capable = get_rdt_alloc_resources();
	rdt_mon_capable = get_rdt_mon_resources();

	if (!rdt_alloc_capable && !rdt_mon_capable)
		return -ENODEV;
I'd make both variables boolean and have rdt_mon_features as a separate
one, which carries the actual available feature bits. This is neither a
hotpath nor are we in a situation where we need to spare the last 4 bytes
of memory. Clean separation of code and functionality is more important.
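Something like this for the monitoring side, untested (only the
llc_occupancy event added in this patch is shown; further events would be
or'ed in the same way):

bool rdt_alloc_capable;
bool rdt_mon_capable;
unsigned int rdt_mon_features;

static __init bool get_rdt_mon_resources(void)
{
	if (!boot_cpu_has(X86_FEATURE_CQM))
		return false;

	if (boot_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);

	/* Capable when at least one monitoring event is available */
	return rdt_mon_features != 0;
}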
> +/*
> + * Event IDs are used to program IA32_QM_EVTSEL before reading event
> + * counter from IA32_QM_CTR
> + */
> +#define QOS_L3_OCCUP_EVENT_ID 0x01
> +#define QOS_L3_MBM_TOTAL_EVENT_ID 0x02
> +#define QOS_L3_MBM_LOCAL_EVENT_ID 0x03
> +
> +/**
> + * struct mon_evt - Entry in the event list of a resource
> + * @evtid: event id
> + * @name: name of the event
> + */
> +struct mon_evt {
> + u32 evtid;
> + char *name;
> + struct list_head list;
> +};
> +
> +extern unsigned int intel_cqm_threshold;
> +extern bool rdt_alloc_enabled;
> +extern int rdt_mon_features;
Please do not use 'int' for variables which contain bit flags. unsigned int
is the proper choice here.
> +struct rmid_entry {
> + u32 rmid;
> + enum rmid_recycle_state state;
> + struct list_head list;
Please make it tabular as you did with mon_evt and other structs.
> +};
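I.e.:

struct rmid_entry {
	u32			rmid;
	enum rmid_recycle_state	state;
	struct list_head	list;
};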
> +
> +/**
> + * @rmid_free_lru A least recently used list of free RMIDs
> + * These RMIDs are guaranteed to have an occupancy less than the
> + * threshold occupancy
> + */
> +static struct list_head rmid_free_lru;
> +
> +/**
> + * @rmid_limbo_lru list of currently unused but (potentially)
> + * dirty RMIDs.
> + * This list contains RMIDs that no one is currently using but that
> + may have an occupancy value > intel_cqm_threshold. User can change
> + * the threshold occupancy value.
> + */
> +static struct list_head rmid_limbo_lru;
> +
> +/**
> + * @rmid_entry - The entry in the limbo and free lists.
> + */
> +static struct rmid_entry *rmid_ptrs;
> +
> +/*
> + * Global boolean for rdt_monitor which is true if any
Boolean!? The comment says boolean, but the variable below is declared as
a plain 'int'.
> + * resource monitoring is enabled.
> + */
> +int rdt_mon_features;
> +
> +/*
> + * This is the threshold cache occupancy at which we will consider an
> + * RMID available for re-allocation.
> + */
> +unsigned int intel_cqm_threshold;
> +
> +static inline struct rmid_entry *__rmid_entry(u32 rmid)
> +{
> + struct rmid_entry *entry;
> +
> + entry = &rmid_ptrs[rmid];
> + WARN_ON(entry->rmid != rmid);
> +
> + return entry;
> +}
> +
> +static int dom_data_init(struct rdt_resource *r)
> +{
> + struct rmid_entry *entry = NULL;
> + int i = 0, nr_rmids;
> +
> + INIT_LIST_HEAD(&rmid_free_lru);
> + INIT_LIST_HEAD(&rmid_limbo_lru);
You can spare that by declaring the list heads with:

	static LIST_HEAD(rmid_xxx_lru);
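I.e. for these two:

	static LIST_HEAD(rmid_free_lru);
	static LIST_HEAD(rmid_limbo_lru);

which also makes the two INIT_LIST_HEAD() calls in dom_data_init() go
away.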
> +
> + nr_rmids = r->num_rmid;
> + rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
> + if (!rmid_ptrs)
> + return -ENOMEM;
> +
> + for (; i < nr_rmids; i++) {
Please initialize i in the for() construct. It's really hard to read,
because the missing initialization statement makes one look for special
initialization magic just to figure out that it's simply i = 0.
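I.e.:

	for (i = 0; i < nr_rmids; i++) {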
> + entry = &rmid_ptrs[i];
> + INIT_LIST_HEAD(&entry->list);
> +
> + entry->rmid = i;
> + list_add_tail(&entry->list, &rmid_free_lru);
> + }
> +
> + /*
> + * RMID 0 is special and is always allocated. It's used for all
> + * tasks that are not monitored.
> + */
> + entry = __rmid_entry(0);
> + list_del(&entry->list);
> +
> + return 0;
> +}
> +
> +static struct mon_evt llc_occupancy_event = {
> + .name = "llc_occupancy",
> + .evtid = QOS_L3_OCCUP_EVENT_ID,
Tabular...
> +};
> +
> +static void l3_mon_evt_init(struct rdt_resource *r)
> +{
> + INIT_LIST_HEAD(&r->evt_list);
> +
> + if (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID))
> + list_add_tail(&llc_occupancy_event.list, &r->evt_list);
What's that list for? Why don't you have that event as a member of the L3
rdt resource and control it via r->mon_capable/enabled?
> +}
> +
> +void rdt_get_mon_l3_config(struct rdt_resource *r)
> +{
> + int ret;
> +
> + r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
> + r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
> +
> + /*
> + * A reasonable upper limit on the max threshold is the number
> + * of lines tagged per RMID if all RMIDs have the same number of
> + * lines tagged in the LLC.
> + *
> + * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
> + */
> + intel_cqm_threshold = boot_cpu_data.x86_cache_size * 1024 / r->num_rmid;
> +
> + /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
> + intel_cqm_threshold /= r->mon_scale;
> +
> + ret = dom_data_init(r);
> + if (ret)
> + goto out;
> +
> + l3_mon_evt_init(r);
> +
> + r->mon_capable = true;
> + r->mon_enabled = true;
> +
> + return;
> +out:
> + kfree(rmid_ptrs);
> + rdt_mon_features = 0;
This is silly. If dom_data_init() fails, then it failed because it was
unable to allocate rmid_ptrs, so rmid_ptrs is NULL and the kfree() here is
pointless.
Also, clearing rdt_mon_features here is conceptually wrong. Make that
function return int, i.e. the failure value, and clear rdt_mon_capable at
the call site in case of error.
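I.e. something like this, untested:

int rdt_get_mon_l3_config(struct rdt_resource *r)
{
	int ret;

	r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;

	/* Upper limit: all LLC lines spread evenly over the RMIDs */
	intel_cqm_threshold = boot_cpu_data.x86_cache_size * 1024 / r->num_rmid;
	/* h/w works in units of x86_cache_occ_scale */
	intel_cqm_threshold /= r->mon_scale;

	/* Fails only when the rmid_ptrs allocation fails. Nothing to undo. */
	ret = dom_data_init(r);
	if (ret)
		return ret;

	l3_mon_evt_init(r);

	r->mon_capable = true;
	r->mon_enabled = true;
	return 0;
}

and at the call site:

	if (rdt_get_mon_l3_config(r))
		rdt_mon_capable = false;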
Thanks,
tglx