Message-ID: <alpine.DEB.2.20.1609081448520.5647@nanos>
Date:   Thu, 8 Sep 2016 16:57:51 +0200 (CEST)
From:   Thomas Gleixner <tglx@...utronix.de>
To:     Fenghua Yu <fenghua.yu@...el.com>
cc:     "H. Peter Anvin" <h.peter.anvin@...el.com>,
        Ingo Molnar <mingo@...e.hu>, Tony Luck <tony.luck@...el.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Tejun Heo <tj@...nel.org>, Borislav Petkov <bp@...e.de>,
        Stephane Eranian <eranian@...gle.com>,
        Marcelo Tosatti <mtosatti@...hat.com>,
        David Carrillo-Cisneros <davidcc@...gle.com>,
        Shaohua Li <shli@...com>,
        Ravi V Shankar <ravi.v.shankar@...el.com>,
        Vikas Shivappa <vikas.shivappa@...ux.intel.com>,
        Sai Prakhya <sai.praneeth.prakhya@...el.com>,
        linux-kernel <linux-kernel@...r.kernel.org>, x86 <x86@...nel.org>
Subject: Re: [PATCH v2 22/33] x86/intel_rdt.c: Extend RDT to per cache and
 per resources

On Thu, 8 Sep 2016, Fenghua Yu wrote:

> +#define __DCBM_TABLE_INDEX(x) (x << 1)
> +#define __ICBM_TABLE_INDEX(x) ((x << 1) + 1)

This macro mess is completely undocumented.
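
At the very least these want parentheses around the macro argument and a
comment. Something like this (sketch, assuming the usual CDP data/code
pairing of table entries):

/*
 * With CDP enabled, each CLOSID occupies a pair of table entries:
 * the data capacity bitmask at index (closid << 1) and the code
 * bitmask at index (closid << 1) + 1.
 */
#define __DCBM_TABLE_INDEX(x)	((x) << 1)
#define __ICBM_TABLE_INDEX(x)	(((x) << 1) + 1)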

> +inline int get_dcbm_table_index(int x)

static inline ??? 

> +{
> +	return DCBM_TABLE_INDEX(x);
> +}
> +
> +inline int get_icbm_table_index(int x)
> +{
> +	return ICBM_TABLE_INDEX(x);
> +}

Why are you introducing these when they are not used at all?

>  /*
>   * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
>   * as it does not have CPUID enumeration support for Cache allocation.
> @@ -98,41 +123,141 @@ static inline bool cache_alloc_hsw_probe(void)
>  
>  	wrmsr_safe(MSR_IA32_PQR_ASSOC, l, h_old);
>  
> -	boot_cpu_data.x86_cache_max_closid = 4;
> -	boot_cpu_data.x86_cache_max_cbm_len = 20;
> +	boot_cpu_data.x86_l3_max_closid = 4;
> +	boot_cpu_data.x86_l3_max_cbm_len = 20;

So if you actually change the name of the cpudata struct member, then it
would make sense to split that out into a separate patch. But hell, the
patch ordering of this stuff is an unholy mess anyway.

>  	min_bitmask_len = 2;
>  
>  	return true;
>  }
>  
> +u32 max_cbm_len(int level)
> +{
> +	switch (level) {
> +	case CACHE_LEVEL3:
> +		return boot_cpu_data.x86_l3_max_cbm_len;
> +	default:
> +		break;
> +	}
> +
> +	return (u32)~0;
> +}
> +u64 max_cbm(int level)

More functions without users and of course without a proper prefix for
public consumption. Documentation is not available either.

> +{
> +	switch (level) {
> +	case CACHE_LEVEL3:
> +		return (1ULL << boot_cpu_data.x86_l3_max_cbm_len) - 1;

So the above max_cbm_len() returns:

	cpuid_count(0x00000010, 1, &eax, &ebx, &ecx, &edx);
	c->x86_l3_max_cbm_len = eax + 1;

i.e. the content of leaf 10:1 EAX plus 1.

According to the SDM:

    EAX 4:0: Length of the capacity bit mask for the corresponding ResID.

So first of all: why is there a "+ 1"? Again, that's not documented in the
cpuid initialization code at all.

So now you take that magic number and return:

       (2 ^ magic) - 1

Cute. To be honest, I'm too lazy to read through the SDM and figure it out
myself. This kind of stuff belongs in the code as comments. I seriously
doubt that the function names match the actual meaning.
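
FWIW, if the "+ 1" is meant to implement the SDM's minus-one notation for
EAX[4:0], then say exactly that where the value is read. Untested sketch:

	/*
	 * CPUID.(EAX=10H, ECX=1):EAX[4:0] reports the capacity bitmask
	 * length in minus-one notation.
	 */
	c->x86_l3_max_cbm_len = (eax & 0x1f) + 1;

Then max_cbm() is simply the all-ones mask of that many bits, e.g. a
length of 20 gives 0xfffff. But nobody should have to reverse engineer
that from the code.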

> +	default:
> +		break;
> +	}
> +
> +	return (u64)~0;

What kind of return code is this?

> +static inline bool cat_l3_supported(struct cpuinfo_x86 *c)
> +{
> +	if (cpu_has(c, X86_FEATURE_CAT_L3))
> +		return true;
> +
> +	/*
> +	 * Probe for Haswell server CPUs.
> +	 */
> +	if (c->x86 == 0x6 && c->x86_model == 0x3f)
> +		return cache_alloc_hsw_probe();

Ah now we have an actual user for that haswell probe thingy ....

> +	return false;
> +}
> +
>  void __intel_rdt_sched_in(void *dummy)

I still have not found a reasonable explanation for this dummy argument.

>  {
>  	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
> +	struct rdtgroup *rdtgrp;
> +	int closid;
> +	int domain;

Sigh.
  
>  	/*
> -	 * Currently closid is always 0. When  user interface is added,
> -	 * closid will come from user interface.
> +	 * If this task is assigned to an rdtgroup, use it.
> +	 * Else use the group assigned to this cpu.
>  	 */
> -	if (state->closid == 0)
> +	rdtgrp = current->rdtgroup;
> +	if (!rdtgrp)
> +		rdtgrp = this_cpu_read(cpu_rdtgroup);

This actually makes sense! Thanks for listening!

> +
> +	domain = this_cpu_read(cpu_shared_domain);
> +	closid = rdtgrp->resource.closid[domain];
> +
> +	if (closid == state->closid)
>  		return;
>  
> -	wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
> -	state->closid = 0;
> +	state->closid = closid;
> +	/* Don't really write PQR register in simulation mode. */
> +	if (unlikely(rdt_opts.simulate_cat_l3))
> +		return;
> +
> +	wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, closid);
>  }
>  
>  /*
>   * When cdp mode is enabled, refcnt is maintained in the dcache_cbm entry.
>   */
> -static inline void closid_get(u32 closid)
> +inline void closid_get(u32 closid, int domain)

s/static inline/inline/ What the heck?

Can you please explain what this is doing?

>  {
> -	struct clos_cbm_table *cct = &cctable[DCBM_TABLE_INDEX(closid)];
> -
>  	lockdep_assert_held(&rdtgroup_mutex);
>  
> -	cct->clos_refcnt++;
> +	if (cat_l3_enabled) {
> +		int l3_domain;
> +		int dindex;

  		int l3_domain, dindex;

> +		l3_domain = shared_domain[domain].l3_domain;
> +		dindex = DCBM_TABLE_INDEX(closid);
> +		l3_cctable[l3_domain][dindex].clos_refcnt++;
> +		if (cdp_enabled) {
> +			int iindex = ICBM_TABLE_INDEX(closid);

And if you call that variable 'index' instead of 'dindex', then you don't
need an extra 'iindex'.
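
I.e. something like:

	int index = DCBM_TABLE_INDEX(closid);

	l3_cctable[l3_domain][index].clos_refcnt++;
	if (cdp_enabled) {
		index = ICBM_TABLE_INDEX(closid);
		l3_cctable[l3_domain][index].clos_refcnt++;
	}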

> +
> +			l3_cctable[l3_domain][iindex].clos_refcnt++;

So now you have a separate refcount for the Icache part, but the comment
above the function still says otherwise.

> +		}
> +	}
>  }
>  
> -static int closid_alloc(u32 *closid)
> +int closid_alloc(u32 *closid, int domain)
>  {
>  	u32 maxid;
>  	u32 id;
> @@ -140,105 +265,215 @@ static int closid_alloc(u32 *closid)
>  	lockdep_assert_held(&rdtgroup_mutex);
>  
>  	maxid = cconfig.max_closid;
> -	id = find_first_zero_bit(cconfig.closmap, maxid);
> +	id = find_first_zero_bit((unsigned long *)cconfig.closmap[domain],

Why do you need a typecast here? Get your damned structs straight.

> +				 maxid);
> +
>  	if (id == maxid)
>  		return -ENOSPC;
>  
> -	set_bit(id, cconfig.closmap);
> -	closid_get(id);
> +	set_bit(id, (unsigned long *)cconfig.closmap[domain]);
> +	closid_get(id, domain);
>  	*closid = id;
> -	cconfig.closids_used++;
>  
>  	return 0;
>  }
>  
> -static inline void closid_free(u32 closid)
> +unsigned int get_domain_num(int level)
>  {
> -	clear_bit(closid, cconfig.closmap);
> -	cctable[DCBM_TABLE_INDEX(closid)].cbm = 0;
> -
> -	if (WARN_ON(!cconfig.closids_used))
> -		return;
> +	if (level == CACHE_LEVEL3)
> +		return cpumask_weight(&rdt_l3_cpumask);

get_domain_num(level) suggests to me that it returns the domain number
corresponding to the level, but it actually returns the number of bits set
in the rdt_l3_cpumask. Very intuitive - NOT!

> +	else
> +		return -EINVAL;

Proper return value for a function returning 'unsigned int' ....
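
If it stays unsigned, then return 0 for the unsupported case and let the
caller treat 'no domains' as the error. Untested sketch:

	static unsigned int get_domain_num(int level)
	{
		if (level != CACHE_LEVEL3)
			return 0;

		return cpumask_weight(&rdt_l3_cpumask);
	}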

> +}
>  
> -	cconfig.closids_used--;
> +int level_to_leaf(int level)
> +{
> +	switch (level) {
> +	case CACHE_LEVEL3:
> +		return 3;
> +	default:
> +		return -EINVAL;
> +	}
>  }
>  
> -static void closid_put(u32 closid)
> +void closid_free(u32 closid, int domain, int level)
>  {
> -	struct clos_cbm_table *cct = &cctable[DCBM_TABLE_INDEX(closid)];
> +	struct clos_cbm_table **cctable;
> +	int leaf;
> +	struct cpumask *mask;
> +	int cpu;
> +
> +	if (level == CACHE_LEVEL3)
> +		cctable = l3_cctable;

Oh well. Why does this assignment need to happen here?

> +
> +	clear_bit(closid, (unsigned long *)cconfig.closmap[domain]);
> +
> +	if (level == CACHE_LEVEL3) {

And not here where it actually makes sense?

> +		cctable[domain][closid].cbm = max_cbm(level);
> +		leaf = level_to_leaf(level);
> +		mask = &cache_domains[leaf].shared_cpu_map[domain];
> +		cpu = cpumask_first(mask);
> +		smp_call_function_single(cpu, cbm_update_l3_msr, &closid, 1);

A comment explaining that this must be done on one of the cpus in @domain
would be helpful.
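
Something like:

	/*
	 * The CBM MSRs are per cache domain, so the update must be
	 * executed on one of the CPUs belonging to @domain.
	 */
	cpu = cpumask_first(mask);
	smp_call_function_single(cpu, cbm_update_l3_msr, &closid, 1);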

> +	}
> +}
>  
> +static void _closid_put(u32 closid, struct clos_cbm_table *cct,

Please use two underscores so it's obvious.

> +			int domain, int level)
> +{
>  	lockdep_assert_held(&rdtgroup_mutex);
>  	if (WARN_ON(!cct->clos_refcnt))
>  		return;
>  
>  	if (!--cct->clos_refcnt)
> -		closid_free(closid);
> +		closid_free(closid, domain, level);
>  }
>  
> -static void msr_cpu_update(void *arg)
> +void closid_put(u32 closid, int domain)
> +{
> +	struct clos_cbm_table *cct;
> +
> +	if (cat_l3_enabled) {
> +		int l3_domain = shared_domain[domain].l3_domain;
> +
> +		cct = &l3_cctable[l3_domain][DCBM_TABLE_INDEX(closid)];
> +		_closid_put(closid, cct, l3_domain, CACHE_LEVEL3);
> +		if (cdp_enabled) {
> +			cct = &l3_cctable[l3_domain][ICBM_TABLE_INDEX(closid)];
> +			_closid_put(closid, cct, l3_domain, CACHE_LEVEL3);
> +		}
> +	}
> +}
> +
> +void msr_cpu_update(void *arg)
>  {
>  	struct rdt_remote_data *info = arg;
>  
> +	if (unlikely(rdt_opts.verbose))
> +		pr_info("Write %lx to msr %x on cpu%d\n",
> +			(unsigned long)info->val, info->msr,
> +			smp_processor_id());

That's what DYNAMIC_DEBUG is for.
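
I.e. drop the rdt_opts.verbose knob and use:

	pr_debug("Write %lx to msr %x on cpu%d\n",
		 (unsigned long)info->val, info->msr, smp_processor_id());

which is compiled out entirely or switchable at runtime with
CONFIG_DYNAMIC_DEBUG.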

> +
> +	if (unlikely(rdt_opts.simulate_cat_l3))
> +		return;

Do we really need all this simulation stuff?

> +
>  	wrmsrl(info->msr, info->val);
>  }
>  
> +static struct cpumask *rdt_cache_cpumask(int level)
> +{
> +	return &rdt_l3_cpumask;

That's a really useful helper .... Your choice of checking 'level' five
times in a row in various helpers versus returning l3 unconditionally is at
least interesting.

> +int get_cache_leaf(int level, int cpu)
> +{
> +	int index;
> +	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

this_cpu_ci is a complete misnomer as it suggests that it's actually the
cacheinfo for 'this cpu', i.e. the cpu on which the code is executing.

> +	struct cacheinfo *this_leaf;
> +	int num_leaves = this_cpu_ci->num_leaves;
> +
> +	for (index = 0; index < num_leaves; index++) {
> +		this_leaf = this_cpu_ci->info_list + index;
> +		if (this_leaf->level == level)
> +			return index;

The function is misnamed as well. It does not return the cache leaf, it
returns the leaf index .....

> +	}

Why do you have a cacheinfo-related function in this RDT code?

> +
> +	return -EINVAL;
> +}
> +
> +static struct cpumask *get_shared_cpu_map(int cpu, int level)
> +{
> +	int index;
> +	struct cacheinfo *leaf;
> +	struct cpu_cacheinfo *cpu_ci = get_cpu_cacheinfo(cpu);
> +
> +	index = get_cache_leaf(level, cpu);
> +	if (index < 0)
> +		return 0;
> +
> +	leaf = cpu_ci->info_list + index;

While here you actually get the leaf.

> +
> +	return &leaf->shared_cpu_map;
>  }
  
>  static int clear_rdtgroup_cpumask(unsigned int cpu)
> @@ -293,63 +531,270 @@ static int clear_rdtgroup_cpumask(unsigned int cpu)
>  
>  static int intel_rdt_offline_cpu(unsigned int cpu)
>  {
> -	int i;
> +	struct cpumask *shared_cpu_map;
> +	int new_cpu;
> +	int l3_domain;
> +	int level;
> +	int leaf;

Sigh. 1/3 of the line space is wasted for single variable declarations.
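
I.e.:

	struct cpumask *shared_cpu_map;
	int new_cpu, l3_domain, level, leaf;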
 
>  	mutex_lock(&rdtgroup_mutex);
> -	if (!cpumask_test_and_clear_cpu(cpu, &rdt_cpumask)) {
> -		mutex_unlock(&rdtgroup_mutex);
> -		return;
> -	}
>  
> -	cpumask_and(&tmp_cpumask, topology_core_cpumask(cpu), cpu_online_mask);
> -	cpumask_clear_cpu(cpu, &tmp_cpumask);
> -	i = cpumask_any(&tmp_cpumask);
> +	level = CACHE_LEVEL3;

I have a hard time understanding the value of that 'level' variable, but
that's the least of my worries with this code.

> +
> +	l3_domain = per_cpu(cpu_l3_domain, cpu);
> +	leaf = level_to_leaf(level);
> +	shared_cpu_map = &cache_domains[leaf].shared_cpu_map[l3_domain];
>  
> -	if (i < nr_cpu_ids)
> -		cpumask_set_cpu(i, &rdt_cpumask);
> +	cpumask_clear_cpu(cpu, &rdt_l3_cpumask);
> +	cpumask_clear_cpu(cpu, shared_cpu_map);
> +	if (cpumask_empty(shared_cpu_map))
> +		goto out;

So what clears @cpu in the rdtgroup cpumask?

> +
> +	new_cpu = cpumask_first(shared_cpu_map);
> +	rdt_cpumask_update(&rdt_l3_cpumask, new_cpu, level);
>  
>  	clear_rdtgroup_cpumask(cpu);

Can you please use a consistent prefix and naming scheme?

    rdt_cpumask_update()
    clear_rdtgroup_cpumask()

WTF?

> +out:
>  	mutex_unlock(&rdtgroup_mutex);
> +	return 0;
> +}
> +
> +/*
> + * Initialize per-cpu cpu_l3_domain.
> + *
> + * cpu_l3_domain numbers are consequtive integer starting from 0.
> + * Sets up 1:1 mapping of cpu id and cpu_l3_domain.
> + */
> +static int __init cpu_cache_domain_init(int level)
> +{
> +	int i, j;
> +	int max_cpu_cache_domain = 0;
> +	int index;
> +	struct cacheinfo *leaf;
> +	int *domain;
> +	struct cpu_cacheinfo *cpu_ci;

Eyes hurt.

> +
> +	for_each_online_cpu(i) {
> +		domain = &per_cpu(cpu_l3_domain, i);

per_cpu_ptr() exists for a reason.
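
I.e.:

	domain = per_cpu_ptr(&cpu_l3_domain, i);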

> +		if (*domain == -1) {
> +			index = get_cache_leaf(level, i);
> +			if (index < 0)
> +				return -EINVAL;
> +
> +			cpu_ci = get_cpu_cacheinfo(i);
> +			leaf = cpu_ci->info_list + index;
> +			if (cpumask_empty(&leaf->shared_cpu_map)) {
> +				WARN(1, "no shared cpu for L2\n");

So L2 is always the right thing for every value of @level? And what the
heck is the value of that WARN? Nothing, because the call chain is already
known. Worse, it tells us nothing about @level, @index or @i (which should
be named @cpu).

> +				return -EINVAL;
> +			}
> +
> +			for_each_cpu(j, &leaf->shared_cpu_map) {

So again independent of @level you fiddle with cpu_l3_domain. Interesting.

> +				domain = &per_cpu(cpu_l3_domain, j);
> +				*domain = max_cpu_cache_domain;
> +			}
> +			max_cpu_cache_domain++;

What's the actual meaning of max_cpu_cache_domain?

> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int __init rdt_setup(char *str)
> +{
> +	char *tok;
> +
> +	while ((tok = strsep(&str, ",")) != NULL) {
> +		if (!*tok)
> +			return -EINVAL;
> +
> +		if (strcmp(tok, "simulate_cat_l3") == 0) {
> +			pr_info("Simulate CAT L3\n");
> +			rdt_opts.simulate_cat_l3 = true;

So this goes into rdt_opts

> +		} else if (strcmp(tok, "disable_cat_l3") == 0) {
> +			pr_info("CAT L3 is disabled\n");
> +			disable_cat_l3 = true;

While this is a distinct control variable. Very consistent.

> +		} else {
> +			pr_info("Invalid rdt option\n");

Very helpful w/o printing the actual option ....
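
E.g.:

	pr_info("Invalid rdt option: %s\n", tok);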

> +			return -EINVAL;
> +		}
> +	}
> +
> +	return 0;
> +}
> +__setup("resctrl=", rdt_setup);
> +
> +static inline bool resource_alloc_enabled(void)
> +{
> +	return cat_l3_enabled;
> +}

Oh well.

> +
> +static int shared_domain_init(void)
> +{
> +	int l3_domain_num = get_domain_num(CACHE_LEVEL3);
> +	int size;
> +	int domain;
> +	struct cpumask *cpumask;
> +	struct cpumask *shared_cpu_map;
> +	int cpu;

More random variable declarations.

> +	if (cat_l3_enabled) {
> +		shared_domain_num = l3_domain_num;
> +		cpumask = &rdt_l3_cpumask;
> +	} else
> +		return -EINVAL;

Missing curly braces.
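
Per coding style, if one branch needs braces, all branches get them:

	if (cat_l3_enabled) {
		shared_domain_num = l3_domain_num;
		cpumask = &rdt_l3_cpumask;
	} else {
		return -EINVAL;
	}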

> +
> +	size = shared_domain_num * sizeof(struct shared_domain);
> +	shared_domain = kzalloc(size, GFP_KERNEL);
> +	if (!shared_domain)
> +		return -EINVAL;
> +
> +	domain = 0;
> +	for_each_cpu(cpu, cpumask) {
> +		if (cat_l3_enabled)
> +			shared_domain[domain].l3_domain =
> +					per_cpu(cpu_l3_domain, cpu);
> +		else
> +			shared_domain[domain].l3_domain = -1;
> +
> +		shared_cpu_map = get_shared_cpu_map(cpu, CACHE_LEVEL3);
> +
> +		cpumask_copy(&shared_domain[domain].cpumask, shared_cpu_map);

What's the point of updating the cpumask when the thing is disabled? If
there is a reason then this should be documented in a comment.

> +		domain++;
> +	}
> +	for_each_online_cpu(cpu) {
> +		if (cat_l3_enabled)
> +			per_cpu(cpu_shared_domain, cpu) =
> +					per_cpu(cpu_l3_domain, cpu);

More missing curly braces. And using an intermediate variable would get
rid of these hard-to-read line breaks.

> +	}
> +
> +	return 0;
> +}
> +
> +static int cconfig_init(int maxid)
> +{
> +	int num;
> +	int domain;
> +	unsigned long *closmap_block;
> +	int maxid_size;
> +
> +	maxid_size = BITS_TO_LONGS(maxid);
> +	num = maxid_size * shared_domain_num;
> +	cconfig.closmap = kcalloc(maxid, sizeof(unsigned long *), GFP_KERNEL);

Really intuitive. You calculate 'num' right before allocating the closmap
pointers, but only use it in the next allocation.

> +	if (!cconfig.closmap)
> +		goto out_free;
> +
> +	closmap_block = kcalloc(num, sizeof(unsigned long), GFP_KERNEL);
> +	if (!closmap_block)
> +		goto out_free;
> +
> +	for (domain = 0; domain < shared_domain_num; domain++)
> +		cconfig.closmap[domain] = (unsigned long *)closmap_block +

More random type casting.

> +					  domain * maxid_size;

Why don't you allocate that whole mess in one go?

	unsigned int ptrsize, mapsize, size, d;
	void *p;

	ptrsize = maxid * sizeof(unsigned long *);
	mapsize = BITS_TO_LONGS(maxid) * sizeof(unsigned long);
	size = ptrsize + num_shared_domains * mapsize;

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	cconfig.closmap = p;
	cconfig.max_closid = maxid;

	p += ptrsize;
	for (d = 0; d < num_shared_domains; d++, p += mapsize)
		cconfig.closmap[d] = p;
	return 0;

Would be too simple. Once more.

> +
> +	cconfig.max_closid = maxid;
> +
> +	return 0;
> +out_free:
> +	kfree(cconfig.closmap);
> +	kfree(closmap_block);
> +	return -ENOMEM;
> +}
> +
> +static int __init cat_cache_init(int level, int maxid,
> +				 struct clos_cbm_table ***cctable)
> +{
> +	int domain_num;
> +	int domain;
> +	int size;
> +	int ret = 0;
> +	struct clos_cbm_table *p;
> +
> +	domain_num = get_domain_num(level);
> +	size = domain_num * sizeof(struct clos_cbm_table *);
> +	*cctable = kzalloc(size, GFP_KERNEL);
> +	if (!*cctable) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	size = maxid * domain_num * sizeof(struct clos_cbm_table);
> +	p = kzalloc(size, GFP_KERNEL);
> +	if (!p) {
> +		kfree(*cctable);
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +	for (domain = 0; domain < domain_num; domain++)
> +		(*cctable)[domain] = p + domain * maxid;

Same crap.

> +
> +	ret = cpu_cache_domain_init(level);
> +	if (ret) {
> +		kfree(*cctable);
> +		kfree(p);
> +	}
> +out:
> +	return ret;
>  }
>  
>  static int __init intel_rdt_late_init(void)
>  {
>  	struct cpuinfo_x86 *c = &boot_cpu_data;
>  	u32 maxid;
> -	int err = 0, size, i;
> -
> -	maxid = c->x86_cache_max_closid;
> -
> -	size = maxid * sizeof(struct clos_cbm_table);
> -	cctable = kzalloc(size, GFP_KERNEL);
> -	if (!cctable) {
> -		err = -ENOMEM;
> -		goto out_err;
> +	int i;
> +	int ret;
> +
> +	if (unlikely(disable_cat_l3))

This unlikely() is completely pointless. This is not a hotpath function;
it just makes the code harder to read.

> +		cat_l3_enabled = false;
> +	else if (cat_l3_supported(c))
> +		cat_l3_enabled = true;
> +	else if (rdt_opts.simulate_cat_l3 &&
> +		 get_cache_leaf(CACHE_LEVEL3, 0) >= 0)
> +		cat_l3_enabled = true;
> +	else
> +		cat_l3_enabled = false;

Please move that into resource_alloc_enabled() and make it readable.
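
Untested sketch of what that could look like:

	static bool __init resource_alloc_enabled(struct cpuinfo_x86 *c)
	{
		if (disable_cat_l3)
			return false;
		if (cat_l3_supported(c))
			return true;

		return rdt_opts.simulate_cat_l3 &&
		       get_cache_leaf(CACHE_LEVEL3, 0) >= 0;
	}

and then the whole if/else chain above collapses into:

	cat_l3_enabled = resource_alloc_enabled(c);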

> +	if (!resource_alloc_enabled())
> +		return -ENODEV;
> +
> +	if (rdt_opts.simulate_cat_l3) {
> +		boot_cpu_data.x86_l3_max_closid = 16;
> +		boot_cpu_data.x86_l3_max_cbm_len = 20;
> +	}
> +	for_each_online_cpu(i) {
> +		rdt_cpumask_update(&rdt_l3_cpumask, i, CACHE_LEVEL3);
>  	}
>  
> -	size = BITS_TO_LONGS(maxid) * sizeof(long);
> -	cconfig.closmap = kzalloc(size, GFP_KERNEL);
> -	if (!cconfig.closmap) {
> -		kfree(cctable);
> -		err = -ENOMEM;
> -		goto out_err;
> +	maxid = 0;
> +	if (cat_l3_enabled) {
> +		maxid = boot_cpu_data.x86_l3_max_closid;
> +		ret = cat_cache_init(CACHE_LEVEL3, maxid, &l3_cctable);
> +		if (ret)
> +			cat_l3_enabled = false;
>  	}
>  
> -	for_each_online_cpu(i)
> -		rdt_cpumask_update(i);
> +	if (!cat_l3_enabled)
> +		return -ENOSPC;

Huh? How do you get here when cat_l3_enabled is false?

> +
> +	ret = shared_domain_init();
> +	if (ret)
> +		return -ENODEV;

Leaks the closmaps.

> +
> +	ret = cconfig_init(maxid);
> +	if (ret)
> +		return ret;

Leaks more stuff.

>  	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
>  				"AP_INTEL_RDT_ONLINE",
>  				intel_rdt_online_cpu, intel_rdt_offline_cpu);

I still have not figured out how that whole init scheme works such that
you can use the nocalls() variant for the hotplug registration.

> -	if (err < 0)
> -		goto out_err;
> +	if (ret < 0)
> +		return ret;

And this as well.

Thanks,

	tglx
