linux-kernel - Re: [PATCH v2 30/33] x86/intel_rdt_rdtgroup.c: Process schemata input from resctrl interface

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.DEB.2.20.1609082252170.5454@nanos>
Date:   Fri, 9 Sep 2016 00:20:40 +0200 (CEST)
From:   Thomas Gleixner <tglx@...utronix.de>
To:     Fenghua Yu <fenghua.yu@...el.com>
cc:     "H. Peter Anvin" <h.peter.anvin@...el.com>,
        Ingo Molnar <mingo@...e.hu>, Tony Luck <tony.luck@...el.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Tejun Heo <tj@...nel.org>, Borislav Petkov <bp@...e.de>,
        Stephane Eranian <eranian@...gle.com>,
        Marcelo Tosatti <mtosatti@...hat.com>,
        David Carrillo-Cisneros <davidcc@...gle.com>,
        Shaohua Li <shli@...com>,
        Ravi V Shankar <ravi.v.shankar@...el.com>,
        Vikas Shivappa <vikas.shivappa@...ux.intel.com>,
        Sai Prakhya <sai.praneeth.prakhya@...el.com>,
        linux-kernel <linux-kernel@...r.kernel.org>, x86 <x86@...nel.org>
Subject: Re: [PATCH v2 30/33] x86/intel_rdt_rdtgroup.c: Process schemata
 input from resctrl interface

On Thu, 8 Sep 2016, Fenghua Yu wrote:

> +struct resources {

Darn. The first look made me parse this as a redefinition of 'struct
resource' ... Can't you find an even more generic name for this?

> +	struct cache_resource *l3;
> +};
> +
> +static int get_res_type(char **res, enum resource_type *res_type)
> +{
> +	char *tok;
> +
> +	tok = strsep(res, ":");
> +	if (tok == NULL)

We still write: if (!tok) as anywhere else.

> +static int divide_resources(char *buf, char *resources[RESOURCE_NUM])
> +{
> +	char *tok;
> +	unsigned int resource_num = 0;
> +	int ret = 0;
> +	char *res;
> +	char *res_block;
> +	size_t size;
> +	enum resource_type res_type;

Sigh.

> +
> +	size = strlen(buf) + 1;
> +	res = kzalloc(size, GFP_KERNEL);
> +	if (!res) {
> +		ret = -ENOSPC;
> +		goto out;
> +	}
> +
> +	while ((tok = strsep(&buf, "\n")) != NULL) {
> +		if (strlen(tok) == 0)
> +			break;
> +		if (resource_num++ >= 1) {

How gets that ever greater 1?

> +			pr_info("More than one line of resource input!\n");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +		strcpy(res, tok);
> +	}
> +
> +	res_block = res;
> +	ret = get_res_type(&res_block, &res_type);
> +	if (ret) {
> +		pr_info("Unknown resource type!");
> +		goto out;
> +	}
> +
> +	if (res_block == NULL) {
> +		pr_info("Invalid resource value!");
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	if (res_type == RESOURCE_L3 && cat_enabled(CACHE_LEVEL3)) {
> +		strcpy(resources[RESOURCE_L3], res_block);
> +	} else {
> +		pr_info("Invalid resource type!");
> +		goto out;
> +	}
> +
> +	ret = 0;


You surely found the most convoluted solution for this. Whats wrong with:

	data = get_res_type(res, &type);
	if (IS_ERR(data)) {
		ret = PTR_ERR(data);
		goto out;
	} 

	ret = 0;
	switch (type) {
	case RESOURCE_L3:
		if (cat_enabled(CACHE_LEVEL3))
			strcpy(resources[RESOURCE_L3], data);
		break;
	default:
		ret = -EINVAL;
	}	

That's too simple to understand and too extensible for future resource
types, right?

> +out:
> +	kfree(res);
> +	return ret;
> +}

> +static int get_input_cbm(char *tok, struct cache_resource *l,
> +			 int input_domain_num, int level)
> +{
> +	int ret;
> +
> +	if (!cdp_enabled) {
> +		if (tok == NULL)
> +			return -EINVAL;
> +
> +		ret = kstrtoul(tok, 16,
> +			       (unsigned long *)&l->cbm[input_domain_num]);

What is this type cast for? Can't you just parse the data into a local
unsigned long and then store it after validation?

> +		if (ret)
> +			return ret;
> +
> +		if (!cbm_validate(l->cbm[input_domain_num], level))
> +			return -EINVAL;
> +	} else  {
> +		char *input_cbm1_str;
> +
> +		input_cbm1_str = strsep(&tok, ",");
> +		if (input_cbm1_str == NULL || tok == NULL)
> +			return -EINVAL;
> +
> +		ret = kstrtoul(input_cbm1_str, 16,
> +			       (unsigned long *)&l->cbm[input_domain_num]);
> +		if (ret)
> +			return ret;
> +
> +		if (!cbm_validate(l->cbm[input_domain_num], level))
> +			return -EINVAL;
> +
> +		ret = kstrtoul(tok, 16,
> +			       (unsigned long *)&l->cbm2[input_domain_num]);
> +		if (ret)
> +			return ret;
> +
> +		if (!cbm_validate(l->cbm2[input_domain_num], level))
> +			return -EINVAL;

So you have 3 copies of the same sequence now. At other places you split
out the most tiniest stuff into a gazillion of helper functions ...

Just create a parser helper and call it for any of those types. So the
whole thing boils down to:

static int parse_cbm_token(char *tok, u64 *cbm, int level)
{
	unsigned long data;
	int ret;

	ret = kstrtoul(tok, 16, &data);
	if (ret)
		return ret;
	if (!cbm_validate(data, level))
	   	return -EINVAL;
	*cbm = data;
	return 0;
}

static int parse_cbm(char *buf, struct cache_resource *cr, int domain,
		     int level)
{
	char *cbm1 = buf;
	int ret;

	if (cdp_enabled)
		cbm1 = strsep(&buf, ',');

	if (!cbm1 || !buf)
	   	return -EINVAL;

	ret = parse_cbm_token(cbm1, &cr->cbm[domain]);
	if (ret)
		return ret;

	if (cdp_enmabled)
		return parse_cbm_token(buf, &cr->cbm2[domain]);
	return 0;
}

Copy and paste is simpler than thinking, but the result is uglier and
harder to read.

> +static int get_cache_schema(char *buf, struct cache_resource *l, int level,
> +			 struct rdtgroup *rdtgrp)
> +{
> +	char *tok, *tok_cache_id;
> +	int ret;
> +	int domain_num;
> +	int input_domain_num;
> +	int len;
> +	unsigned int input_cache_id;
> +	unsigned int cid;
> +	unsigned int leaf;
> +
> +	if (!cat_enabled(level) && strcmp(buf, ";")) {
> +		pr_info("Disabled resource should have empty schema\n");

So an empty schema is a string which is != ";". Very interesting.

This enabled check here wants more thoughts. If a resource is disabled,
then the input line should be simply ignored. Otherwise you need to rewrite
scripts, config files just because you disabled a particular resource.

> +		return -EINVAL;
> +	}
> +}
> +
> +enum {
> +	CURRENT_CLOSID,
> +	REUSED_OWN_CLOSID,
> +	REUSED_OTHER_CLOSID,
> +	NEW_CLOSID,
> +};

Another random enum in the middle of the code and at a place where it is
completely disjunct from its usage.

> +
> +/*
> + * Check if the reference counts are all ones in rdtgrp's domain.
> + */
> +static bool one_refcnt(struct rdtgroup *rdtgrp, int domain)

A really self explaining function name - NOT!

> +/*
> + * Go through all shared domains. Check if there is an existing closid
> + * in all rdtgroups that matches l3 cbms in the shared
> + * domain. If find one, reuse the closid. Otherwise, allocate a new one.
> + */
> +static int get_rdtgroup_resources(struct resources *resources_set,
> +				  struct rdtgroup *rdtgrp)
> +{
> +	struct cache_resource *l3;
> +	bool l3_cbm_found;
> +	struct list_head *l;
> +	struct rdtgroup *r;
> +	u64 cbm;
> +	int rdt_closid[MAX_CACHE_DOMAINS];
> +	int rdt_closid_type[MAX_CACHE_DOMAINS];

Have you ever checked what the stack foot print of this whole callchain is?
One of the callers has already a char array[1024] on the stack.....

> +	int domain;
> +	int closid;
> +	int ret;
> +
> +	l3 = resources_set->l3;
> +	memcpy(rdt_closid, rdtgrp->resource.closid,
> +	       shared_domain_num * sizeof(int));

Can you please seperate stuff with new lines ocassionally to make it
readable?

> +	for (domain = 0; domain < shared_domain_num; domain++) {
> +		if (rdtgrp->resource.valid) {
> +			/*
> +			 * If current rdtgrp is the only user of cbms in
> +			 * this domain, will replace the cbms with the input
> +			 * cbms and reuse its own closid.
> +			 */
> +			if (one_refcnt(rdtgrp, domain)) {
> +				closid = rdtgrp->resource.closid[domain];
> +				rdt_closid[domain] = closid;
> +				rdt_closid_type[domain] = REUSED_OWN_CLOSID;
> +				continue;
> +			}
> +
> +			l3_cbm_found = true;
> +
> +			if (cat_l3_enabled)
> +				l3_cbm_found = cbm_found(l3, rdtgrp, domain,
> +							 CACHE_LEVEL3);
> +
> +			/*
> +			 * If the cbms in this shared domain are already
> +			 * existing in current rdtgrp, record the closid
> +			 * and its type.
> +			 */
> +			if (l3_cbm_found) {
> +				closid = rdtgrp->resource.closid[domain];
> +				rdt_closid[domain] = closid;
> +				rdt_closid_type[domain] = CURRENT_CLOSID;
> +				continue;
> +			}

This is unreadable once more.

     		   	if (find_cbm(l3, rdtgrp, domain, CACHE_LEVEL3) {
				closid = rdtgrp->resource.closid[domain];
				rdt_closid[domain] = closid;
				rdt_closid_type[domain] = CURRENT_CLOSID;
				continue;
			}

That requires that find_cbm() - which is a way more intuitive name than
cbm_found() - returns false when cat_l3_enabled is false. Which is trivial
and obvious ...
			
> +		}
> +
> +		/*
> +		 * If the cbms are not found in this rdtgrp, search other
> +		 * rdtgroups and see if there are matched cbms.
> +		 */
> +		l3_cbm_found = cat_l3_enabled ? false : true;

What the heck?

		l3_cbm_found = !cat_l3_enabled;

Is too simple obviously.

Aside of that silly conditional: If cat_l3_enables is false then
l3_cbm_found is true.

> +		list_for_each(l, &rdtgroup_lists) {
> +			r = list_entry(l, struct rdtgroup, rdtgroup_list);
> +			if (r == rdtgrp || !r->resource.valid)
> +				continue;
> +
> +			if (cat_l3_enabled)
> +				l3_cbm_found = cbm_found(l3, r, domain,
> +							 CACHE_LEVEL3);

And because this path is never taken when cat_l3_enabled is false.
 
> +
> +			if (l3_cbm_found) {

We happily get the closid for something which is not enabled at all. What
is the logic here? I can't find any in this convoluted mess.

> +				/* Get the closid that matches l3 cbms.*/
> +				closid = r->resource.closid[domain];
> +				rdt_closid[domain] = closid;
> +				rdt_closid_type[domain] = REUSED_OTHER_CLOSID;
> +				break;
> +			}
> +		}
> +		if (!l3_cbm_found) {
> +			/*
> +			 * If no existing closid is found, allocate
> +			 * a new one.
> +			 */
> +			ret = closid_alloc(&closid, domain);
> +			if (ret)
> +				goto err;
> +			rdt_closid[domain] = closid;
> +			rdt_closid_type[domain] = NEW_CLOSID;
> +		}
> +	}

I really don't want to imagine how this might look like when you add L2
support and if you have code doing this please hide it in the poison
cabinet forever,

> +	/*
> +	 * Now all closid are ready in rdt_closid. Update rdtgrp's closid.
> +	 */
> +	for_each_cache_domain(domain, 0, shared_domain_num) {
> +		/*
> +		 * Nothing is changed if the same closid and same cbms were
> +		 * found in this rdtgrp's domain.
> +		 */
> +		if (rdt_closid_type[domain] == CURRENT_CLOSID)
> +			continue;
> +
> +		/*
> +		 * Put rdtgroup closid. No need to put the closid if we
> +		 * just change cbms and keep the closid (REUSED_OWN_CLOSID).
> +		 */
> +		if (rdtgrp->resource.valid &&
> +		    rdt_closid_type[domain] != REUSED_OWN_CLOSID) {
> +			/* Put old closid in this rdtgrp's domain if valid. */
> +			closid = rdtgrp->resource.closid[domain];
> +			closid_put(closid, domain);
> +		}
> +
> +		/*
> +		 * Replace the closid in this rdtgrp's domain with saved
> +		 * closid that was newly allocted (NEW_CLOSID), or found in
> +		 * another rdtgroup's domains (REUSED_CLOSID), or found in
> +		 * this rdtgrp (REUSED_OWN_CLOSID).
> +		 */
> +		closid = rdt_closid[domain];
> +		rdtgrp->resource.closid[domain] = closid;
> +
> +		/*
> +		 * Get the reused other rdtgroup's closid. No need to get the
> +		 * closid newly allocated (NEW_CLOSID) because it's been
> +		 * already got in closid_alloc(). And no need to get the closid
> +		 * for resued own closid (REUSED_OWN_CLOSID).
> +		 */
> +		if (rdt_closid_type[domain] == REUSED_OTHER_CLOSID)
> +			closid_get(closid, domain);
> +
> +		/*
> +		 * If the closid comes from a newly allocated closid
> +		 * (NEW_CLOSID), or found in this rdtgrp (REUSED_OWN_CLOSID),
> +		 * cbms for this closid will be updated in MSRs.
> +		 */
> +		if (rdt_closid_type[domain] == NEW_CLOSID ||
> +		    rdt_closid_type[domain] == REUSED_OWN_CLOSID) {
> +			/*
> +			 * Update cbm in cctable with the newly allocated
> +			 * closid.
> +			 */
> +			if (cat_l3_enabled) {
> +				int cpu;
> +				struct cpumask *mask;
> +				int dindex;
> +				int l3_domain = shared_domain[domain].l3_domain;
> +				int leaf = level_to_leaf(CACHE_LEVEL3);
> +
> +				cbm = l3->cbm[l3_domain];
> +				dindex = get_dcbm_table_index(closid);
> +				l3_cctable[l3_domain][dindex].cbm = cbm;
> +				if (cdp_enabled) {
> +					int iindex;
> +
> +					cbm = l3->cbm2[l3_domain];
> +					iindex = get_icbm_table_index(closid);
> +					l3_cctable[l3_domain][iindex].cbm = cbm;
> +				}
> +
> +				mask =
> +				&cache_domains[leaf].shared_cpu_map[l3_domain];
> +
> +				cpu = cpumask_first(mask);
> +				smp_call_function_single(cpu, cbm_update_l3_msr,
> +							 &closid, 1);

Again, why don't ypu split that out into a seperate function instead of
having the forth indentation level and random line breaks?

> +static void init_cache_resource(struct cache_resource *l)
> +{
> +	l->cbm = NULL;
> +	l->cbm2 = NULL;
> +	l->closid = NULL;
> +	l->refcnt = NULL;

memset ?

> +}
> +
> +static void free_cache_resource(struct cache_resource *l)
> +{
> +	kfree(l->cbm);
> +	kfree(l->cbm2);
> +	kfree(l->closid);
> +	kfree(l->refcnt);
> +}
> +
> +static int alloc_cache_resource(struct cache_resource *l, int level)
> +{
> +	int domain_num = get_domain_num(level);
> +
> +	l->cbm = kcalloc(domain_num, sizeof(*l->cbm), GFP_KERNEL);
> +	l->cbm2 = kcalloc(domain_num, sizeof(*l->cbm2), GFP_KERNEL);
> +	l->closid = kcalloc(domain_num, sizeof(*l->closid), GFP_KERNEL);
> +	l->refcnt = kcalloc(domain_num, sizeof(*l->refcnt), GFP_KERNEL);
> +	if (l->cbm && l->cbm2 && l->closid && l->refcnt)
> +		return 0;
> +
> +	return -ENOMEM;
> +}
> +
> +/*
> + * This function digests schemata given in text buf. If the schemata are in
> + * right format and there is enough closid, input the schemata in rdtgrp
> + * and update resource cctables.
> + *
> + * Inputs:
> + *	buf: string buffer containing schemata
> + *	rdtgrp: current rdtgroup holding schemata.
> + *
> + * Return:
> + *	0 on success or error code.
> + */
> +static int get_resources(char *buf, struct rdtgroup *rdtgrp)
> +{
> +	char *resources[RESOURCE_NUM];
> +	struct cache_resource l3;
> +	struct resources resources_set;
> +	int ret;
> +	char *resources_block;
> +	int i;
> +	int size = strlen(buf) + 1;
> +
> +	resources_block = kcalloc(RESOURCE_NUM, size, GFP_KERNEL);
> +	if (!resources_block)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < RESOURCE_NUM; i++)
> +		resources[i] = (char *)(resources_block + i * size);

This is a recurring scheme in your code. Allocating a runtime sized array
and initializing pointers.

Darn, instead of open coding this in several places can't you just make a
single function which does exactly that?

> +	ret = divide_resources(buf, resources);
> +	if (ret) {
> +		kfree(resources_block);
> +		return -EINVAL;
> +	}
> +
> +	init_cache_resource(&l3);
> +
> +	if (cat_l3_enabled) {
> +		ret = alloc_cache_resource(&l3, CACHE_LEVEL3);
> +		if (ret)
> +			goto out;
> +
> +		ret = get_cache_schema(resources[RESOURCE_L3], &l3,
> +				       CACHE_LEVEL3, rdtgrp);
> +		if (ret)
> +			goto out;
> +
> +		resources_set.l3 = &l3;
> +	} else
> +		resources_set.l3 = NULL;



> +
> +	ret = get_rdtgroup_resources(&resources_set, rdtgrp);
> +
> +out:
> +	kfree(resources_block);
> +	free_cache_resource(&l3);
> +
> +	return ret;
> +}
> +
> +static void gen_cache_prefix(char *buf, int level)
> +{
> +	sprintf(buf, "L%1d:", level == CACHE_LEVEL3 ? 3 : 2);
> +}
> +
> +static int get_cache_id(int domain, int level)
> +{
> +	return cache_domains[level_to_leaf(level)].shared_cache_id[domain];
> +}
> +
> +static void gen_cache_buf(char *buf, int level)
> +{
> +	int domain;
> +	char buf1[32];
> +	int domain_num;
> +	u64 val;
> +
> +	gen_cache_prefix(buf, level);
> +
> +	domain_num = get_domain_num(level);
> +
> +	val = max_cbm(level);
> +
> +	for (domain = 0; domain < domain_num; domain++) {
> +		sprintf(buf1, "%d=%lx", get_cache_id(domain, level),
> +			(unsigned long)val);
> +		strcat(buf, buf1);

WTF?

	char *p = buf;

	p += sprintf(p, "....", ...);
	p += sprintf(p, "....", ...);
	p += sprintf(p, "....", ...);

Solves the same problem as this local buffer on the stack plus strcat().

> +/*
> + * Set up default schemata in a rdtgroup. All schemata in all resources are
> + * default values (all 1's) for all domains.
> + *
> + * Input: rdtgroup.
> + * Return: 0: successful
> + *	   non-0: error code
> + */
> +int get_default_resources(struct rdtgroup *rdtgrp)
> +{
> +	char schema[1024];

And that number is pulled out of thin air or what?

> +	int ret = 0;
> +
> +	if (cat_enabled(CACHE_LEVEL3)) {
> +		gen_cache_buf(schema, CACHE_LEVEL3);
> +
> +		if (strlen(schema)) {
> +			ret = get_resources(schema, rdtgrp);
> +			if (ret)
> +				return ret;
> +		}
> +		gen_cache_buf(rdtgrp->schema, CACHE_LEVEL3);
> +	}
> +
> +	return ret;
> +}
> +
> +ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
> +			char *buf, size_t nbytes, loff_t off)
> +{
> +	int ret = 0;
> +	struct rdtgroup *rdtgrp;
> +	char *schema;
> +
> +	rdtgrp = rdtgroup_kn_lock_live(of->kn);
> +	if (!rdtgrp)
> +		return -ENODEV;
> +
> +	schema = kzalloc(sizeof(char) * strlen(buf) + 1, GFP_KERNEL);
> +	if (!schema) {
> +		ret = -ENOMEM;
> +		goto out_unlock;
> +	}
> +
> +	memcpy(schema, buf, strlen(buf) + 1);

Open coding kstrdup() is indeed useful. and reevaluating strlen(buf) three
times in the same function is even more useful.

> +
> +	ret = get_resources(buf, rdtgrp);
> +	if (ret)
> +		goto out;
> +
> +	memcpy(rdtgrp->schema, schema, strlen(schema) + 1);

IIRC then the kernel has even strcpy() and strncpy for that matter.

Btw, what makes sure that strlen(schema) is < 1023 ?????

Thanks,

	tglx