Message-ID: <20161026211442.GB1752@linux.intel.com>
Date:   Wed, 26 Oct 2016 14:14:42 -0700
From:   Fenghua Yu <fenghua.yu@...el.com>
To:     Thomas Gleixner <tglx@...utronix.de>
Cc:     Fenghua Yu <fenghua.yu@...el.com>,
        "H. Peter Anvin" <h.peter.anvin@...el.com>,
        Ingo Molnar <mingo@...e.hu>, Tony Luck <tony.luck@...el.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Stephane Eranian <eranian@...gle.com>,
        Borislav Petkov <bp@...e.de>,
        Dave Hansen <dave.hansen@...el.com>,
        Nilay Vaish <nilayvaish@...il.com>, Shaohua Li <shli@...com>,
        David Carrillo-Cisneros <davidcc@...gle.com>,
        Ravi V Shankar <ravi.v.shankar@...el.com>,
        Sai Prakhya <sai.praneeth.prakhya@...el.com>,
        Vikas Shivappa <vikas.shivappa@...ux.intel.com>,
        linux-kernel <linux-kernel@...r.kernel.org>, x86 <x86@...nel.org>
Subject: Re: [PATCH v5 10/18] x86/intel_rdt: Build structures for each
 resource based on cache topology

On Wed, Oct 26, 2016 at 03:02:56PM +0200, Thomas Gleixner wrote:
> On Sat, 22 Oct 2016, Fenghua Yu wrote:
> > +void rdt_cbm_update(void *arg)
> > +{
> > +	struct msr_param *m = (struct msr_param *)arg;
> > +	struct rdt_resource *r = m->res;
> > +	int i, cpu = smp_processor_id();
> > +	struct rdt_domain *d;
> > +
> > +	list_for_each_entry(d, &r->domains, list) {
> 
> > +static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
> > +					  struct list_head **pos)
> > +{
> > +	struct rdt_domain *d;
> > +	struct list_head *l;
> > +
> > +	if (id < 0)
> > +		return ERR_PTR(id);
> > +
> > +	list_for_each(l, &r->domains) {
> > +		d = list_entry(l, struct rdt_domain, list);
> 
> So above you converted to list_for_each_entry(). Is there a sensible
> reason, aside of being sloppy, why is this still using list_for_each()?

We use list_for_each() because we need the list_head pointer "l" itself:
it marks the position at which a new domain will be inserted.

rdt_find_domain() handles two related tasks (find a matching domain, or
find the position in the domain list where a new domain should be
inserted). Maybe it's not good for two tasks to share the same function?
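
For reference, here is a rough, untested sketch of how the same
dual-purpose lookup could keep list_for_each_entry() and still hand back
the insertion point. When the loop terminates without a break, &d->list
points at the list head itself, so list_add_tail() on *pos appends to
the list in that case as well:

static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
					  struct list_head **pos)
{
	struct rdt_domain *d;

	if (id < 0)
		return ERR_PTR(id);

	list_for_each_entry(d, &r->domains, list) {
		/* When id is found, return its domain. */
		if (id == d->id)
			return d;
		/* Stop at the first domain with a bigger id. */
		if (id < d->id)
			break;
	}

	/*
	 * On break, &d->list is the node to insert before; after a full
	 * traversal it is &r->domains, the head of the (possibly empty)
	 * list.
	 */
	if (pos)
		*pos = &d->list;

	return NULL;
}

That would keep a single function for both tasks while making the @pos
argument optional.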

> 
> > +		/* When id is found, return its domain. */
> > +		if (id == d->id)
> > +			return d;
> > +		/* Stop searching when finding id's position in sorted list. */
> 
> What is the reason that this needs to be in a sorted list?
> 
> I haven't found one so far. And if there is none, then this can use a hlist.
> 
> > +		if (id < d->id)
> > +			break;
> > +	}
> > +	/*
> > +	 * No id is found in resource domains. Record the position
> > +	 * that the new domain will be added. The position is not used
> > +	 * when removing a domain.
> 
> This comment makes no sense. If you want to document that a caller does not
> require the @pos argument, then you really should make it optional and do
> 
> 	if (pos)
> 		*pos = l;
> 
> But before doing that blindly, you want to explain why sorting is required
> at all.
> 
> > +	 */
> > +	*pos = l;
> > +
> > +	return NULL;
> > +}
> > +
> > +static void domain_add_cpu(int cpu, struct rdt_resource *r)
> > +{
> > +	int i, id = get_cache_id(cpu, r->cache_level);
> > +	struct list_head *add_pos = NULL;
> > +	struct rdt_domain *d;
> > +
> > +	d = rdt_find_domain(r, id, &add_pos);
> > +	if (IS_ERR(d)) {
> > +		pr_warn("Could't find cache id for cpu %d\n", cpu);
> > +		return;
> > +	}
> > +
> > +	if (d) {
> > +		cpumask_set_cpu(cpu, &d->cpu_mask);
> > +		return;
> > +	}
> > +
> > +	if (!add_pos) {
> > +		pr_warn("Couldn't add cpu %d in %s domain\n", cpu, r->name);
> 
> Errm, how can add_pos ever be NULL if you get here? Not at all AFAICT.
> 
> > +		return;
> > +	}
> > +
> > +	d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
> > +	if (!d)
> > +		return;
> > +
> > +	d->id = id;
> 
> Please move this after the allocation. This random code ordering just makes
> reading hard as one expects that d->id is a prerequisite for the
> allocation.
> 
> > +	d->cbm = kmalloc_array(r->num_closid, sizeof(*d->cbm), GFP_KERNEL);
> > +	if (!d->cbm) {
> > +		pr_warn("Failed to alloc CBM array for cpu %d\n", cpu);
> > +		kfree(d);
> > +		return;
> > +	}
> 
> New line please. Visually separating logical code blocks enhances
> readability.
> 
> > +	for (i = 0; i < r->num_closid; i++) {
> 
> Thanks,
> 
> 	tglx

The following updated patch #10 should fix the issues you pointed out above.

Is it good now?

---
 arch/x86/include/asm/intel_rdt.h |  35 ++++++++
 arch/x86/kernel/cpu/intel_rdt.c  | 189 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 224 insertions(+)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 9780409..c0d0a6e 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -39,6 +39,34 @@ struct rdt_resource {
 	int			cbm_idx_offset;
 };
 
+/**
+ * struct rdt_domain - group of cpus sharing an RDT resource
+ * @list:	all instances of this resource
+ * @id:		unique id for this instance
+ * @cpu_mask:	which cpus share this resource
+ * @cbm:	array of cache bit masks (indexed by CLOSID)
+ */
+struct rdt_domain {
+	struct list_head	list;
+	int			id;
+	struct cpumask		cpu_mask;
+	u32			*cbm;
+};
+
+/**
+ * struct msr_param - set a range of MSRs from a domain
+ * @res:       The resource to use
+ * @low:       Beginning index from base MSR
+ * @high:      End index
+ */
+struct msr_param {
+	struct rdt_resource	*res;
+	int			low;
+	int			high;
+};
+
+extern struct mutex rdtgroup_mutex;
+
 extern struct rdt_resource rdt_resources_all[];
 
 enum {
@@ -56,6 +84,11 @@ enum {
 	     r++) 							      \
 		if (r->capable)
 
+#define for_each_enabled_rdt_resource(r)				      \
+	for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+	     r++)							      \
+		if (r->enabled)
+
 /* CPUID.(EAX=10H, ECX=ResID=1).EAX */
 union cpuid_0x10_1_eax {
 	struct {
@@ -71,4 +104,6 @@ union cpuid_0x10_1_edx {
 	} split;
 	unsigned int full;
 };
+
+void rdt_cbm_update(void *arg);
 #endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 29308e1..23f1740 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -26,11 +26,16 @@
 
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/cacheinfo.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/intel_rdt_common.h>
 #include <asm/intel-family.h>
 #include <asm/intel_rdt.h>
 
+/* Mutex to protect rdtgroup access. */
+DEFINE_MUTEX(rdtgroup_mutex);
+
 #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
 
 struct rdt_resource rdt_resources_all[] = {
@@ -72,6 +77,11 @@ struct rdt_resource rdt_resources_all[] = {
 	},
 };
 
+static int cbm_idx(struct rdt_resource *r, int closid)
+{
+	return closid * r->cbm_idx_multi + r->cbm_idx_offset;
+}
+
 /*
  * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
  * as they do not have CPUID enumeration support for Cache allocation.
@@ -176,14 +186,193 @@ static inline bool get_rdt_resources(void)
 	return ret;
 }
 
+static int get_cache_id(int cpu, int level)
+{
+	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
+	int i;
+
+	for (i = 0; i < ci->num_leaves; i++) {
+		if (ci->info_list[i].level == level)
+			return ci->info_list[i].id;
+	}
+
+	return -1;
+}
+
+void rdt_cbm_update(void *arg)
+{
+	struct msr_param *m = (struct msr_param *)arg;
+	struct rdt_resource *r = m->res;
+	int i, cpu = smp_processor_id();
+	struct rdt_domain *d;
+
+	list_for_each_entry(d, &r->domains, list) {
+		/* Find the domain that contains this CPU */
+		if (cpumask_test_cpu(cpu, &d->cpu_mask))
+			goto found;
+	}
+	pr_info_once("cpu %d not found in any domain for resource %s\n",
+		     cpu, r->name);
+
+	return;
+
+found:
+	for (i = m->low; i < m->high; i++) {
+		int idx = cbm_idx(r, i);
+
+		wrmsrl(r->msr_base + idx, d->cbm[i]);
+	}
+}
+
+/*
+ * rdt_find_domain - Find a domain in a resource that matches the input id
+ *
+ * Search resource r's domain list for a domain whose id matches the input
+ * id. If such a domain is found, return it. Otherwise return NULL and, if
+ * requested by the caller, store in @pos the list position before which a
+ * new domain with this id should be inserted. The list is sorted ascending.
+ */
+static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
+					  struct list_head **pos)
+{
+	struct rdt_domain *d;
+	struct list_head *l;
+
+	if (id < 0)
+		return ERR_PTR(id);
+
+	list_for_each(l, &r->domains) {
+		d = list_entry(l, struct rdt_domain, list);
+		/* When id is found, return its domain. */
+		if (id == d->id)
+			return d;
+		/* Stop searching when finding id's position in sorted list. */
+		if (id < d->id)
+			break;
+	}
+
+	if (pos)
+		*pos = l;
+
+	return NULL;
+}
+
+/*
+ * domain_add_cpu - Add a cpu to a resource's domain list.
+ *
+ * If an existing domain in the resource r's domain list matches the cpu's
+ * resource id, add the cpu to that domain.
+ *
+ * Otherwise, a new domain is allocated and inserted into the right
+ * position in the domain list, which is sorted by id in ascending order.
+ *
+ * The order of the domain list is visible to users when we print entries
+ * in the schemata file, and schemata input is validated to have the same
+ * order as this list.
+ */
+static void domain_add_cpu(int cpu, struct rdt_resource *r)
+{
+	int i, id = get_cache_id(cpu, r->cache_level);
+	struct list_head *add_pos = NULL;
+	struct rdt_domain *d;
+
+	d = rdt_find_domain(r, id, &add_pos);
+	if (IS_ERR(d)) {
+		pr_warn("Could't find cache id for cpu %d\n", cpu);
+		return;
+	}
+
+	if (d) {
+		cpumask_set_cpu(cpu, &d->cpu_mask);
+		return;
+	}
+
+	d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
+	if (!d)
+		return;
+
+	d->cbm = kmalloc_array(r->num_closid, sizeof(*d->cbm), GFP_KERNEL);
+	if (!d->cbm) {
+		pr_warn("Failed to alloc CBM array for cpu %d\n", cpu);
+		kfree(d);
+		return;
+	}
+
+	for (i = 0; i < r->num_closid; i++) {
+		int idx = cbm_idx(r, i);
+
+		d->cbm[i] = r->max_cbm;
+		wrmsrl(r->msr_base + idx, d->cbm[i]);
+	}
+
+	d->id = id;
+	cpumask_set_cpu(cpu, &d->cpu_mask);
+	list_add_tail(&d->list, add_pos);
+	r->num_domains++;
+}
+
+static void domain_remove_cpu(int cpu, struct rdt_resource *r)
+{
+	int id = get_cache_id(cpu, r->cache_level);
+	struct rdt_domain *d;
+
+	d = rdt_find_domain(r, id, NULL);
+	if (IS_ERR_OR_NULL(d)) {
+		pr_warn("Could't find cache id for cpu %d\n", cpu);
+		return;
+	}
+
+	cpumask_clear_cpu(cpu, &d->cpu_mask);
+	if (cpumask_empty(&d->cpu_mask)) {
+		r->num_domains--;
+		kfree(d->cbm);
+		list_del(&d->list);
+		kfree(d);
+	}
+}
+
+static int intel_rdt_online_cpu(unsigned int cpu)
+{
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+	struct rdt_resource *r;
+
+	mutex_lock(&rdtgroup_mutex);
+	for_each_capable_rdt_resource(r)
+		domain_add_cpu(cpu, r);
+	state->closid = 0;
+	wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
+	mutex_unlock(&rdtgroup_mutex);
+
+	return 0;
+}
+
+static int intel_rdt_offline_cpu(unsigned int cpu)
+{
+	struct rdt_resource *r;
+
+	mutex_lock(&rdtgroup_mutex);
+	for_each_capable_rdt_resource(r)
+		domain_remove_cpu(cpu, r);
+	mutex_unlock(&rdtgroup_mutex);
+
+	return 0;
+}
+
 static int __init intel_rdt_late_init(void)
 {
 	bool first_resource = true;
 	struct rdt_resource *r;
+	int state;
 
 	if (!get_rdt_resources())
 		return -ENODEV;
 
+	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+				  "x86/rdt/cat:online:",
+				  intel_rdt_online_cpu, intel_rdt_offline_cpu);
+	if (state < 0)
+		return state;
+
 	pr_info("Intel RDT allocation detected: ");
 	for_each_capable_rdt_resource(r) {
 		if (!first_resource)
-- 
2.5.0
