lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 11 May 2015 12:02:54 -0700
From:	Vikas Shivappa <vikas.shivappa@...ux.intel.com>
To:	vikas.shivappa@...el.com
Cc:	x86@...nel.org, linux-kernel@...r.kernel.org, hpa@...or.com,
	tglx@...utronix.de, mingo@...nel.org, tj@...nel.org,
	peterz@...radead.org, matt.fleming@...el.com, will.auld@...el.com,
	peter.zijlstra@...el.com, h.peter.anvin@...el.com,
	kanaka.d.juvva@...el.com, mtosatti@...hat.com,
	vikas.shivappa@...ux.intel.com
Subject: [PATCH 5/7] x86/intel_rdt: Software Cache for IA32_PQR_MSR

This patch implements a common software cache for IA32_PQR_MSR(RMID 0:9,
    CLOSId 32:63) to be used by both Cache monitoring(CMT) and Cache
allocation. CMT updates the RMID where as cache_alloc updates the CLOSid
in the software cache.  During scheduling when the new RMID/CLOSid value
is different from the cached values, IA32_PQR_MSR is updated.  Since the
measured rdmsr latency for IA32_PQR_MSR is very high(~250 cycles) this
software cache is necessary to avoid reading the MSR to compare the
current CLOSid value. Caching reduces the frequency of MSR writes during
the scheduler hot path for cache allocation.  During CPU hotplug pqr
cache is updated to zero.

Signed-off-by: Vikas Shivappa <vikas.shivappa@...ux.intel.com>

Conflicts:
	arch/x86/kernel/cpu/perf_event_intel_cqm.c
---
 arch/x86/include/asm/intel_rdt.h           |  9 ++++++---
 arch/x86/include/asm/rdt_common.h          | 13 +++++++++++++
 arch/x86/kernel/cpu/intel_rdt.c            | 30 +++++++++++++++++-------------
 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 20 +++++++-------------
 4 files changed, 43 insertions(+), 29 deletions(-)
 create mode 100644 arch/x86/include/asm/rdt_common.h

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 589394b..f4372d8 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -4,16 +4,16 @@
 #ifdef CONFIG_CGROUP_RDT
 
 #include <linux/cgroup.h>
+#include <asm/rdt_common.h>
 
-#define MSR_IA32_PQR_ASSOC		0xc8f
 #define MAX_CBM_LENGTH			32
 #define IA32_L3_CBM_BASE		0xc90
 #define CBM_FROM_INDEX(x)		(IA32_L3_CBM_BASE + x)
-DECLARE_PER_CPU(unsigned int, x86_cpu_clos);
+
+DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
 extern struct static_key rdt_enable_key;
 extern void __rdt_sched_in(void);
 
-
 struct rdt_subsys_info {
 	/* Clos Bitmap to keep track of available CLOSids.*/
 	unsigned long *closmap;
@@ -67,6 +67,9 @@ static inline struct intel_rdt *task_rdt(struct task_struct *task)
  * IA32_PQR_MSR writes until the user starts really using the feature
  * ie creates a rdt cgroup directory and assigns a cache_mask thats
  * different from the root cgroup's cache_mask.
+ * - Caches the per cpu CLOSid values and does the MSR write only
+ * when a task with a different CLOSid is scheduled in. That
+ * means the task belongs to a different cgroup.
  * - Closids are allocated so that different cgroup directories
  * with same cache_mask gets the same CLOSid. This minimizes CLOSids
  * used and reduces MSR write frequency.
diff --git a/arch/x86/include/asm/rdt_common.h b/arch/x86/include/asm/rdt_common.h
new file mode 100644
index 0000000..33fd8ea
--- /dev/null
+++ b/arch/x86/include/asm/rdt_common.h
@@ -0,0 +1,13 @@
+#ifndef _X86_RDT_H_
+#define _X86_RDT_H_
+
+#define MSR_IA32_PQR_ASSOC	0x0c8f
+
+struct intel_pqr_state {
+	raw_spinlock_t	lock;
+	int		rmid;
+	int		clos;
+	int		cnt;
+};
+
+#endif
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index fe3ce4e..2415965 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -85,27 +85,28 @@ void __rdt_sched_in(void)
 {
 	struct task_struct *task = current;
 	struct intel_rdt *ir;
-	unsigned int clos;
-
-	/*
-	 * This needs to be fixed
-	 * to cache the whole PQR instead of just CLOSid.
-	 * PQR has closid in high 32 bits and CQM-RMID in low 10 bits.
-	 * Should not write a 0 to the low 10 bits of PQR
-	 * and corrupt RMID.
-	 */
-	clos = this_cpu_read(x86_cpu_clos);
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+	unsigned long flags;
 
+	raw_spin_lock_irqsave(&state->lock, flags);
 	rcu_read_lock();
 	ir = task_rdt(task);
-	if (ir->clos == clos) {
+	if (ir->clos == state->clos) {
 		rcu_read_unlock();
+		raw_spin_unlock_irqrestore(&state->lock, flags);
 		return;
 	}
 
-	wrmsr(MSR_IA32_PQR_ASSOC, 0, ir->clos);
-	this_cpu_write(x86_cpu_clos, ir->clos);
+	/*
+	 * PQR has closid in high 32 bits and CQM-RMID
+	 * in low 10 bits. Rewrite the exsting rmid from
+	 * software cache.
+	 */
+	wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, ir->clos);
+	state->clos = ir->clos;
 	rcu_read_unlock();
+	raw_spin_unlock_irqrestore(&state->lock, flags);
+
 }
 
 static void __clos_get(unsigned int closid)
@@ -372,6 +373,9 @@ static inline bool intel_rdt_update_cpumask(int cpu)
  */
 static inline void intel_rdt_cpu_start(int cpu)
 {
+	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
+
+	state->clos = 0;
 	mutex_lock(&rdt_group_mutex);
 	if (intel_rdt_update_cpumask(cpu))
 		cbm_update_msrs(cpu);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index e4d1b8b..fd039899 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -7,22 +7,16 @@
 #include <linux/perf_event.h>
 #include <linux/slab.h>
 #include <asm/cpu_device_id.h>
+#include <asm/rdt_common.h>
 #include "perf_event.h"
 
-#define MSR_IA32_PQR_ASSOC	0x0c8f
 #define MSR_IA32_QM_CTR		0x0c8e
 #define MSR_IA32_QM_EVTSEL	0x0c8d
 
 static unsigned int cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 
-struct intel_cqm_state {
-	raw_spinlock_t		lock;
-	int			rmid;
-	int			cnt;
-};
-
-static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
+DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 
 /*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
@@ -961,7 +955,7 @@ out:
 
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
-	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 	unsigned int rmid = event->hw.cqm_rmid;
 	unsigned long flags;
 
@@ -978,14 +972,14 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
 		WARN_ON_ONCE(state->rmid);
 
 	state->rmid = rmid;
-	wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid);
+	wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, state->clos);
 
 	raw_spin_unlock_irqrestore(&state->lock, flags);
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
-	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 	unsigned long flags;
 
 	if (event->hw.cqm_state & PERF_HES_STOPPED)
@@ -998,7 +992,7 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode)
 
 	if (!--state->cnt) {
 		state->rmid = 0;
-		wrmsrl(MSR_IA32_PQR_ASSOC, 0);
+		wrmsr(MSR_IA32_PQR_ASSOC, 0, state->clos);
 	} else {
 		WARN_ON_ONCE(!state->rmid);
 	}
@@ -1243,7 +1237,7 @@ static inline void cqm_pick_event_reader(int cpu)
 
 static void intel_cqm_cpu_prepare(unsigned int cpu)
 {
-	struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
+	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	raw_spin_lock_init(&state->lock);
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ