linux-kernel - [PATCH 5/5] x86/mbm: Add support for MBM counter overflow handling

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-Id: <1455152873-30467-6-git-send-email-vikas.shivappa@linux.intel.com>
Date:	Wed, 10 Feb 2016 17:07:53 -0800
From:	Vikas Shivappa <vikas.shivappa@...ux.intel.com>
To:	vikas.shivappa@...el.com
Cc:	x86@...nel.org, linux-kernel@...r.kernel.org, hpa@...or.com,
	tglx@...utronix.de, vikas.shivappa@...ux.intel.com,
	mingo@...nel.org, peterz@...radead.org, ravi.v.shankar@...el.com,
	tony.luck@...el.com, fenghua.yu@...el.com,
	kanaka.d.juvva@...el.com, h.peter.anvin@...el.com
Subject: [PATCH 5/5] x86/mbm: Add support for MBM counter overflow handling

This patch adds a per package timer which periodically updates the
Memory bandwidth counters for the events that are currently active.
Current patch has a periodic timer every 1s since the SDM guarantees
that the counter will not overflow in 1s but this time can be definitely
improved by calibrating on the system. The overflow is really a function
of the max memory b/w that the socket can support, max counter value and
scaling factor.

Signed-off-by: Vikas Shivappa <vikas.shivappa@...ux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 111 ++++++++++++++++++++++++++++-
 1 file changed, 110 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index fea22c8..8245dbd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -18,6 +18,11 @@
  * value
  */
 #define MBM_CNTR_MAX		0xffffff
+/*
+ * Guaranteed time in ms as per SDM where MBM counters will not overflow.
+ */
+#define MBM_CTR_OVERFLOW_TIME	1000
+
 static u32 cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 static bool cqm_enabled, mbm_enabled;
@@ -49,6 +54,7 @@ struct intel_pqr_state {
  * interrupts disabled, which is sufficient for the protection.
  */
 static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+static struct hrtimer *mbm_timers;
 /**
  * struct sample - mbm event's (local or total) data
  * @interval_start Time this interval began
@@ -1123,6 +1129,84 @@ static void __intel_mbm_event_count(void *info)
 	atomic64_add(val, &rr->value);
 }
 
+static enum hrtimer_restart mbm_hrtimer_handle(struct hrtimer *hrtimer)
+{
+	struct perf_event *iter, *iter1;
+	struct list_head *head;
+	unsigned long flags;
+	u32 grp_rmid;
+
+	/*
+	 * Need to cache_lock as the timer Event Select MSR reads
+	 * can race with the mbm/cqm count() and mbm_init() reads.
+	 */
+	mutex_lock(&cache_mutex);
+	raw_spin_lock_irqsave(&cache_lock, flags);
+
+	if (list_empty(&cache_groups))
+		goto out;
+
+	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+		grp_rmid = iter->hw.cqm_rmid;
+		if (!__rmid_valid(grp_rmid))
+			continue;
+		if (is_mbm_event(iter->attr.config))
+			update_sample(grp_rmid, iter->attr.config, 0);
+
+		head = &iter->hw.cqm_group_entry;
+		if (list_empty(head))
+			continue;
+		list_for_each_entry(iter1, head, hw.cqm_group_entry) {
+			if (!iter1->hw.is_group_event)
+				break;
+			if (is_mbm_event(iter1->attr.config))
+				update_sample(iter1->hw.cqm_rmid,
+					      iter1->attr.config, 0);
+		}
+	}
+
+out:
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+	mutex_unlock(&cache_mutex);
+
+	hrtimer_forward_now(hrtimer, ms_to_ktime(MBM_CTR_OVERFLOW_TIME));
+
+	return HRTIMER_RESTART;
+}
+
+static void __mbm_start_timer(void *info)
+{
+	hrtimer_start(&mbm_timers[pkg_id], ms_to_ktime(MBM_CTR_OVERFLOW_TIME),
+			     HRTIMER_MODE_REL_PINNED);
+}
+
+static void __mbm_stop_timer(void *info)
+{
+	hrtimer_cancel(&mbm_timers[pkg_id]);
+}
+
+static void mbm_start_timers(void)
+{
+	on_each_cpu_mask(&cqm_cpumask, __mbm_start_timer, NULL, 1);
+}
+
+static void mbm_stop_timers(void)
+{
+	on_each_cpu_mask(&cqm_cpumask, __mbm_stop_timer, NULL, 1);
+}
+
+static void mbm_hrtimer_init(void)
+{
+	struct hrtimer *hr;
+	int i;
+
+	for (i = 0; i < mbm_socket_max; i++) {
+		hr = &mbm_timers[i];
+		hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hr->function = mbm_hrtimer_handle;
+	}
+}
+
 static u64 intel_cqm_event_count(struct perf_event *event)
 {
 	unsigned long flags;
@@ -1285,6 +1369,12 @@ static void intel_cqm_event_destroy(struct perf_event *event)
 		}
 	}
 
+	/*
+	 * Stop the mbm overflow timers when the last event is destroyed.
+	*/
+	if (list_empty(&cache_groups))
+		mbm_stop_timers();
+
 	mutex_unlock(&cache_mutex);
 }
 
@@ -1317,6 +1407,12 @@ static int intel_cqm_event_init(struct perf_event *event)
 
 	mutex_lock(&cache_mutex);
 
+	/*
+	 * Start the mbm overflow timers when the first event is created.
+	*/
+	if (list_empty(&cache_groups))
+		mbm_start_timers();
+
 	/* Will also set rmid */
 	intel_cqm_setup_event(event, &group);
 
@@ -1615,10 +1711,23 @@ static int intel_mbm_init(void)
 
 	mbm_total = kmalloc(array_size, GFP_KERNEL);
 	if (!mbm_total) {
-		kfree(mbm_local);
 		ret = -ENOMEM;
+		goto out;
+	}
+
+	array_size = sizeof(struct hrtimer) * mbm_socket_max;
+	mbm_timers = kmalloc(array_size, GFP_KERNEL);
+	if (!mbm_timers) {
+		ret = -ENOMEM;
+		goto out;
 	}
+	mbm_hrtimer_init();
+
 out:
+	if (ret) {
+		kfree(mbm_local);
+		kfree(mbm_total);
+	}
 
 	return ret;
 }
-- 
1.9.1