lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sat, 29 Oct 2016 17:38:35 -0700
From:   David Carrillo-Cisneros <davidcc@...gle.com>
To:     linux-kernel@...r.kernel.org
Cc:     "x86@...nel.org" <x86@...nel.org>, Ingo Molnar <mingo@...hat.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Andi Kleen <ak@...ux.intel.com>,
        Kan Liang <kan.liang@...el.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Vegard Nossum <vegard.nossum@...il.com>,
        Marcelo Tosatti <mtosatti@...hat.com>,
        Nilay Vaish <nilayvaish@...il.com>,
        Borislav Petkov <bp@...e.de>,
        Vikas Shivappa <vikas.shivappa@...ux.intel.com>,
        Ravi V Shankar <ravi.v.shankar@...el.com>,
        Fenghua Yu <fenghua.yu@...el.com>,
        Paul Turner <pjt@...gle.com>,
        Stephane Eranian <eranian@...gle.com>,
        David Carrillo-Cisneros <davidcc@...gle.com>
Subject: [PATCH v3 38/46] perf/x86/intel/cmt: introduce read SLOs for rotation

To make rmid rotation more dependable, this patch series introduces
rotation Service Level Objectives (SLOs) that are described in the
code's documentation.

This patch introduces the cmt_{pre,min}_mon_slice SLOs, which protect
against bogus values when a rmid has not been available since the
beginning of monitoring. It also introduces the auxiliary variables
necessary for the SLOs to work and the checks in intel_cmt_event_read
that enforce the SLOs for reads of the llc_occupancy event.

Signed-off-by: David Carrillo-Cisneros <davidcc@...gle.com>
---
 arch/x86/events/intel/cmt.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/events/intel/cmt.h | 28 +++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index 3ade923..649eb5f 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -51,6 +51,25 @@ static size_t pkg_uflags_size;
 static struct pkg_data **cmt_pkgs_data;
 
 /*
+ * Rotation Service Level Objectives (SLO) for monrs with llc_occupancy
+ * monitoring. Note that these are monr level SLOs, therefore all pmonrs in
+ * the monr meet or exceed them.
+ * (A "monitored" monr is a monr with no pmonr in a Dependent state).
+ *
+ * SLOs:
+ *
+ * @__cmt_pre_mon_slice: Min time a monr is monitored before being readable.
+ * @__cmt_min_mon_slice: Min time a monr stays monitored after becoming
+ *                       readable.
+ */
+#define CMT_DEFAULT_PRE_MON_SLICE 2000		/* ms */
+static u64 __cmt_pre_mon_slice;
+
+#define CMT_DEFAULT_MIN_MON_SLICE 5000		/* ms */
+static u64 __cmt_min_mon_slice;
+
+
+/*
  * If @pkgd == NULL, return first online, pkg_data in cmt_pkgs_data.
  * Otherwise next online pkg_data or NULL if no more.
  */
@@ -300,6 +319,7 @@ static void pmonr_to_unused(struct pmonr *pmonr)
 			pmonr_move_all_dependants(pmonr, lender);
 		}
 		__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+		pkgd->nr_dirty_rmids++;
 
 	} else if (pmonr->state == PMONR_DEP_IDLE ||
 		   pmonr->state == PMONR_DEP_DIRTY) {
@@ -312,6 +332,11 @@ static void pmonr_to_unused(struct pmonr *pmonr)
 			__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
 		else
 			pkgd->nr_dep_pmonrs--;
+
+
+		if (!atomic_dec_and_test(&pmonr->monr->nr_dep_pmonrs))
+			atomic64_set(&pmonr->monr->last_rmid_recoup,
+				     get_jiffies_64());
 	} else {
 		WARN_ON_ONCE(true);
 		return;
@@ -372,6 +397,7 @@ static inline void __pmonr_to_dep_helper(
 
 	lender_rmids.value = atomic64_read(&lender->atomic_rmids);
 	pmonr_set_rmids(pmonr, lender_rmids.sched_rmid, read_rmid);
+	atomic_inc(&pmonr->monr->nr_dep_pmonrs);
 }
 
 static inline void pmonr_unused_to_dep_idle(struct pmonr *pmonr)
@@ -390,6 +416,7 @@ static void pmonr_unused_to_off(struct pmonr *pmonr)
 
 static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
 {
+	struct pkg_data *pkgd = pmonr->pkgd;
 	struct pmonr *lender;
 	union pmonr_rmids rmids;
 
@@ -398,6 +425,7 @@ static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
 
 	rmids.value = atomic64_read(&pmonr->atomic_rmids);
 	__pmonr_to_dep_helper(pmonr, lender, rmids.read_rmid);
+	pkgd->nr_dirty_rmids++;
 }
 
 static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
@@ -408,6 +436,9 @@ static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
 	pmonr_move_dependants(pmonr->lender, pmonr);
 	pmonr->lender = NULL;
 	__pmonr_to_active_helper(pmonr, rmid);
+
+	if (!atomic_dec_and_test(&pmonr->monr->nr_dep_pmonrs))
+		atomic64_set(&pmonr->monr->last_rmid_recoup, get_jiffies_64());
 }
 
 static void pmonr_dep_idle_to_active(struct pmonr *pmonr, u32 rmid)
@@ -422,6 +453,7 @@ static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
 	union pmonr_rmids rmids;
 
 	rmids.value = atomic64_read(&pmonr->atomic_rmids);
+	pmonr->pkgd->nr_dirty_rmids--;
 	__pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
 }
 
@@ -1599,7 +1631,7 @@ static int read_all_pkgs(struct monr *monr, int wait_time_ms, u64 *count)
 static int intel_cmt_event_read(struct perf_event *event)
 {
 	struct monr *monr = monr_from_event(event);
-	u64 count;
+	u64 count, recoup, wait_end;
 	u16 pkgid = topology_logical_package_id(smp_processor_id());
 	int err;
 
@@ -1614,6 +1646,15 @@ static int intel_cmt_event_read(struct perf_event *event)
 		return -ENXIO;
 
 	/*
+	 * If a rmid has been stolen, only read once enough time has elapsed
+	 * since the rmid was recovered.
+	 */
+	recoup = atomic64_read(&monr->last_rmid_recoup);
+	wait_end = recoup + __cmt_pre_mon_slice;
+	if (recoup && time_before64(get_jiffies_64(), wait_end))
+		return -EAGAIN;
+
+	/*
 	 * Only event parent can return a value, everyone else share its
 	 * rmid and therefore doesn't track occupancy independently.
 	 */
@@ -2267,6 +2308,9 @@ static int __init intel_cmt_init(void)
 	struct pkg_data *pkgd = NULL;
 	int err = 0;
 
+	__cmt_pre_mon_slice = msecs_to_jiffies(CMT_DEFAULT_PRE_MON_SLICE);
+	__cmt_min_mon_slice = msecs_to_jiffies(CMT_DEFAULT_MIN_MON_SLICE);
+
 	if (!x86_match_cpu(intel_cmt_match)) {
 		err = -ENODEV;
 		goto err_exit;
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
index 8bb43bd..8756666 100644
--- a/arch/x86/events/intel/cmt.h
+++ b/arch/x86/events/intel/cmt.h
@@ -52,6 +52,24 @@
  * schedule and read.
  *
  *
+ * Rotation
+ *
+ * The number of rmids in hw is relatively small with respect to the number
+ * of potential monitored resources. rmids are rotated among pmonrs that
+ * need one to give a fair-ish usage of this resource.
+ *
+ * A hw constraint is that occupancy for a rmid cannot be restarted, therefore
+ * a rmid with llc_occupancy needs some time unscheduled until all cache lines
+ * tagged to it are evicted from cache (if this ever happens).
+ *
+ * When a rmid is "rotated", it is stolen from a pmonr and must wait until its
+ * llc_occupancy has decreased enough to be considered "clean". Meanwhile, that
+ * rmid is considered "dirty".
+ *
+ * Rotation logic periodically reads the occupancy of these "dirty" rmids and,
+ * once one is clean, it is either reused or placed in a free pool.
+ *
+ *
  * Locking
  *
  * One global cmt_mutex. One mutex and spin_lock per package.
@@ -62,6 +80,7 @@
  *  cgroup start/stop.
  *  - Hold pkg->mutex and pkg->lock in _all_ active packages to traverse or
  *  change the monr hierarchy.
+ *  - pkgd->mutex: Hold in current package for rotation in that pkgd.
  *  - pkgd->lock: Hold in current package to access that pkgd's members. Hold
  *  a pmonr's package pkgd->lock for non-atomic access to pmonr.
  */
@@ -225,6 +244,7 @@ struct cmt_csd {
  * @dep_dirty_pmonrs:		LRU of Dep_Dirty pmonrs.
  * @dep_pmonrs:			LRU of Dep_Idle and Dep_Dirty pmonrs.
  * @nr_dep_pmonrs:		nr Dep_Idle + nr Dep_Dirty pmonrs.
+ * @nr_dirty_rmids:		"dirty" rmids, both with and without a pmonr.
  * @mutex:			Hold when modifying this pkg_data.
  * @mutex_key:			lockdep class for pkg_data's mutex.
  * @lock:			Hold to protect pmonrs in this pkg_data.
@@ -243,6 +263,7 @@ struct pkg_data {
 	struct list_head	dep_dirty_pmonrs;
 	struct list_head	dep_pmonrs;
 	int			nr_dep_pmonrs;
+	int			nr_dirty_rmids;
 
 	struct mutex		mutex;
 	raw_spinlock_t		lock;
@@ -280,6 +301,10 @@ enum cmt_user_flags {
  * @parent:		Parent in monr hierarchy.
  * @children:		List of children in monr hierarchy.
  * @parent_entry:	Entry in parent's children list.
+ * @last_rmid_recoup:	Last time that nr_dep_pmonrs decreased to zero. It's
+ *			zero if a rmid has never been stolen from this monr.
+ * @nr_dep_pmonrs:	nr of Dep_* pmonrs in this monr. A zero implies that
+ *			monr is monitoring in all required packages.
  * @flags:		monr_flags.
  * @nr_has_user:	nr of CMT_UF_HAS_USER set in events in mon_events.
  * @nr_nolazy_user:	nr of CMT_UF_NOLAZY_RMID set in events in mon_events.
@@ -303,6 +328,9 @@ struct monr {
 	struct list_head		children;
 	struct list_head		parent_entry;
 
+	atomic64_t			last_rmid_recoup;
+	atomic_t			nr_dep_pmonrs;
+
 	enum monr_flags			flags;
 	int				nr_has_user;
 	int				nr_nolazy_rmid;
-- 
2.8.0.rc3.226.g39d4020

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ