lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sat, 29 Oct 2016 17:38:11 -0700
From:   David Carrillo-Cisneros <davidcc@...gle.com>
To:     linux-kernel@...r.kernel.org
Cc:     "x86@...nel.org" <x86@...nel.org>, Ingo Molnar <mingo@...hat.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Andi Kleen <ak@...ux.intel.com>,
        Kan Liang <kan.liang@...el.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Vegard Nossum <vegard.nossum@...il.com>,
        Marcelo Tosatti <mtosatti@...hat.com>,
        Nilay Vaish <nilayvaish@...il.com>,
        Borislav Petkov <bp@...e.de>,
        Vikas Shivappa <vikas.shivappa@...ux.intel.com>,
        Ravi V Shankar <ravi.v.shankar@...el.com>,
        Fenghua Yu <fenghua.yu@...el.com>,
        Paul Turner <pjt@...gle.com>,
        Stephane Eranian <eranian@...gle.com>,
        David Carrillo-Cisneros <davidcc@...gle.com>
Subject: [PATCH v3 14/46] perf/x86/intel/cmt: add Active and Dep_{Idle, Dirty} states

Add remaining states for pmonr's state machine:
  - Active: A pmonr that is actively used.
  - Dep_Idle: A pmonr that failed to obtain a rmid. It "borrows" its rmid
    from its lowest monitored (Active in same pkgd) ancestor in the
    monr hierarchy.
  - Dep_Dirty: A pmonr that was Active but has lost its rmid (due to rmid
    rotation, introduced later in this patch series). It is similar to
    Dep_Idle but keeps track of its former rmid in case there is a reuse
    opportunity in the future.

This patch adds states and state transition functions for pmonrs.
It also adds infrastructure and usage statistics to struct pkg_data that
will be used later in this series.

The transitions Unused -> Active and Unused -> Dep_Idle are inline because
they will be called during task context switches the first time a monr
runs in a package (later in this series).

More details in code's comments.

Signed-off-by: David Carrillo-Cisneros <davidcc@...gle.com>
---
 arch/x86/events/intel/cmt.c | 237 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/events/intel/cmt.h |  95 +++++++++++++++++-
 2 files changed, 329 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index fb6877f..86c3013 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -142,6 +142,10 @@ static struct pmonr *pmonr_alloc(struct pkg_data *pkgd)
 	if (!pmonr)
 		return ERR_PTR(-ENOMEM);
 
+	/* pmonr_deps_{head, entry} are in a union, initialize one of them. */
+	INIT_LIST_HEAD(&pmonr->pmonr_deps_head);
+	INIT_LIST_HEAD(&pmonr->pkgd_deps_entry);
+	INIT_LIST_HEAD(&pmonr->rot_entry);
 	pmonr_set_rmids(pmonr, INVALID_RMID, INVALID_RMID);
 	pmonr->pkgd = pkgd;
 
@@ -153,9 +157,108 @@ static inline bool monr_is_root(struct monr *monr)
 	return monr_hrchy_root == monr;
 }
 
+/*
+ * @a and @b may be equal.
+ * Return true if @a is an ancestor of @b or equal to it.
+ */
+static inline bool monr_hrchy_is_ancestor(struct monr *a, struct monr *b)
+{
+	if (monr_hrchy_root == a || a == b)
+		return true;
+	if (monr_hrchy_root == b)
+		return false;
+
+	b = b->parent;
+	/* Break at the root */
+	while (b != monr_hrchy_root) {
+		if (a == b)
+			return true;
+		b = b->parent;
+	}
+
+	return false;
+}
+
+/**
+ * pmonr_find_lma() - Find Lowest Monitored Ancestor (lma) of a pmonr.
+ * @pmonr:		The pmonr to start the search on.
+ *
+ * Always succeeds since pmonrs in monr_hrchy_root are always in Active state.
+ * Return: lma of @pmonr.
+ */
+static struct pmonr *pmonr_find_lma(struct pmonr *pmonr)
+{
+	struct monr *monr = pmonr->monr;
+	struct pkg_data *pkgd = pmonr->pkgd;
+
+	lockdep_assert_held(&pkgd->lock);
+
+	while ((monr = monr->parent)) {
+		/* protected by pkgd lock. */
+		pmonr = pkgd_pmonr(pkgd, monr);
+		if (pmonr->state == PMONR_ACTIVE)
+			return pmonr;
+	}
+	/* Should have hit monr_hrchy_root. */
+	WARN_ON_ONCE(true);
+
+	return pkgd_pmonr(pkgd, monr_hrchy_root);
+}
+
+/**
+ * pmonr_move_all_dependants() - Move all dependants from @old lender to @new.
+ * @old: Old lender.
+ * @new: New lender.
+ *
+ * @new->monr must be ancestor of @old->monr and they must be distinct.
+ */
+static void pmonr_move_all_dependants(struct pmonr *old, struct pmonr *new)
+{
+	struct pmonr *dep;
+	union pmonr_rmids dep_rmids, new_rmids;
+
+	new_rmids.value = atomic64_read(&new->atomic_rmids);
+	/* Update this pmonr's dependants to depend on new lender. */
+	list_for_each_entry(dep, &old->pmonr_deps_head, pmonr_deps_entry) {
+		dep->lender = new;
+		dep_rmids.value = atomic64_read(&dep->atomic_rmids);
+		pmonr_set_rmids(dep, new_rmids.sched_rmid, dep_rmids.read_rmid);
+	}
+	list_splice_tail_init(&old->pmonr_deps_head, &new->pmonr_deps_head);
+}
+
+/**
+ * pmonr_move_dependants() -  Move some dependants from @old lender to @new.
+ *
+ * Move @old's dependants that are @new->monr descendants to be @new's
+ * dependants. As opposed to pmonr_move_all_dependants, @new->monr does not
+ * need to be an ancestor of @old->monr.
+ */
+static inline void pmonr_move_dependants(struct pmonr *old, struct pmonr *new)
+{
+	struct pmonr *dep, *tmp;
+	union pmonr_rmids dep_rmids, new_rmids;
+
+	new_rmids.value = atomic64_read(&new->atomic_rmids);
+
+	list_for_each_entry_safe(dep, tmp, &old->pmonr_deps_head,
+				 pmonr_deps_entry) {
+		if (!monr_hrchy_is_ancestor(new->monr, dep->monr))
+			continue;
+		list_move_tail(&dep->pmonr_deps_entry, &new->pmonr_deps_head);
+		dep->lender = new;
+		dep_rmids.value = atomic64_read(&dep->atomic_rmids);
+		pmonr_set_rmids(dep, new_rmids.sched_rmid, dep_rmids.read_rmid);
+	}
+}
+
 /* pkg_data lock is not required for transition from Off state. */
 static void pmonr_to_unused(struct pmonr *pmonr)
 {
+	struct pkg_data *pkgd = pmonr->pkgd;
+	struct pmonr *lender;
+	union pmonr_rmids rmids;
+
 	/*
 	 * Do not warn on re-entering Unused state to simplify cleanup
 	 * of initialized pmonrs that were not scheduled.
@@ -168,6 +271,98 @@ static void pmonr_to_unused(struct pmonr *pmonr)
 		pmonr_set_rmids(pmonr, INVALID_RMID, 0);
 		return;
 	}
+
+	lockdep_assert_held(&pkgd->lock);
+	rmids.value = atomic64_read(&pmonr->atomic_rmids);
+
+	if (pmonr->state == PMONR_ACTIVE) {
+		if (monr_is_root(pmonr->monr)) {
+			WARN_ON_ONCE(!list_empty(&pmonr->pmonr_deps_head));
+		} else {
+			lender = pmonr_find_lma(pmonr);
+			pmonr_move_all_dependants(pmonr, lender);
+		}
+		__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+
+	} else if (pmonr->state == PMONR_DEP_IDLE ||
+		   pmonr->state == PMONR_DEP_DIRTY) {
+
+		pmonr->lender = NULL;
+		list_del_init(&pmonr->pmonr_deps_entry);
+		list_del_init(&pmonr->pkgd_deps_entry);
+
+		if (pmonr->state == PMONR_DEP_DIRTY)
+			__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+		else
+			pkgd->nr_dep_pmonrs--;
+	} else {
+		WARN_ON_ONCE(true);
+		return;
+	}
+
+	list_del_init(&pmonr->rot_entry);
+	pmonr->state = PMONR_UNUSED;
+	pmonr_set_rmids(pmonr, INVALID_RMID, INVALID_RMID);
+}
+
+static inline void __pmonr_to_active_helper(struct pmonr *pmonr, u32 rmid)
+{
+	struct pkg_data *pkgd = pmonr->pkgd;
+
+	list_move_tail(&pmonr->rot_entry, &pkgd->active_pmonrs);
+	pmonr->state = PMONR_ACTIVE;
+	pmonr_set_rmids(pmonr, rmid, rmid);
+	atomic64_set(&pmonr->last_enter_active, get_jiffies_64());
+}
+
+static inline void pmonr_unused_to_active(struct pmonr *pmonr, u32 rmid)
+{
+	struct pmonr *lender;
+
+	__clear_bit(rmid, pmonr->pkgd->free_rmids);
+	__pmonr_to_active_helper(pmonr, rmid);
+	/*
+	 * If monr is root, no ancestor exists to move pmonr to. If monr is
+	 * root's child, no dependants of its parent (root) could be moved.
+	 * Check both cases separately to avoid unnecessary calls to
+	 * pmonr_move_dependants.
+	 */
+	if (!monr_is_root(pmonr->monr) && !monr_is_root(pmonr->monr->parent)) {
+		lender = pmonr_find_lma(pmonr);
+		pmonr_move_dependants(lender, pmonr);
+	}
+}
+
+/* helper function for transitions to Dep_{Idle,Dirty} states. */
+static inline void __pmonr_to_dep_helper(
+	struct pmonr *pmonr, struct pmonr *lender, u32 read_rmid)
+{
+	struct pkg_data *pkgd = pmonr->pkgd;
+	union pmonr_rmids lender_rmids;
+
+	pmonr->lender = lender;
+	list_move_tail(&pmonr->pmonr_deps_entry, &lender->pmonr_deps_head);
+	list_move_tail(&pmonr->pkgd_deps_entry, &pkgd->dep_pmonrs);
+
+	if (read_rmid == INVALID_RMID) {
+		list_move_tail(&pmonr->rot_entry, &pkgd->dep_idle_pmonrs);
+		pkgd->nr_dep_pmonrs++;
+		pmonr->state = PMONR_DEP_IDLE;
+	} else {
+		list_move_tail(&pmonr->rot_entry, &pkgd->dep_dirty_pmonrs);
+		pmonr->state = PMONR_DEP_DIRTY;
+	}
+
+	lender_rmids.value = atomic64_read(&lender->atomic_rmids);
+	pmonr_set_rmids(pmonr, lender_rmids.sched_rmid, read_rmid);
+}
+
+static inline void pmonr_unused_to_dep_idle(struct pmonr *pmonr)
+{
+	struct pmonr *lender;
+
+	lender = pmonr_find_lma(pmonr);
+	__pmonr_to_dep_helper(pmonr, lender, INVALID_RMID);
 }
 
 static void pmonr_unused_to_off(struct pmonr *pmonr)
@@ -176,6 +371,43 @@ static void pmonr_unused_to_off(struct pmonr *pmonr)
 	pmonr_set_rmids(pmonr, INVALID_RMID, 0);
 }
 
+static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
+{
+	struct pmonr *lender;
+	union pmonr_rmids rmids;
+
+	lender = pmonr_find_lma(pmonr);
+	pmonr_move_all_dependants(pmonr, lender);
+
+	rmids.value = atomic64_read(&pmonr->atomic_rmids);
+	__pmonr_to_dep_helper(pmonr, lender, rmids.read_rmid);
+}
+
+static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
+{
+	list_del_init(&pmonr->pkgd_deps_entry);
+	/* pmonr will no longer be dependent on pmonr_lender. */
+	list_del_init(&pmonr->pmonr_deps_entry);
+	pmonr_move_dependants(pmonr->lender, pmonr);
+	pmonr->lender = NULL;
+	__pmonr_to_active_helper(pmonr, rmid);
+}
+
+static void pmonr_dep_idle_to_active(struct pmonr *pmonr, u32 rmid)
+{
+	__clear_bit(rmid, pmonr->pkgd->free_rmids);
+	pmonr->pkgd->nr_dep_pmonrs--;
+	__pmonr_dep_to_active_helper(pmonr, rmid);
+}
+
+static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
+{
+	union pmonr_rmids rmids;
+
+	rmids.value = atomic64_read(&pmonr->atomic_rmids);
+	__pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
+}
+
 static void monr_dealloc(struct monr *monr)
 {
 	u16 p, nr_pkgs = topology_max_packages();
@@ -780,6 +1012,11 @@ static struct pkg_data *alloc_pkg_data(int cpu)
 		pkgd->max_rmid = CMT_MAX_NR_RMIDS - 1;
 	}
 
+	INIT_LIST_HEAD(&pkgd->active_pmonrs);
+	INIT_LIST_HEAD(&pkgd->dep_idle_pmonrs);
+	INIT_LIST_HEAD(&pkgd->dep_dirty_pmonrs);
+	INIT_LIST_HEAD(&pkgd->dep_pmonrs);
+
 	mutex_init(&pkgd->mutex);
 	raw_spin_lock_init(&pkgd->lock);
 
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
index 05325c8..bf90c26 100644
--- a/arch/x86/events/intel/cmt.h
+++ b/arch/x86/events/intel/cmt.h
@@ -36,6 +36,21 @@
  * online cpu. The pmonr handles the CMT and MBM monitoring within its package
  * by managing the rmid to write into each CPU that runs a monitored thread.
  *
+ * The lma of a pmonr is its closest ancestor pmonr that is in Active state.
+ *
+ * A pmonr allocates a rmid when needed, depending on its state (see
+ * enum pmonr_state comments). If a pmonr fails to obtain a free rmid, it
+ * "borrows" the one used by its Lowest Monitored Ancestor (lma).
+ *
+ * The "borrowed" rmid is used when threads are scheduled in so that the
+ * occupancy and memory bandwidth for those threads is accounted for in the
+ * monr hierarchy. Yet, that pmonr cannot use a "borrowed" rmid to read,
+ * since that rmid is not counting the "borrower"'s monr cache events.
+ * Therefore, a pmonr uses rmids in two ways:
+ *   (1) to schedule, and (2) to read.
+ * When a pmonr owns a rmid (Active state), that rmid is used for both
+ * schedule and read.
+ *
  *
  * Locking
  *
@@ -56,6 +71,16 @@
  *   - Off:	  pmonr is unavailable for monitoring. It's the starting state.
  *   - Unused:	  pmonr is available for monitoring but no thread associated to
  *		  this pmonr's monr has been scheduled in this pmonr's package.
+ *   - Active:	  pmonr is actively used. It successfully obtained a free rmid
+ *		  to sched in/out and uses it to read pmonr's llc_occupancy.
+ *   - Dep_Idle:  pmonr failed to obtain its own free rmid and is borrowing the
+ *		  rmid from its lowest Active ancestor monr (its lma monr).
+ *   - Dep_Dirty: pmonr was Active but its rmid was stolen. This state differs
+ *		  from Dep_Idle in that the pmonr keeps a reference to its
+ *		  former Active rmid. If the pmonr becomes eligible to recoup
+ *		  its rmid in the near future, this previously used rmid can
+ *		  be reused even if "dirty" without introducing additional
+ *		  counting error.
  *
  * The valid state transitions are:
  *
@@ -64,11 +89,37 @@
  *  Off		|  Unused	monitoring is enabled for a pmonr.
  *-----------------------------------------------------------------------------
  *  Unused	|  Off		monitoring is disabled for a pmonr.
+ *		|--------------------------------------------------------------
+ *		|  Active	First thread associated to pmonr is scheduled
+ *		|		in package and a free rmid is available.
+ *		|--------------------------------------------------------------
+ *		|  Dep_Idle	Could not find a free rmid available.
+ *-----------------------------------------------------------------------------
+ *  Active	|  Dep_Dirty	rmid is stolen, keep reference to old rmid
+ *		|		in read_rmid, but do not use it to read.
+ *		|--------------------------------------------------------------
+ *		|  Unused	pmonr releases the rmid, released rmid can be
+ *		|		"dirty" and therefore goes to dirty_rmids.
+ *-----------------------------------------------------------------------------
+ *  Dep_Idle	|  Active	pmonr receives a "clean" rmid.
+ *		|--------------------------------------------------------------
+ *		|  Unused	pmonr is no longer waiting for rmid.
+ *-----------------------------------------------------------------------------
+ *  Dep_Dirty	|  Active	dirty rmid is reissued to pmonr that had it
+ *		|		before the transition.
+ *		|--------------------------------------------------------------
+ *		|  Dep_Idle	dirty rmid has become "clean" and is reissued
+ *		|		to a distinct pmonr (or goes to free_rmids).
+ *		|--------------------------------------------------------------
+ *		|  Unused	pmonr is no longer waiting for rmid.
  *-----------------------------------------------------------------------------
  */
 enum pmonr_state {
 	PMONR_OFF = 0,
 	PMONR_UNUSED,
+	PMONR_ACTIVE,
+	PMONR_DEP_IDLE,
+	PMONR_DEP_DIRTY,
 };
 
 /**
@@ -81,11 +132,11 @@ enum pmonr_state {
 * Its values can also be used to atomically read the state (preventing
  * unnecessary locks of pkgd->lock) in the following way:
  *					pmonr state
- *	      |      Off         Unused
+ *	      |      Off         Unused       Active      Dep_Idle     Dep_Dirty
  * ============================================================================
- * sched_rmid |	INVALID_RMID  INVALID_RMID
+ * sched_rmid |	INVALID_RMID  INVALID_RMID    valid       lender's     lender's
  * ----------------------------------------------------------------------------
- *  read_rmid |	INVALID_RMID        0
+ *  read_rmid |	INVALID_RMID        0	      (same)    INVALID_RMID   old rmid
  *
  */
 union pmonr_rmids {
@@ -98,16 +149,42 @@ union pmonr_rmids {
 
 /**
 * struct pmonr - per-package component of MONitored Resources (monr).
+ * @lender:		if in Dep_Idle or Dep_Dirty state, it's the pmonr that
+ *			lends its rmid to this pmonr. NULL otherwise.
+ * @pmonr_deps_head:	List of pmonrs in Dep_Idle or Dep_Dirty state that
+ *			borrow their sched_rmid from this pmonr.
+ * @pmonr_deps_entry:	Entry into lender's @pmonr_deps_head when in Dep_Idle
+ *			or Dep_Dirty state.
+ * @pkgd_deps_entry:	When in Dep_Dirty state, the list entry for dep_pmonrs.
  * @monr:		The monr that contains this pmonr.
  * @pkgd:		The package data associated with this pmonr.
+ * @rot_entry:		List entry to attach to pmonr rotation lists in
+ *			pkg_data.
+ *
+ * @last_enter_active:	Time of last entry into Active state.
 * @atomic_rmids:	Atomic accessor for this pmonr_rmids.
  * @state:		The state for this pmonr, note that this can also
  *			be inferred from the combination of sched_rmid and
  *			read_rmid in @atomic_rmids.
  */
 struct pmonr {
+	struct pmonr				*lender;
+	/* save space with union since pmonr is in only one state at a time. */
+	union{
+		struct { /* variables for Active state. */
+			struct list_head	pmonr_deps_head;
+		};
+		struct { /* variables for Dep_Idle and Dep_Dirty states. */
+			struct list_head	pmonr_deps_entry;
+			struct list_head	pkgd_deps_entry;
+		};
+	};
+
 	struct monr				*monr;
 	struct pkg_data				*pkgd;
+	struct list_head			rot_entry;
+
+	atomic64_t				last_enter_active;
 
 	/* all writers are sync'ed by package's lock. */
 	atomic64_t				atomic_rmids;
@@ -130,7 +207,13 @@ struct pmonr {
  * @free_rmids:			Pool of free rmids.
  * @dirty_rmids:		Pool of "dirty" rmids that are not referenced
  *				by a pmonr.
+ * @active_pmonrs:		LRU of Active pmonrs.
+ * @dep_idle_pmonrs:		LRU of Dep_Idle pmonrs.
+ * @dep_dirty_pmonrs:		LRU of Dep_Dirty pmonrs.
+ * @dep_pmonrs:			LRU of Dep_Idle and Dep_Dirty pmonrs.
+ * @nr_dep_pmonrs:		nr Dep_Idle + nr Dep_Dirty pmonrs.
  * @mutex:			Hold when modifying this pkg_data.
+ * @mutex_key:			lockdep class for pkg_data's mutex.
  * @lock:			Hold to protect pmonrs in this pkg_data.
  * @work_cpu:			CPU to run rotation and other batch jobs.
  *				It must be in the package associated to its
@@ -142,6 +225,12 @@ struct pkg_data {
 	unsigned long		free_rmids[CMT_MAX_NR_RMIDS_LONGS];
 	unsigned long		dirty_rmids[CMT_MAX_NR_RMIDS_LONGS];
 
+	struct list_head	active_pmonrs;
+	struct list_head	dep_idle_pmonrs;
+	struct list_head	dep_dirty_pmonrs;
+	struct list_head	dep_pmonrs;
+	int			nr_dep_pmonrs;
+
 	struct mutex		mutex;
 	raw_spinlock_t		lock;
 
-- 
2.8.0.rc3.226.g39d4020

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ