lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sat, 29 Oct 2016 17:38:16 -0700
From:   David Carrillo-Cisneros <davidcc@...gle.com>
To:     linux-kernel@...r.kernel.org
Cc:     "x86@...nel.org" <x86@...nel.org>, Ingo Molnar <mingo@...hat.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Andi Kleen <ak@...ux.intel.com>,
        Kan Liang <kan.liang@...el.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Vegard Nossum <vegard.nossum@...il.com>,
        Marcelo Tosatti <mtosatti@...hat.com>,
        Nilay Vaish <nilayvaish@...il.com>,
        Borislav Petkov <bp@...e.de>,
        Vikas Shivappa <vikas.shivappa@...ux.intel.com>,
        Ravi V Shankar <ravi.v.shankar@...el.com>,
        Fenghua Yu <fenghua.yu@...el.com>,
        Paul Turner <pjt@...gle.com>,
        Stephane Eranian <eranian@...gle.com>,
        David Carrillo-Cisneros <davidcc@...gle.com>
Subject: [PATCH v3 19/46] perf/x86/intel/cmt: add support for cgroup events

First part of cgroup support for CMT.

A monr's position in the monr hierarchy depends on the position of its
target cgroup or thread in the cgroup hierarchy.
(See code comments for details).

A monr that monitors a cgroup keeps a reference to it in monr->mon_cgrp.
This reference is used in future patches to add support for cgroup
monitoring without requiring an active perf_event at all times.

Signed-off-by: David Carrillo-Cisneros <davidcc@...gle.com>
---
 arch/x86/events/intel/cmt.c | 293 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/events/intel/cmt.h |   2 +
 2 files changed, 295 insertions(+)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index 3883cb4..a5b7d2d 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -125,6 +125,14 @@ static inline struct pmonr *pkgd_pmonr(struct pkg_data *pkgd, struct monr *monr)
 	return rcu_dereference_check(monr->pmonrs[pkgd->pkgid], safe);
 }
 
+#ifdef CONFIG_CGROUP_PERF
+static inline struct cgroup_subsys_state *get_root_perf_css(void)
+{
+	/* Get css for root cgroup */
+	return  init_css_set.subsys[perf_event_cgrp_id];
+}
+#endif
+
 static inline void pmonr_set_rmids(struct pmonr *pmonr,
 				   u32 sched_rmid, u32 read_rmid)
 {
@@ -416,6 +424,7 @@ static void monr_dealloc(struct monr *monr)
 
 	if (WARN_ON_ONCE(monr->nr_has_user) ||
 	    WARN_ON_ONCE(monr->nr_nolazy_rmid) ||
+	    WARN_ON_ONCE(monr->mon_cgrp) ||
 	    WARN_ON_ONCE(monr->mon_events))
 		return;
 
@@ -639,6 +648,7 @@ static int monr_apply_uflags(struct monr *monr, enum cmt_user_flags *puflags)
 	goto exit;
 }
 
+/* can be NULL if the monr was for a cgroup that has gone offline. */
 static inline struct monr *monr_from_event(struct perf_event *event)
 {
 	return (struct monr *) READ_ONCE(event->hw.cmt_monr);
@@ -727,6 +737,75 @@ static int monr_append_event(struct monr *monr, struct perf_event *event)
 	return err;
 }
 
+#ifdef CONFIG_CGROUP_PERF
+static inline struct monr *monr_from_perf_cgroup(struct perf_cgroup *cgrp)
+{
+	return (struct monr *)READ_ONCE(cgrp->arch_info);
+}
+
+static inline void perf_cgroup_set_monr(struct perf_cgroup *cgrp,
+					struct monr *monr)
+{
+	WRITE_ONCE(cgrp->arch_info, monr);
+}
+
+/* Get cgroup for both task and cgroup event. */
+static struct perf_cgroup *perf_cgroup_from_task_event(struct perf_event *event)
+{
+#ifdef CONFIG_LOCKDEP
+	bool rcu_safe = lockdep_is_held(&cmt_mutex);
+#endif
+
+	return container_of(
+		task_css_check(event->hw.target, perf_event_cgrp_id, rcu_safe),
+		struct perf_cgroup, css);
+}
+
+static struct perf_cgroup *perf_cgroup_from_css(struct cgroup_subsys_state *css)
+{
+	return container_of(css, struct perf_cgroup, css);
+}
+
+/**
+ * perf_cgroup_mon_started() - Tell if cgroup is monitored by its own monr.
+ *
+ * A perf_cgroup is being monitored when it is referenced back by
+ * its monr's mon_cgrp. Otherwise, the cgroup only uses the monr used to
+ * monitor another cgroup (the one that is referenced back by monr's mon_cgrp).
+ */
+static inline bool perf_cgroup_mon_started(struct perf_cgroup *cgrp)
+{
+	struct monr *monr;
+
+	/*
+	 * monr can be referenced by a cgroup other than the one in its
+	 * mon_cgrp, be careful.
+	 */
+	monr = monr_from_perf_cgroup(cgrp);
+
+	/* Root monr has no cgroup associated before initialization. */
+	return  monr->mon_cgrp == cgrp;
+}
+
+/**
+ * perf_cgroup_find_lma() - Find @cgrp's lowest monitored ancestor.
+ *
+ * Find the lowest monitored ancestor of @cgrp, not including @cgrp itself.
+ * Return: lma or NULL if no ancestor is monitored.
+ */
+struct perf_cgroup *perf_cgroup_find_lma(struct perf_cgroup *cgrp)
+{
+	struct cgroup_subsys_state *parent_css;
+
+	do {
+		parent_css = cgrp->css.parent;
+		cgrp = parent_css ? perf_cgroup_from_css(parent_css) : NULL;
+	} while (cgrp && !perf_cgroup_mon_started(cgrp));
+	return cgrp;
+}
+
+#endif
+
 /**
  * pmonr_update_sched_rmid() - Update sched_rmid for @pmonr in current package.
  *
@@ -815,6 +894,214 @@ static void monr_hrchy_remove_leaf(struct monr *monr)
 	monr_hrchy_release_locks(&flags);
 }
 
+#ifdef CONFIG_CGROUP_PERF
+
+/* Similar to css_next_descendant_pre but skips the subtree rooted by pos. */
+struct cgroup_subsys_state *
+css_skip_subtree_pre(struct cgroup_subsys_state *pos,
+		     struct cgroup_subsys_state *root)
+{
+	struct cgroup_subsys_state *next;
+
+	while (pos != root) {
+		next = css_next_child(pos, pos->parent);
+		if (next)
+			return next;
+		pos = pos->parent;
+	}
+	return NULL;
+}
+
+/* Make the monrs of all descendants of css depend on new_monr. */
+inline void css_subtree_update_monr_dependants(struct cgroup_subsys_state *css,
+					       struct monr *new_monr)
+{
+	struct cgroup_subsys_state *pos_css;
+	struct perf_cgroup *pos_cgrp;
+	struct monr *pos_monr;
+	unsigned long flags;
+
+	lockdep_assert_held(&cmt_mutex);
+
+	rcu_read_lock();
+
+	pos_css = css_next_descendant_pre(css, css);
+	while (pos_css) {
+		pos_cgrp = perf_cgroup_from_css(pos_css);
+		pos_monr = monr_from_perf_cgroup(pos_cgrp);
+
+		/* Skip css that are not online, sync'ed with cmt_mutex. */
+		if (!(pos_css->flags & CSS_ONLINE)) {
+			pos_css = css_next_descendant_pre(pos_css, css);
+			continue;
+		}
+		if (!perf_cgroup_mon_started(pos_cgrp)) {
+			perf_cgroup_set_monr(pos_cgrp, new_monr);
+			pos_css = css_next_descendant_pre(pos_css, css);
+			continue;
+		}
+		rcu_read_unlock();
+
+		monr_hrchy_acquire_locks(&flags);
+		pos_monr->parent = new_monr;
+		list_move_tail(&pos_monr->parent_entry, &new_monr->children);
+		monr_hrchy_release_locks(&flags);
+
+		rcu_read_lock();
+		/*
+		 * Skip subtrees rooted by a css that owns a monr, since the
+		 * css in those subtrees use the monr at their subtree root.
+		 */
+		pos_css = css_skip_subtree_pre(pos_css, css);
+	}
+	rcu_read_unlock();
+}
+
+static inline int __css_start_monitoring(struct cgroup_subsys_state *css)
+{
+	struct perf_cgroup *cgrp, *cgrp_lma, *pos_cgrp;
+	struct monr *monr, *monr_parent, *pos_monr, *tmp_monr;
+	unsigned long flags;
+
+	lockdep_assert_held(&cmt_mutex);
+
+	cgrp = perf_cgroup_from_css(css);
+
+	cgrp_lma = perf_cgroup_find_lma(cgrp);
+	if (!cgrp_lma) {
+		perf_cgroup_set_monr(cgrp, monr_hrchy_root);
+		monr_hrchy_root->mon_cgrp = cgrp;
+		return 0;
+	}
+	/*
+	 * The monr for the lowest monitored ancestor is direct ancestor
+	 * of monr in the monr hierarchy.
+	 */
+	monr_parent = monr_from_perf_cgroup(cgrp_lma);
+
+	monr = monr_alloc();
+	if (IS_ERR(monr))
+		return PTR_ERR(monr);
+	/*
+	 * New monr has no children yet so it can be inserted in hierarchy as
+	 * a leaf. Since all monr's pmonr are in Off state, there is no risk
+	 * of pmonr state transitions in the scheduler path.
+	 */
+	monr_hrchy_acquire_locks(&flags);
+	monr_hrchy_insert_leaf(monr, monr_parent);
+	monr_hrchy_release_locks(&flags);
+
+	/*
+	 * Previous lock also works as a barrier to prevent attaching
+	 * the monr to cgrp before it is in monr hierarchy.
+	 */
+	perf_cgroup_set_monr(cgrp, monr);
+	monr->mon_cgrp = cgrp;
+	css_subtree_update_monr_dependants(css, monr);
+
+	monr_hrchy_acquire_locks(&flags);
+	/* Move task-event monrs that are descendant from css's cgroup. */
+	list_for_each_entry_safe(pos_monr, tmp_monr,
+				 &monr_parent->children, parent_entry) {
+		if (pos_monr->mon_cgrp)
+			continue;
+		/*
+		 * All events in an event group have the same cgroup.
+		 * No RCU read lock necessary for task_css_check since calling
+		 * inside critical section.
+		 */
+		pos_cgrp = perf_cgroup_from_task_event(pos_monr->mon_events);
+		if (!cgroup_is_descendant(pos_cgrp->css.cgroup,
+					  cgrp->css.cgroup))
+			continue;
+		pos_monr->parent = monr;
+		list_move_tail(&pos_monr->parent_entry, &monr->children);
+	}
+	monr_hrchy_release_locks(&flags);
+
+	return 0;
+}
+
+static inline void __css_stop_monitoring(struct cgroup_subsys_state *css)
+{
+	struct perf_cgroup *cgrp, *cgrp_lma;
+	struct monr *monr, *monr_parent, *pos_monr;
+	unsigned long flags;
+
+	lockdep_assert_held(&cmt_mutex);
+
+	cgrp = perf_cgroup_from_css(css);
+	monr = monr_from_perf_cgroup(cgrp);
+	/*
+	 * When css is root cgroup's css, detach cgroup but do not
+	 * destroy monr.
+	 */
+	cgrp_lma = perf_cgroup_find_lma(cgrp);
+	if (!cgrp_lma) {
+		/* monr of root cgrp must be monr_hrchy_root. */
+		monr->mon_cgrp = NULL;
+		return;
+	}
+
+	monr_parent = monr_from_perf_cgroup(cgrp_lma);
+	css_subtree_update_monr_dependants(css, monr_parent);
+
+	monr_hrchy_acquire_locks(&flags);
+
+	/* Move the children monrs that are not cgroups. */
+	list_for_each_entry(pos_monr, &monr->children, parent_entry)
+		pos_monr->parent = monr_parent;
+	list_splice_tail_init(&monr->children, &monr_parent->children);
+
+	perf_cgroup_set_monr(cgrp, monr_from_perf_cgroup(cgrp_lma));
+	monr->mon_cgrp = NULL;
+	monr_hrchy_remove_leaf(monr);
+
+	monr_hrchy_release_locks(&flags);
+}
+
+static bool is_cgroup_event(struct perf_event *event)
+{
+	return event->cgrp;
+}
+
+static int monr_hrchy_attach_cgroup_event(struct perf_event *event)
+{
+	struct monr *monr;
+	struct perf_cgroup *cgrp = event->cgrp;
+	int err;
+	bool started = false;
+
+	if (!perf_cgroup_mon_started(cgrp)) {
+		css_get(&cgrp->css);
+		err = __css_start_monitoring(&cgrp->css);
+		css_put(&cgrp->css);
+		if (err)
+			return err;
+		started = true;
+	}
+
+	monr = monr_from_perf_cgroup(cgrp);
+	err = monr_append_event(monr, event);
+	if (err && started) {
+		css_get(&cgrp->css);
+		__css_stop_monitoring(&cgrp->css);
+		css_put(&cgrp->css);
+	}
+
+	return err;
+}
+
+/* return monr of cgroup that contains the task to monitor. */
+static struct monr *monr_hrchy_get_monr_parent(struct perf_event *event)
+{
+	struct perf_cgroup *cgrp = perf_cgroup_from_task_event(event);
+
+	return monr_from_perf_cgroup(cgrp);
+}
+
+#else /* CONFIG_CGROUP_PERF */
+
 static bool is_cgroup_event(struct perf_event *event)
 {
 	return false;
@@ -834,6 +1121,8 @@ static struct monr *monr_hrchy_get_monr_parent(struct perf_event *event)
 	return monr_hrchy_root;
 }
 
+#endif
+
 static int monr_hrchy_attach_cpu_event(struct perf_event *event)
 {
 	return monr_append_event(monr_hrchy_root, event);
@@ -883,6 +1172,10 @@ static int monr_hrchy_attach_event(struct perf_event *event)
 
 static void monr_destroy(struct monr *monr)
 {
+#ifdef CONFIG_CGROUP_PERF
+	if (monr->mon_cgrp)
+		__css_stop_monitoring(&monr->mon_cgrp->css);
+#endif
 	monr_hrchy_remove_leaf(monr);
 	monr_dealloc(monr);
 }
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
index 754a9c8..dc52641 100644
--- a/arch/x86/events/intel/cmt.h
+++ b/arch/x86/events/intel/cmt.h
@@ -252,6 +252,7 @@ enum cmt_user_flags {
 
 /**
  * struct monr - MONitored Resource.
+ * @mon_cgrp:		The cgroup associated with this monr, if any
  * @mon_events:		The head of event's group that use this monr, if any.
  * @entry:		List entry into cmt_event_monrs.
  * @pmonrs:		Per-package pmonrs.
@@ -271,6 +272,7 @@ enum cmt_user_flags {
  * On initialization, all monr's pmonrs start in Off state.
  */
 struct monr {
+	struct perf_cgroup		*mon_cgrp;
 	struct perf_event		*mon_events;
 	struct list_head		entry;
 	struct pmonr			**pmonrs;
-- 
2.8.0.rc3.226.g39d4020

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ