Date:   Fri,  6 Jan 2017 13:56:47 -0800
From:   Vikas Shivappa <vikas.shivappa@...ux.intel.com>
To:     vikas.shivappa@...el.com, vikas.shivappa@...ux.intel.com
Cc:     linux-kernel@...r.kernel.org, x86@...nel.org, hpa@...or.com,
        tglx@...utronix.de, mingo@...nel.org, peterz@...radead.org,
        ravi.v.shankar@...el.com, tony.luck@...el.com,
        fenghua.yu@...el.com, andi.kleen@...el.com, h.peter.anvin@...el.com
Subject: [PATCH 05/12] x86/cqm,perf/core: Cgroup support prepare

From: David Carrillo-Cisneros <davidcc@...gle.com>

Cgroup hierarchy monitoring is not currently supported. This patch
builds the necessary data structures and cgroup APIs (alloc, free,
etc.) and adds the quirks needed to support cgroup hierarchy
monitoring in later patches.

- Introduce an architecture-specific field, arch_info, in perf_cgroup
to keep track of RMIDs and cgroup hierarchical monitoring.
- perf sched_in activates events for all ancestor cgroups when a cgroup
is scheduled in. This does not work with cqm, because a common
per-package RMID is associated with one task, so we cannot write a
different RMID into the MSR for each event. The cqm driver therefore
sets the PERF_EV_CAP_CGROUP_NO_RECURSION flag, which tells perf not to
activate the event for ancestor cgroups and lets the driver handle
hierarchy monitoring for cgroups itself.
- Introduce an event_terminate callback. Because event_destroy is
called only after the cgroup has been disassociated from the event,
terminate runs while the association still holds, which lets cqm clean
up its RMID state and the cqm-specific arch_info.
- Add the cgroup APIs for alloc, free, attach and can_attach.

The above framework will be used to build different cgroup features in
later patches.
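
For illustration, here is a minimal sketch of how a PMU driver could
plug into this framework; the PMU name and the empty callback bodies
below are placeholders for this description, not part of the patch:

#include <linux/perf_event.h>

/* Hypothetical PMU that manages its own cgroup scoping. */
static int example_event_init(struct perf_event *event)
{
	/*
	 * Tell the generic code not to activate this event for
	 * descendant cgroups; the driver walks the hierarchy itself.
	 */
	event->event_caps |= PERF_EV_CAP_CGROUP_NO_RECURSION;
	return 0;
}

static void example_event_terminate(struct perf_event *event)
{
	/*
	 * Called from _free_event() before the cgroup is detached, so
	 * event->cgrp and any per-cgroup state (e.g. RMIDs) are still
	 * valid here.
	 */
}

static struct pmu example_pmu = {
	.event_init	 = example_event_init,
	.event_terminate = example_event_terminate,
	/* .add, .del, .start, .stop, .read etc. omitted */
};

With the capability set, perf_cgroup_match() reduces to an exact cgroup
comparison instead of the recursive ancestor check (see the
kernel/events/core.c hunk below).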

Tests: Same as before. Cgroup monitoring still doesn't work, but the
prep work to enable it is now in place.

Patch modified/refactored by Vikas Shivappa
<vikas.shivappa@...ux.intel.com> to support recycling removal.

Signed-off-by: Vikas Shivappa <vikas.shivappa@...ux.intel.com>
---
 arch/x86/events/intel/cqm.c       | 19 ++++++++++++++++++-
 arch/x86/include/asm/perf_event.h | 27 +++++++++++++++++++++++++++
 include/linux/perf_event.h        | 32 ++++++++++++++++++++++++++++++++
 kernel/events/core.c              | 28 +++++++++++++++++++++++++++-
 4 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 68fd1da..a9bd7bd 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -741,7 +741,13 @@ static int intel_cqm_event_init(struct perf_event *event)
 	INIT_LIST_HEAD(&event->hw.cqm_group_entry);
 	INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
 
-	event->destroy = intel_cqm_event_destroy;
+	/*
+	 * The CQM driver handles cgroup recursion itself: since only one
+	 * RMID can be programmed at a time on each core, it is
+	 * incompatible with the way the generic code handles
+	 * cgroup hierarchies.
+	 */
+	event->event_caps |= PERF_EV_CAP_CGROUP_NO_RECURSION;
 
 	mutex_lock(&cache_mutex);
 
@@ -918,6 +924,17 @@ static int intel_cqm_event_init(struct perf_event *event)
 	.read		     = intel_cqm_event_read,
 	.count		     = intel_cqm_event_count,
 };
+#ifdef CONFIG_CGROUP_PERF
+int perf_cgroup_arch_css_alloc(struct cgroup_subsys_state *parent_css,
+				      struct cgroup_subsys_state *new_css)
+{ return 0; }
+void perf_cgroup_arch_css_free(struct cgroup_subsys_state *css)
+{}
+void perf_cgroup_arch_attach(struct cgroup_taskset *tset)
+{}
+int perf_cgroup_arch_can_attach(struct cgroup_taskset *tset)
+{ return 0; }
+#endif
 
 static inline void cqm_pick_event_reader(int cpu)
 {
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f353061..f38c7f0 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -299,4 +299,31 @@ static inline void perf_check_microcode(void) { }
 
 #define arch_perf_out_copy_user copy_from_user_nmi
 
+/*
+ * Hooks for architecture specific features of perf_event cgroup.
+ * Currently used by Intel's CQM.
+ */
+#ifdef CONFIG_INTEL_RDT_M
+#ifdef CONFIG_CGROUP_PERF
+
+#define perf_cgroup_arch_css_alloc	perf_cgroup_arch_css_alloc
+
+int perf_cgroup_arch_css_alloc(struct cgroup_subsys_state *parent_css,
+				      struct cgroup_subsys_state *new_css);
+
+#define perf_cgroup_arch_css_free	perf_cgroup_arch_css_free
+
+void perf_cgroup_arch_css_free(struct cgroup_subsys_state *css);
+
+#define perf_cgroup_arch_attach		perf_cgroup_arch_attach
+
+void perf_cgroup_arch_attach(struct cgroup_taskset *tset);
+
+#define perf_cgroup_arch_can_attach	perf_cgroup_arch_can_attach
+
+int perf_cgroup_arch_can_attach(struct cgroup_taskset *tset);
+
+#endif
+
+#endif
 #endif /* _ASM_X86_PERF_EVENT_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a8f4749..410642a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -300,6 +300,12 @@ struct pmu {
 	int (*event_init)		(struct perf_event *event);
 
 	/*
+	 * Terminate the event for this PMU. Optional complement to a
+	 * successful event_init. Called before the event fields are torn down.
+	 */
+	void (*event_terminate)		(struct perf_event *event);
+
+	/*
 	 * Notification that the event was mapped or unmapped.  Called
 	 * in the context of the mapping task.
 	 */
@@ -516,9 +522,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
  * PERF_EV_CAP_SOFTWARE: Is a software event.
  * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
  * from any CPU in the package where it is active.
+ * PERF_EV_CAP_CGROUP_NO_RECURSION: A cgroup event that handles its own
+ * cgroup scoping. It does not need to be enabled for all of its descendant
+ * cgroups.
  */
 #define PERF_EV_CAP_SOFTWARE		BIT(0)
 #define PERF_EV_CAP_READ_ACTIVE_PKG	BIT(1)
+#define PERF_EV_CAP_CGROUP_NO_RECURSION	BIT(2)
 
 #define SWEVENT_HLIST_BITS		8
 #define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
@@ -823,6 +833,8 @@ struct perf_cgroup_info {
 };
 
 struct perf_cgroup {
+	/* Architecture specific information. */
+	void				 *arch_info;
 	struct cgroup_subsys_state	css;
 	struct perf_cgroup_info	__percpu *info;
 };
@@ -844,6 +856,7 @@ struct perf_cgroup {
 
 #ifdef CONFIG_PERF_EVENTS
 
+extern int is_cgroup_event(struct perf_event *event);
 extern void *perf_aux_output_begin(struct perf_output_handle *handle,
 				   struct perf_event *event);
 extern void perf_aux_output_end(struct perf_output_handle *handle,
@@ -1387,4 +1400,23 @@ ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
 #define perf_event_exit_cpu	NULL
 #endif
 
+/*
+ * Hooks for architecture specific extensions for perf_cgroup.
+ */
+#ifndef perf_cgroup_arch_css_alloc
+#define perf_cgroup_arch_css_alloc(parent_css, new_css) 0
+#endif
+
+#ifndef perf_cgroup_arch_css_free
+#define perf_cgroup_arch_css_free(css) do { } while (0)
+#endif
+
+#ifndef perf_cgroup_arch_attach
+#define perf_cgroup_arch_attach(tskset) do { } while (0)
+#endif
+
+#ifndef perf_cgroup_arch_can_attach
+#define perf_cgroup_arch_can_attach(tskset) 0
+#endif
+
 #endif /* _LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ab15509..229f611 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -590,6 +590,9 @@ static inline u64 perf_event_clock(struct perf_event *event)
 	if (!cpuctx->cgrp)
 		return false;
 
+	if (event->event_caps & PERF_EV_CAP_CGROUP_NO_RECURSION)
+		return cpuctx->cgrp->css.cgroup == event->cgrp->css.cgroup;
+
 	/*
 	 * Cgroup scoping is recursive.  An event enabled for a cgroup is
 	 * also enabled for all its descendant cgroups.  If @cpuctx's
@@ -606,7 +609,7 @@ static inline void perf_detach_cgroup(struct perf_event *event)
 	event->cgrp = NULL;
 }
 
-static inline int is_cgroup_event(struct perf_event *event)
+int is_cgroup_event(struct perf_event *event)
 {
 	return event->cgrp != NULL;
 }
@@ -4019,6 +4022,9 @@ static void _free_event(struct perf_event *event)
 		mutex_unlock(&event->mmap_mutex);
 	}
 
+	if (event->pmu->event_terminate)
+		event->pmu->event_terminate(event);
+
 	if (is_cgroup_event(event))
 		perf_detach_cgroup(event);
 
@@ -9246,6 +9252,8 @@ static void account_event(struct perf_event *event)
 	exclusive_event_destroy(event);
 
 err_pmu:
+	if (event->pmu->event_terminate)
+		event->pmu->event_terminate(event);
 	if (event->destroy)
 		event->destroy(event);
 	module_put(pmu->module);
@@ -10748,6 +10756,7 @@ static int __init perf_event_sysfs_init(void)
 perf_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	struct perf_cgroup *jc;
+	int ret;
 
 	jc = kzalloc(sizeof(*jc), GFP_KERNEL);
 	if (!jc)
@@ -10759,6 +10768,12 @@ static int __init perf_event_sysfs_init(void)
 		return ERR_PTR(-ENOMEM);
 	}
 
+	jc->arch_info = NULL;
+
+	ret = perf_cgroup_arch_css_alloc(parent_css, &jc->css);
+	if (ret) {
+		free_percpu(jc->info);
+		kfree(jc);
+		return ERR_PTR(ret);
+	}
+
 	return &jc->css;
 }
 
@@ -10766,6 +10781,8 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
 {
 	struct perf_cgroup *jc = container_of(css, struct perf_cgroup, css);
 
+	perf_cgroup_arch_css_free(css);
+
 	free_percpu(jc->info);
 	kfree(jc);
 }
@@ -10786,11 +10803,20 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset)
 
 	cgroup_taskset_for_each(task, css, tset)
 		task_function_call(task, __perf_cgroup_move, task);
+
+	perf_cgroup_arch_attach(tset);
+}
+
+static int perf_cgroup_can_attach(struct cgroup_taskset *tset)
+{
+	return perf_cgroup_arch_can_attach(tset);
 }
 
 struct cgroup_subsys perf_event_cgrp_subsys = {
 	.css_alloc	= perf_cgroup_css_alloc,
 	.css_free	= perf_cgroup_css_free,
+	.can_attach	= perf_cgroup_can_attach,
 	.attach		= perf_cgroup_attach,
 };
 #endif /* CONFIG_CGROUP_PERF */
-- 
1.9.1
