Date:   Wed, 24 Jul 2019 15:37:43 -0700
From:   Ian Rogers <irogers@...gle.com>
To:     Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        linux-kernel@...r.kernel.org
Cc:     Kan Liang <kan.liang@...ux.intel.com>,
        Stephane Eranian <eranian@...gle.com>,
        Ian Rogers <irogers@...gle.com>
Subject: [PATCH v2 4/7] perf: avoid a bounded set of visit_groups_merge iterators

Create a per-cpu array of iterators that is resized when cgroup events
are added. The array is sized for the maximum cgroup depth seen so far,
plus one slot for system-wide events, although not every cgroup at those
depths will have events monitored within it. This approach avoids adding
storage cost to each perf_event.
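
[Not part of the patch: a minimal user-space sketch of the growth policy
described above, for illustration only. The names iter_storage and
ensure_iterator_storage are invented for this sketch; the kernel change
below keeps the array in perf_cpu_context and grows it with krealloc()
rather than realloc().]

#include <stdio.h>
#include <stdlib.h>

struct iter_storage {
	void **slots;	/* one iterator slot per cgroup level, plus one */
	int size;	/* current number of slots */
};

/*
 * Grow the storage so it can hold one iterator per cgroup level plus
 * one for system-wide events; never shrink, mirroring the patch.
 */
static int ensure_iterator_storage(struct iter_storage *st, int cgroup_depth)
{
	int needed = cgroup_depth + 1;
	void **tmp;

	if (needed <= st->size)
		return 0;

	tmp = realloc(st->slots, needed * sizeof(*tmp));
	if (!tmp)
		return -1;	/* keep the old, smaller array on failure */

	st->slots = tmp;
	st->size = needed;
	return 0;
}

int main(void)
{
	struct iter_storage st = { .slots = NULL, .size = 0 };

	/*
	 * Installing events at cgroup depths 3 and then 5 grows the array;
	 * a later event at depth 2 reuses the existing storage.
	 */
	ensure_iterator_storage(&st, 3);
	printf("size after depth 3: %d\n", st.size);	/* 4 */
	ensure_iterator_storage(&st, 5);
	printf("size after depth 5: %d\n", st.size);	/* 6 */
	ensure_iterator_storage(&st, 2);
	printf("size after depth 2: %d\n", st.size);	/* still 6 */

	free(st.slots);
	return 0;
}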

Signed-off-by: Ian Rogers <irogers@...gle.com>
---
 include/linux/perf_event.h |  2 +
 kernel/events/core.c       | 87 +++++++++++++++++++++++++++++++-------
 2 files changed, 73 insertions(+), 16 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e8ad3c590a23..43f90cfa2c39 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -802,6 +802,8 @@ struct perf_cpu_context {
 #ifdef CONFIG_CGROUP_PERF
 	struct perf_cgroup		*cgrp;
 	struct list_head		cgrp_cpuctx_entry;
+	struct perf_event		**visit_groups_merge_iterator_storage;
+	int			       visit_groups_merge_iterator_storage_size;
 #endif
 
 	struct list_head		sched_cb_entry;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4d70df0415b9..2a2188908bed 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1709,6 +1709,20 @@ perf_event_groups_next(struct perf_event *event)
 	return next;
 }
 
+#ifdef CONFIG_CGROUP_PERF
+int perf_event_cgroup_depth(struct perf_event *event)
+{
+	struct cgroup_subsys_state *css;
+	struct perf_cgroup *cgrp = event->cgrp;
+	int depth = 0;
+
+	if (cgrp)
+		for (css = &cgrp->css; css; css = css->parent)
+			depth++;
+	return depth;
+}
+#endif
+
 /*
  * Iterate through the whole groups tree.
  */
@@ -2590,6 +2604,7 @@ static int  __perf_install_in_context(void *info)
 
 #ifdef CONFIG_CGROUP_PERF
 	if (is_cgroup_event(event)) {
+		int max_iterators;
 		/*
 		 * If the current cgroup doesn't match the event's
 		 * cgroup, we should not try to schedule it.
@@ -2597,6 +2612,30 @@ static int  __perf_install_in_context(void *info)
 		struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
 		reprogram = cgroup_is_descendant(cgrp->css.cgroup,
 					event->cgrp->css.cgroup);
+
+		/*
+		 * Ensure space for visit_groups_merge iterator storage. With
+		 * cgroup profiling we may have an event at each depth plus
+		 * system wide events.
+		 */
+		max_iterators = perf_event_cgroup_depth(event) + 1;
+		if (max_iterators >
+		    cpuctx->visit_groups_merge_iterator_storage_size) {
+			struct perf_event **storage =
+			   krealloc(cpuctx->visit_groups_merge_iterator_storage,
+				    sizeof(struct perf_event *) * max_iterators,
+				    GFP_KERNEL);
+			if (storage) {
+				cpuctx->visit_groups_merge_iterator_storage
+						= storage;
+				cpuctx->visit_groups_merge_iterator_storage_size
+						= max_iterators;
+			} else {
+				WARN_ONCE(1, "Unable to increase iterator "
+					"storage for perf events with cgroups");
+				ret = -ENOMEM;
+			}
+		}
 	}
 #endif
 
@@ -3394,6 +3433,13 @@ static void min_heap_pop_push(struct perf_event_heap *heap,
 	}
 }
 
+
+/*
+ * Without cgroups, with a task context, there may be per-CPU and any
+ * CPU events.
+ */
+#define MIN_VISIT_GROUP_MERGE_ITERATORS 2
+
 static int visit_groups_merge(struct perf_event_context *ctx,
 			      struct perf_cpu_context *cpuctx,
 			      struct perf_event_groups *groups,
@@ -3405,22 +3451,25 @@ static int visit_groups_merge(struct perf_event_context *ctx,
 {
 	/*
 	 * A set of iterators, the iterator for the visit is chosen by the
-	 * group_index. The size of the array is sized such that there is space:
-	 * - for task contexts per-CPU and any-CPU events can be iterated.
-	 * - for CPU contexts:
-	 *   - without cgroups, global events can be iterated.
-	 *   - with cgroups, global events can be iterated and 16 sets of cgroup
-	 *     events. Cgroup events may monitor a cgroup at an arbitrary
-	 *     location within the cgroup hierarchy. An iterator is needed for
-	 *     each cgroup with events in the hierarchy. Potentially this is
-	 *     only limited by MAX_PATH.
-	 */
-	struct perf_event *itrs[IS_ENABLED(CONFIG_CGROUP_PERF) ? 17 : 2];
+	 * group_index.
+	 */
+#ifndef CONFIG_CGROUP_PERF
+	struct perf_event *itrs[MIN_VISIT_GROUP_MERGE_ITERATORS];
 	struct perf_event_heap heap = {
 		.storage = itrs,
 		.num_elements = 0,
-		.max_elements = ARRAYSIZE(itrs)
+		.max_elements = MIN_VISIT_GROUP_MERGE_ITERATORS
+	};
+#else
+	/*
+	 * With cgroups, use the iterator storage reserved in the CPU context.
+	 */
+	struct perf_event_heap heap = {
+		.storage = cpuctx->visit_groups_merge_iterator_storage,
+		.num_elements = 0,
+		.max_elements = cpuctx->visit_groups_merge_iterator_storage_size
 	};
+#endif
 	int ret, cpu = smp_processor_id();
 
 	heap.storage[0] = perf_event_groups_first(groups, cpu, NULL);
@@ -3455,9 +3504,8 @@ static int visit_groups_merge(struct perf_event_context *ctx,
 					heap.num_elements++;
 					if (heap.num_elements ==
 					    heap.max_elements) {
-						WARN_ONCE(
-				     max_cgroups_with_events_depth,
-				     "Insufficient iterators for cgroup depth");
+						WARN_ONCE(1,
+						"per-CPU min-heap under sized");
 						break;
 					}
 				}
@@ -10167,7 +10215,14 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 		lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
 		cpuctx->ctx.pmu = pmu;
 		cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
-
+#ifdef CONFIG_CGROUP_PERF
+		cpuctx->visit_groups_merge_iterator_storage =
+				kmalloc_array(MIN_VISIT_GROUP_MERGE_ITERATORS,
+					      sizeof(struct perf_event *),
+					      GFP_KERNEL);
+		cpuctx->visit_groups_merge_iterator_storage_size =
+				MIN_VISIT_GROUP_MERGE_ITERATORS;
+#endif
 		__perf_mux_hrtimer_init(cpuctx, cpu);
 	}
 
-- 
2.22.0.709.g102302147b-goog
