linux-kernel - [PATCH] perf: Fix exclusive events' grouping

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20190626091409.47637-1-alexander.shishkin@linux.intel.com>
Date:   Wed, 26 Jun 2019 12:14:09 +0300
From:   Alexander Shishkin <alexander.shishkin@...ux.intel.com>
To:     Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc:     Arnaldo Carvalho de Melo <acme@...hat.com>,
        Ingo Molnar <mingo@...hat.com>, linux-kernel@...r.kernel.org,
        mathieu.poirier@...aro.org, will.deacon@....com,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Subject: [PATCH] perf: Fix exclusive events' grouping

So far, we tried to disallow grouping exclusive events for fear of the
complications they would cause when moving between contexts. Specifically,
moving a software group to a hardware context would violate the exclusivity
rules if both groups contain matching exclusive events.

This attempt was, however, unsuccessful: the check that we have in the
perf_event_open() syscall is both wrong (looks at wrong PMU) and
insufficient (group leader may still be exclusive), as can be illustrated
by running

$ perf record -e '{intel_pt//,cycles}' uname
$ perf record -e '{cycles,intel_pt//}' uname

both of which ultimately succeed.

Furthermore, we are completely free to trigger the exclusivity violation
with -e '{cycles,intel_pt//}' -e '{intel_pt//,instructions}': even though
the helpful perf record will not allow that, the ABI will. The warning
later in the perf_event_open() path will not trigger either, because it is
also wrong.

Fix all this by validating the original group before moving, getting rid
of broken safeguards and placing a useful one in perf_install_in_context().
And while at it, place exclusive events on their own list to avoid
iterating through all events.

Signed-off-by: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Fixes: bed5b25ad9c8a ("perf: Add a pmu capability for "exclusive" events")
---
 include/linux/perf_event.h | 12 +++++++++++
 kernel/events/core.c       | 41 ++++++++++++++++++++++++++------------
 2 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2ddae518dce6..3fdb5cbd22ae 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -574,6 +574,12 @@ struct perf_event {
 	 */
 	struct list_head		sibling_list;
 	struct list_head		active_list;
+
+	/*
+	 * Link to ctx::exclusive_list
+	 */
+	struct list_head		exclusive_entry;
+
 	/*
 	 * Node on the pinned or flexible tree located at the event context;
 	 */
@@ -739,6 +745,7 @@ struct perf_event_context {
 	struct perf_event_groups	pinned_groups;
 	struct perf_event_groups	flexible_groups;
 	struct list_head		event_list;
+	struct list_head		exclusive_list;
 
 	struct list_head		pinned_active;
 	struct list_head		flexible_active;
@@ -1054,6 +1061,11 @@ static inline int in_software_context(struct perf_event *event)
 	return event->ctx->pmu->task_ctx_nr == perf_sw_context;
 }
 
+static inline int is_exclusive_pmu(struct pmu *pmu)
+{
+	return pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE;
+}
+
 extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 23efe6792abc..7a159af28206 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1688,6 +1688,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	if (event->attr.inherit_stat)
 		ctx->nr_stat++;
 
+	if (is_exclusive_pmu(event->pmu))
+		list_add_rcu(&event->exclusive_entry, &ctx->exclusive_list);
 	ctx->generation++;
 }
 
@@ -1871,6 +1873,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 
 	list_del_rcu(&event->event_entry);
 
+	if (is_exclusive_pmu(event->pmu))
+		list_del_rcu(&event->exclusive_entry);
+
 	if (event->group_leader == event)
 		del_event_from_groups(event, ctx);
 
@@ -2553,6 +2558,9 @@ static int  __perf_install_in_context(void *info)
 	return ret;
 }
 
+static bool exclusive_event_installable(struct perf_event *event,
+					struct perf_event_context *ctx);
+
 /*
  * Attach a performance event to a context.
  *
@@ -2567,6 +2575,8 @@ perf_install_in_context(struct perf_event_context *ctx,
 
 	lockdep_assert_held(&ctx->mutex);
 
+	WARN_ON_ONCE(!exclusive_event_installable(event, ctx));
+
 	if (event->cpu != -1)
 		event->cpu = cpu;
 
@@ -4071,6 +4081,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 	INIT_LIST_HEAD(&ctx->event_list);
 	INIT_LIST_HEAD(&ctx->pinned_active);
 	INIT_LIST_HEAD(&ctx->flexible_active);
+	INIT_LIST_HEAD(&ctx->exclusive_list);
 	refcount_set(&ctx->refcount, 1);
 }
 
@@ -4360,7 +4371,7 @@ static int exclusive_event_init(struct perf_event *event)
 {
 	struct pmu *pmu = event->pmu;
 
-	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+	if (!is_exclusive_pmu(pmu))
 		return 0;
 
 	/*
@@ -4391,7 +4402,7 @@ static void exclusive_event_destroy(struct perf_event *event)
 {
 	struct pmu *pmu = event->pmu;
 
-	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+	if (!is_exclusive_pmu(pmu))
 		return;
 
 	/* see comment in exclusive_event_init() */
@@ -4411,17 +4422,18 @@ static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
 	return false;
 }
 
-/* Called under the same ctx::mutex as perf_install_in_context() */
 static bool exclusive_event_installable(struct perf_event *event,
 					struct perf_event_context *ctx)
 {
 	struct perf_event *iter_event;
 	struct pmu *pmu = event->pmu;
 
-	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+	lockdep_assert_held(&ctx->mutex);
+
+	if (!is_exclusive_pmu(pmu))
 		return true;
 
-	list_for_each_entry(iter_event, &ctx->event_list, event_entry) {
+	list_for_each_entry(iter_event, &ctx->exclusive_list, exclusive_entry) {
 		if (exclusive_event_match(iter_event, event))
 			return false;
 	}
@@ -10917,11 +10929,6 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_alloc;
 	}
 
-	if ((pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && group_leader) {
-		err = -EBUSY;
-		goto err_context;
-	}
-
 	/*
 	 * Look up the group leader (we will attach this event to it):
 	 */
@@ -11009,6 +11016,17 @@ SYSCALL_DEFINE5(perf_event_open,
 				move_group = 0;
 			}
 		}
+
+		/*
+		 * Failure to create exclusive events returns -EBUSY.
+		 */
+		err = -EBUSY;
+		if (!exclusive_event_installable(group_leader, ctx))
+			goto err_locked;
+
+		for_each_sibling_event(sibling, group_leader)
+			if (!exclusive_event_installable(sibling, ctx))
+				goto err_locked;
 	} else {
 		mutex_lock(&ctx->mutex);
 	}
@@ -11045,9 +11063,6 @@ SYSCALL_DEFINE5(perf_event_open,
 	 * because we need to serialize with concurrent event creation.
 	 */
 	if (!exclusive_event_installable(event, ctx)) {
-		/* exclusive and group stuff are assumed mutually exclusive */
-		WARN_ON_ONCE(move_group);
-
 		err = -EBUSY;
 		goto err_locked;
 	}
-- 
2.20.1