lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 22 Mar 2021 17:42:14 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     Marco Elver <elver@...gle.com>
Cc:     alexander.shishkin@...ux.intel.com, acme@...nel.org,
        mingo@...hat.com, jolsa@...hat.com, mark.rutland@....com,
        namhyung@...nel.org, tglx@...utronix.de, glider@...gle.com,
        viro@...iv.linux.org.uk, arnd@...db.de, christian@...uner.io,
        dvyukov@...gle.com, jannh@...gle.com, axboe@...nel.dk,
        mascasa@...gle.com, pcc@...gle.com, irogers@...gle.com,
        kasan-dev@...glegroups.com, linux-arch@...r.kernel.org,
        linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
        x86@...nel.org, linux-kselftest@...r.kernel.org
Subject: Re: [PATCH RFC v2 8/8] selftests/perf: Add kselftest for
 remove_on_exec

On Mon, Mar 22, 2021 at 02:24:40PM +0100, Marco Elver wrote:
> To make compatible with more recent libc, we'll need to fixup the tests
> with the below.

OK, that reprodiced things here, thanks!

The below seems to not explode instantly.... it still has the
alternative version in as well (and I think it might even work too, but
the one I left in seems simpler).

---

 kernel/events/core.c | 154 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 111 insertions(+), 43 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index a7220e8c447e..8c0f905cc017 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2167,8 +2172,9 @@ static void perf_group_detach(struct perf_event *event)
 	 * If this is a sibling, remove it from its group.
 	 */
 	if (leader != event) {
+		leader->nr_siblings--;
 		list_del_init(&event->sibling_list);
-		event->group_leader->nr_siblings--;
+		event->group_leader = event;
 		goto out;
 	}
 
@@ -2182,8 +2188,9 @@ static void perf_group_detach(struct perf_event *event)
 		if (sibling->event_caps & PERF_EV_CAP_SIBLING)
 			perf_remove_sibling_event(sibling);
 
-		sibling->group_leader = sibling;
+		leader->nr_siblings--;
 		list_del_init(&sibling->sibling_list);
+		sibling->group_leader = sibling;
 
 		/* Inherit group flags from the previous leader */
 		sibling->group_caps = event->group_caps;
@@ -2360,10 +2367,19 @@ __perf_remove_from_context(struct perf_event *event,
 static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 {
 	struct perf_event_context *ctx = event->ctx;
+	bool remove;
 
 	lockdep_assert_held(&ctx->mutex);
 
-	event_function_call(event, __perf_remove_from_context, (void *)flags);
+	/*
+	 * There is concurrency vs remove_on_exec().
+	 */
+	raw_spin_lock_irq(&ctx->lock);
+	remove = (event->attach_state & PERF_ATTACH_CONTEXT);
+	raw_spin_unlock_irq(&ctx->lock);
+
+	if (remove)
+		event_function_call(event, __perf_remove_from_context, (void *)flags);
 
 	/*
 	 * The above event_function_call() can NO-OP when it hits
@@ -4232,41 +4248,92 @@ static void perf_event_enable_on_exec(int ctxn)
 static void perf_remove_from_owner(struct perf_event *event);
 static void perf_event_exit_event(struct perf_event *child_event,
 				  struct perf_event_context *child_ctx,
-				  struct task_struct *child);
+				  struct task_struct *child,
+				  bool removed);
 
 /*
  * Removes all events from the current task that have been marked
  * remove-on-exec, and feeds their values back to parent events.
  */
-static void perf_event_remove_on_exec(void)
+static void perf_event_remove_on_exec(int ctxn)
 {
-	int ctxn;
+	struct perf_event_context *ctx, *clone_ctx = NULL;
+	struct perf_event *event, *next;
+	LIST_HEAD(free_list);
+	unsigned long flags;
+	bool modified = false;
 
-	for_each_task_context_nr(ctxn) {
-		struct perf_event_context *ctx;
-		struct perf_event *event, *next;
+	ctx = perf_pin_task_context(current, ctxn);
+	if (!ctx)
+		return;
 
-		ctx = perf_pin_task_context(current, ctxn);
-		if (!ctx)
+	mutex_lock(&ctx->mutex);
+
+	if (WARN_ON_ONCE(ctx->task != current))
+		goto unlock;
+
+	list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) {
+		if (!event->attr.remove_on_exec)
 			continue;
-		mutex_lock(&ctx->mutex);
 
-		list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) {
-			if (!event->attr.remove_on_exec)
-				continue;
+		if (!is_kernel_event(event))
+			perf_remove_from_owner(event);
 
-			if (!is_kernel_event(event))
-				perf_remove_from_owner(event);
-			perf_remove_from_context(event, DETACH_GROUP);
-			/*
-			 * Remove the event and feed back its values to the
-			 * parent event.
-			 */
-			perf_event_exit_event(event, ctx, current);
-		}
-		mutex_unlock(&ctx->mutex);
-		put_ctx(ctx);
+		modified = true;
+
+		perf_remove_from_context(event, !!event->parent * DETACH_GROUP);
+		perf_event_exit_event(event, ctx, current, true);
+	}
+
+	raw_spin_lock_irqsave(&ctx->lock, flags);
+	if (modified)
+		clone_ctx = unclone_ctx(ctx);
+	--ctx->pin_count;
+	raw_spin_unlock_irqrestore(&ctx->lock, flags);
+
+#if 0
+	struct perf_cpu_context *cpuctx;
+
+	if (!modified) {
+		perf_unpin_context(ctx);
+		goto unlock;
+	}
+
+	local_irq_save(flags);
+	cpuctx = __get_cpu_context(ctx);
+	perf_ctx_lock(cpuctx, ctx);
+	task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
+
+	list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) {
+		if (!event->attr.remove_on_exec)
+			continue;
+
+		if (event->parent)
+			perf_group_detach(event);
+		list_del_event(event, ctx);
+
+		list_add(&event->active_list, &free_list);
+	}
+
+	ctx_resched(cpuctx, ctx, EVENT_ALL);
+
+	clone_ctx = unclone_ctx(ctx);
+	--ctx->pin_count;
+	perf_ctx_unlock(cpuctx, ctx);
+	local_irq_restore(flags);
+
+	list_for_each_entry_safe(event, next, &free_list, active_entry) {
+		list_del(&event->active_entry);
+		perf_event_exit_event(event, ctx, current, true);
 	}
+#endif
+
+unlock:
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+	if (clone_ctx)
+		put_ctx(clone_ctx);
 }
 
 struct perf_read_data {
@@ -7615,20 +7682,18 @@ void perf_event_exec(void)
 	struct perf_event_context *ctx;
 	int ctxn;
 
-	rcu_read_lock();
 	for_each_task_context_nr(ctxn) {
-		ctx = current->perf_event_ctxp[ctxn];
-		if (!ctx)
-			continue;
-
 		perf_event_enable_on_exec(ctxn);
+		perf_event_remove_on_exec(ctxn);
 
-		perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL,
-				   true);
+		rcu_read_lock();
+		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
+		if (ctx) {
+			perf_iterate_ctx(ctx, perf_event_addr_filters_exec,
+					 NULL, true);
+		}
+		rcu_read_unlock();
 	}
-	rcu_read_unlock();
-
-	perf_event_remove_on_exec();
 }
 
 struct remote_output {
@@ -12509,7 +12574,7 @@ static void sync_child_event(struct perf_event *child_event,
 static void
 perf_event_exit_event(struct perf_event *child_event,
 		      struct perf_event_context *child_ctx,
-		      struct task_struct *child)
+		      struct task_struct *child, bool removed)
 {
 	struct perf_event *parent_event = child_event->parent;
 
@@ -12526,12 +12591,15 @@ perf_event_exit_event(struct perf_event *child_event,
 	 * and being thorough is better.
 	 */
 	raw_spin_lock_irq(&child_ctx->lock);
-	WARN_ON_ONCE(child_ctx->is_active);
+	if (!removed) {
+		WARN_ON_ONCE(child_ctx->is_active);
 
-	if (parent_event)
-		perf_group_detach(child_event);
-	list_del_event(child_event, child_ctx);
-	perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
+		if (parent_event)
+			perf_group_detach(child_event);
+		list_del_event(child_event, child_ctx);
+	}
+	if (child_event->state >= PERF_EVENT_STATE_EXIT)
+		perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
 	raw_spin_unlock_irq(&child_ctx->lock);
 
 	/*
@@ -12617,7 +12685,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	perf_event_task(child, child_ctx, 0);
 
 	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-		perf_event_exit_event(child_event, child_ctx, child);
+		perf_event_exit_event(child_event, child_ctx, child, false);
 
 	mutex_unlock(&child_ctx->mutex);
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ