lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190625084904.GY3463@hirez.programming.kicks-ass.net>
Date:   Tue, 25 Jun 2019 10:49:04 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Mark Rutland <mark.rutland@....com>
Cc:     Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Arnaldo Carvalho de Melo <acme@...hat.com>,
        Ingo Molnar <mingo@...hat.com>, linux-kernel@...r.kernel.org,
        jolsa@...hat.com, dvyukov@...gle.com, namhyung@...nel.org,
        xiexiuqi@...wei.com,
        syzbot+a24c397a29ad22d86c98@...kaller.appspotmail.com
Subject: Re: [RFC PATCH] perf: Paper over the hw.target problems

On Mon, Jun 24, 2019 at 02:19:02PM +0200, Peter Zijlstra wrote:
> 
> 	close()						clone()
> 
> 							  copy_process()
> 							    perf_event_init_task()
> 							      perf_event_init_context()
> 							        mutex_lock(parent_ctx->mutex)
> 								inherit_task_group()
> 								  inherit_group()
> 								    inherit_event()
> 								      mutex_lock(event->child_mutex)
> 								      // expose event on child list
> 								      list_add_tail()
> 								      mutex_unlock(event->child_mutex)
> 							        mutex_unlock(parent_ctx->mutex)
> 
> 							    ...
> 							    goto bad_fork_*
> 
> 							  bad_fork_cleanup_perf:
> 							    perf_event_free_task()
> 
> 	  perf_release()
> 	    perf_event_release_kernel()
> 	      list_for_each_entry()
> 		mutex_lock(ctx->mutex)
> 		mutex_lock(event->child_mutex)
> 		// event is from the failing inherit
> 		// on the other CPU
> 		perf_remove_from_context()
> 		list_move()
> 		mutex_unlock(event->child_mutex)
> 		mutex_unlock(ctx->mutex)
> 
> 							      mutex_lock(ctx->mutex)
> 							      list_for_each_entry_safe()
> 							        // event already stolen
> 							      mutex_unlock(ctx->mutex)
> 
> 							    delayed_free_task()
> 							      free_task()
> 
> 	     list_for_each_entry_safe()
> 	       list_del()
> 	       free_event()
> 	         _free_event()
> 		   // and so event->hw.target
> 		   // is the already freed failed clone()
> 		   if (event->hw.target)
> 		     put_task_struct(event->hw.target)
> 		       // WHOOPSIE, already quite dead
> 
> 
> Which puts the lie to the comment on perf_event_free_task();
> 'unexposed, unused context' my ass.
> 
> Which is a 'fun' confluence of fail; copy_process() doing an
> unconditional free_task() and not respecting refcounts, and perf having
> creative locking. In particular:
> 
>   82d94856fa22 ("perf/core: Fix lock inversion between perf,trace,cpuhp")
> 
> seems to have overlooked this 'fun' parade.

The below seems to cure things; it uses the fact that detached events
still have a reference count on their context.

So perf_event_free_task() can detect when (child) events have gotten
stolen and wait for them to be freed.

The below (which includes a debug printk) confirms the reproducer
triggered the problem by printing a 2 (where a 1 is expected).

Thoughts?

---
 kernel/events/core.c | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 10c1dba9068c..e19d036125d1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4463,12 +4463,16 @@ static void _free_event(struct perf_event *event)
 	if (event->destroy)
 		event->destroy(event);
 
-	if (event->ctx)
-		put_ctx(event->ctx);
-
+	/*
+	 * Must be after ->destroy(), due to uprobe_perf_close() using
+	 * hw.target, but before put_ctx() because of perf_event_free_task().
+	 */
 	if (event->hw.target)
 		put_task_struct(event->hw.target);
 
+	if (event->ctx)
+		put_ctx(event->ctx);
+
 	exclusive_event_destroy(event);
 	module_put(event->pmu->module);
 
@@ -4648,10 +4652,20 @@ int perf_event_release_kernel(struct perf_event *event)
 	mutex_unlock(&event->child_mutex);
 
 	list_for_each_entry_safe(child, tmp, &free_list, child_list) {
+		void *var = &child->ctx->refcount;
+
 		list_del(&child->child_list);
 		free_event(child);
+
+		/*
+		 * Wake any perf_event_free_task() waiting for this event to be
+		 * freed.
+		 */
+		smp_mb(); /* pairs with wait_var_event() */
+		wake_up_var(var);
 	}
 
+
 no_ctx:
 	put_event(event); /* Must be the 'last' reference */
 	return 0;
@@ -11546,7 +11560,19 @@ void perf_event_free_task(struct task_struct *task)
 			perf_free_event(event, ctx);
 
 		mutex_unlock(&ctx->mutex);
-		put_ctx(ctx);
+
+		/*
+		 * perf_event_release_kernel() could've stolen some of our
+		 * child events and still have them on its free_list. In that
+		 * case it can do free_event() after the failing copy_process()
+		 * has already freed the task, and get a UaF on
+		 * event->hw.target.
+		 *
+		 * Wait for all events to drop their context reference.
+		 */
+		printk("%d\n", refcount_read(&ctx->refcount));
+		wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
+		put_ctx(ctx); /* must be last */
 	}
 }
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ