Date:	Wed, 30 Apr 2014 20:44:37 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Vince Weaver <vincent.weaver@...ne.edu>
Cc:	Ingo Molnar <mingo@...nel.org>, linux-kernel@...r.kernel.org,
	Thomas Gleixner <tglx@...utronix.de>,
	Steven Rostedt <rostedt@...dmis.org>
Subject: Re: [perf] more perf_fuzzer memory corruption


Vince, could you add the below to whatever tracing muck you already
have?

After staring at your traces all day with Thomas, we have doubts about
the refcount integrity.
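
For reference, the gist of the refcounting changes below, as a rough
userspace sketch (illustrative only; struct obj and the obj_* helpers
are made-up names, approximating atomic_long_cmpxchg(),
atomic_long_sub_return() and trace_printk() with C11 atomics and
fprintf): the free path only proceeds when it can atomically move the
count from 1 to 0, and every put traces the remaining count, so a
leaked or double reference warns instead of silently freeing a live
event.

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct obj {
		atomic_long refcount;		/* stands in for event->refcount */
	};

	/* Final teardown; by now nobody should hold a reference. */
	static void obj_do_free(struct obj *o)
	{
		long refs = atomic_load(&o->refcount);

		if (refs)
			fprintf(stderr, "freeing with %ld refs left; ptr=%p\n",
				refs, (void *)o);
		free(o);
	}

	/* Callers here must still own the one remaining reference. */
	static void obj_free(struct obj *o)
	{
		long expected = 1;

		if (!atomic_compare_exchange_strong(&o->refcount, &expected, 0)) {
			/* on failure, 'expected' holds the count actually seen */
			fprintf(stderr, "unexpected refcount: %ld; ptr=%p\n",
				expected, (void *)o);
			return;
		}
		obj_do_free(o);
	}

	static void obj_put(struct obj *o)
	{
		long refs = atomic_fetch_sub(&o->refcount, 1) - 1;

		fprintf(stderr, "put ref: %ld; ptr=%p\n", refs, (void *)o);
		if (!refs)
			obj_do_free(o);		/* last ref gone: tear down */
	}

	int main(void)
	{
		struct obj *o = calloc(1, sizeof(*o));

		atomic_init(&o->refcount, 1);	/* creation holds the first ref */
		atomic_fetch_add(&o->refcount, 1);	/* e.g. an mmap/inherit ref */

		obj_put(o);	/* prints "put ref: 1" */
		obj_put(o);	/* prints "put ref: 0", then frees */
		return 0;
	}

The actual patch follows; obj_free() above corresponds to the new
free_event(), obj_do_free() to _free_event(), and obj_put() to
put_event().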



---
 kernel/events/core.c | 146 +++++++++++++++++++++++++++++----------------------
 1 file changed, 82 insertions(+), 64 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5129b1201050..dac01e099f13 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1664,6 +1664,9 @@ event_sched_in(struct perf_event *event,
 	u64 tstamp = perf_event_time(event);
 	int ret = 0;
 
+	WARN_ON(event->ctx != ctx);
+	lockdep_assert_held(&ctx->lock);
+
 	if (event->state <= PERF_EVENT_STATE_OFF)
 		return 0;
 
@@ -2029,14 +2032,9 @@ static int __perf_event_enable(void *info)
 	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
 		goto unlock;
 
-	if (!group_can_go_on(event, cpuctx, 1)) {
-		err = -EEXIST;
-	} else {
-		if (event == leader)
-			err = group_sched_in(event, cpuctx, ctx);
-		else
-			err = event_sched_in(event, cpuctx, ctx);
-	}
+	err = -EEXIST;
+	if (group_can_go_on(event, cpuctx, 1))
+		err = group_sched_in(event, cpuctx, ctx);
 
 	if (err) {
 		/*
@@ -2293,6 +2291,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 	if (!cpuctx->task_ctx)
 		return;
 
+#if 0
 	rcu_read_lock();
 	next_ctx = next->perf_event_ctxp[ctxn];
 	if (!next_ctx)
@@ -2335,6 +2334,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 	}
 unlock:
 	rcu_read_unlock();
+#endif
 
 	if (do_switch) {
 		raw_spin_lock(&ctx->lock);
@@ -3233,10 +3233,18 @@ static void __free_event(struct perf_event *event)
 	if (event->pmu)
 		module_put(event->pmu->module);
 
+	WARN_ON(event->hlist_entry.pprev && event->hlist_entry.pprev != LIST_POISON2);
+
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
-static void free_event(struct perf_event *event)
+
+static void _free_event(struct perf_event *event)
 {
+	long refs = atomic_long_read(&event->refcount);
+
+	WARN(refs, "freeing event with %ld refs left; ptr=0x%p\n", refs, event);
+	trace_printk("freeing with %ld refs; ptr=0x%p\n", refs, event);
+
 	irq_work_sync(&event->pending);
 
 	unaccount_event(event);
@@ -3263,48 +3271,32 @@ static void free_event(struct perf_event *event)
 	if (is_cgroup_event(event))
 		perf_detach_cgroup(event);
 
-
 	__free_event(event);
 }
 
-int perf_event_release_kernel(struct perf_event *event)
+static void free_event(struct perf_event *event)
 {
-	struct perf_event_context *ctx = event->ctx;
-
-	WARN_ON_ONCE(ctx->parent_ctx);
-	/*
-	 * There are two ways this annotation is useful:
-	 *
-	 *  1) there is a lock recursion from perf_event_exit_task
-	 *     see the comment there.
-	 *
-	 *  2) there is a lock-inversion with mmap_sem through
-	 *     perf_event_read_group(), which takes faults while
-	 *     holding ctx->mutex, however this is called after
-	 *     the last filedesc died, so there is no possibility
-	 *     to trigger the AB-BA case.
-	 */
-	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
-	raw_spin_lock_irq(&ctx->lock);
-	perf_group_detach(event);
-	raw_spin_unlock_irq(&ctx->lock);
-	perf_remove_from_context(event);
-	mutex_unlock(&ctx->mutex);
-
-	free_event(event);
-
-	return 0;
+	if (unlikely(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1)) {
+		WARN(1, "unexpected event refcount: %ld; ptr=0x%p\n",
+				atomic_long_read(&event->refcount),
+				event);
+		return;
+	}
+	_free_event(event);
 }
-EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 
 /*
  * Called when the last reference to the file is gone.
  */
-static void put_event(struct perf_event *event)
+static void put_event(struct perf_event *event, struct perf_event *other)
 {
+	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *owner;
 
-	if (!atomic_long_dec_and_test(&event->refcount))
+	long refs = atomic_long_sub_return(1, &event->refcount);
+	trace_printk("put ref: %ld; ptr=0x%p other=0x%p\n", refs, event, other);
+
+	if (refs)
 		return;
 
 	rcu_read_lock();
@@ -3316,14 +3308,8 @@ static void put_event(struct perf_event *event)
 	 * owner->perf_event_mutex.
 	 */
 	smp_read_barrier_depends();
-	if (owner) {
-		/*
-		 * Since delayed_put_task_struct() also drops the last
-		 * task reference we can safely take a new reference
-		 * while holding the rcu_read_lock().
-		 */
-		get_task_struct(owner);
-	}
+	if (owner && !atomic_inc_not_zero(&owner->usage))
+		owner = NULL;
 	rcu_read_unlock();
 
 	if (owner) {
@@ -3340,12 +3326,39 @@ static void put_event(struct perf_event *event)
 		put_task_struct(owner);
 	}
 
-	perf_event_release_kernel(event);
+	WARN_ON_ONCE(ctx->parent_ctx);
+	/*
+	 * There are two ways this annotation is useful:
+	 *
+	 *  1) there is a lock recursion from perf_event_exit_task
+	 *     see the comment there.
+	 *
+	 *  2) there is a lock-inversion with mmap_sem through
+	 *     perf_event_read_group(), which takes faults while
+	 *     holding ctx->mutex, however this is called after
+	 *     the last filedesc died, so there is no possibility
+	 *     to trigger the AB-BA case.
+	 */
+	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
+	raw_spin_lock_irq(&ctx->lock);
+	perf_group_detach(event);
+	raw_spin_unlock_irq(&ctx->lock);
+	perf_remove_from_context(event);
+	mutex_unlock(&ctx->mutex);
+
+	_free_event(event);
+}
+
+int perf_event_release_kernel(struct perf_event *event)
+{
+	put_event(event, NULL);
+	return 0;
 }
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 
 static int perf_release(struct inode *inode, struct file *file)
 {
-	put_event(file->private_data);
+	put_event(file->private_data, NULL);
 	return 0;
 }
 
@@ -3969,6 +3982,11 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 			 */
 			continue;
 		}
+
+		trace_printk("inc ref: %ld; ptr=0x%p\n",
+				atomic_long_read(&event->refcount),
+				event);
+
 		rcu_read_unlock();
 
 		mutex_lock(&event->mmap_mutex);
@@ -3988,7 +4006,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 			ring_buffer_put(rb); /* can't be last, we still have one */
 		}
 		mutex_unlock(&event->mmap_mutex);
-		put_event(event);
+		put_event(event, NULL);
 
 		/*
 		 * Restart the iteration; either we're on the wrong list or
@@ -7374,7 +7392,7 @@ static void sync_child_event(struct perf_event *child_event,
 	 * Release the parent event, if this was the last
 	 * reference to it.
 	 */
-	put_event(parent_event);
+	put_event(parent_event, child_event);
 }
 
 static void
@@ -7382,11 +7400,9 @@ __perf_event_exit_task(struct perf_event *child_event,
 			 struct perf_event_context *child_ctx,
 			 struct task_struct *child)
 {
-	if (child_event->parent) {
-		raw_spin_lock_irq(&child_ctx->lock);
-		perf_group_detach(child_event);
-		raw_spin_unlock_irq(&child_ctx->lock);
-	}
+	raw_spin_lock_irq(&child_ctx->lock);
+	perf_group_detach(child_event);
+	raw_spin_unlock_irq(&child_ctx->lock);
 
 	perf_remove_from_context(child_event);
 
@@ -7458,12 +7474,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	mutex_lock(&child_ctx->mutex);
 
 again:
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
-				 group_entry)
-		__perf_event_exit_task(child_event, child_ctx, child);
-
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
-				 group_entry)
+	list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
 	/*
@@ -7472,8 +7483,10 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 * will still point to the list head terminating the iteration.
 	 */
 	if (!list_empty(&child_ctx->pinned_groups) ||
-	    !list_empty(&child_ctx->flexible_groups))
+	    !list_empty(&child_ctx->flexible_groups)) {
+		WARN_ON_ONCE(1);
 		goto again;
+	}
 
 	mutex_unlock(&child_ctx->mutex);
 
@@ -7519,7 +7532,7 @@ static void perf_free_event(struct perf_event *event,
 	list_del_init(&event->child_list);
 	mutex_unlock(&parent->child_mutex);
 
-	put_event(parent);
+	put_event(parent, event);
 
 	perf_group_detach(event);
 	list_del_event(event, ctx);
@@ -7605,6 +7618,11 @@ inherit_event(struct perf_event *parent_event,
 		return NULL;
 	}
 
+	trace_printk("inherit inc ref: %ld; ptr=0x%p other=0x%p\n",
+			atomic_long_read(&parent_event->refcount),
+			parent_event,
+			child_event);
+
 	get_ctx(child_ctx);
 
 	/*
--
