linux-kernel - Re: PATCH? trace_remove_event_call() should fail if call is active

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1372883643.22688.118.camel@gandalf.local.home>
Date:	Wed, 03 Jul 2013 16:34:03 -0400
From:	Steven Rostedt <rostedt@...dmis.org>
To:	Oleg Nesterov <oleg@...hat.com>
Cc:	Masami Hiramatsu <masami.hiramatsu.pt@...achi.com>,
	"zhangwei(Jovi)" <jovi.zhangwei@...wei.com>,
	Jiri Olsa <jolsa@...hat.com>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Arnaldo Carvalho de Melo <acme@...stprotocols.net>,
	Srikar Dronamraju <srikar@...ux.vnet.ibm.com>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Ingo Molnar <mingo@...hat.com>, linux-kernel@...r.kernel.org
Subject: Re: PATCH? trace_remove_event_call() should fail if call is active

On Wed, 2013-07-03 at 21:17 +0200, Oleg Nesterov wrote:
> On 07/03, Steven Rostedt wrote:
> >
> > On Wed, 2013-07-03 at 19:54 +0200, Oleg Nesterov wrote:
> > > On 07/03, Oleg Nesterov wrote:
> > > >
> > > > IOW. So far _I think_ we just need the additional changes in
> > > > trace_remove_event_call() if it succeeds (with the patch I sent)
> > > > to prevent the races like above, but I didn't even try to think
> > > > about this problem.
> > >
> > > And I guess greatly underestimated the problem(s). When I look at
> > > this code now, it seems that, say, event_enable_write() will use
> > > the already freed ftrace_event_file in this case.
> > >
> > > Still I think this is another (although closely related) problem.
> >
> > Correct, and I think if we fix that problem, it will encapsulate fixing
> > the kprobe race too.
> 
> I do not think so, but I can be easily wrong. Again, we shouldn't
> destroy the event if there is a perf_event attached to this tp_event.
> And we can't (afaics!) rely on TRACE_REG_UNREGISTER from event_remove()
> paths, FTRACE_EVENT_FL_SOFT_MODE can nack it.
> 
> So I still think that we also need something like the patch I sent.
> But please forget about this for the moment.
> 
> Can't we do something like below? Just in case, of course this change
> is incomplete, just to explain what I mean... And of course I how no
> idea if the change in debugfs is safe, I never looked into fs/debugfs
> before. But, perhaps, somehow we can clear i_private under event_mutex
> and kernel/trace can use file_inode() instead of filp->private_data ?
> 
> Oleg.
> 
> 
> diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
> index 4888cb3..c23d41e 100644
> --- a/fs/debugfs/inode.c
> +++ b/fs/debugfs/inode.c
> @@ -475,6 +475,7 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
>  				kfree(dentry->d_inode->i_private);
>  				/* fall through */
>  			default:
> +				dentry->d_inode->i_private = NULL;
>  				simple_unlink(parent->d_inode, dentry);
>  				break;
>  			}

No, I would avoid any changes to the debugfs infrastructure.

OK, what about the below patch, followed by an updated version of your
patch. I'll send that as a reply to this one.

-- Steve

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4372658..72ff2c6 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -195,6 +195,7 @@ extern int ftrace_event_reg(struct ftrace_event_call *event,
 			    enum trace_reg type, void *data);
 
 enum {
+	TRACE_EVENT_FL_REF_MAX_BIT = 20,
 	TRACE_EVENT_FL_FILTERED_BIT,
 	TRACE_EVENT_FL_CAP_ANY_BIT,
 	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
@@ -203,6 +204,8 @@ enum {
 };
 
 /*
+ * Event ref count uses the first 20 bits of the flags field.
+ *
  * Event flags:
  *  FILTERED	  - The event has a filter attached
  *  CAP_ANY	  - Any user can enable for perf
@@ -213,6 +216,7 @@ enum {
  *                     it is best to clear the buffers that used it).
  */
 enum {
+	TRACE_EVENT_FL_REF_MAX		= (1 << TRACE_EVENT_FL_REF_MAX_BIT),
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
 	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
 	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
@@ -220,6 +224,8 @@ enum {
 	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
 };
 
+#define TRACE_EVENT_FL_REF_MASK	(TRACE_EVENT_FL_REF_MAX - 1)
+
 struct ftrace_event_call {
 	struct list_head	list;
 	struct ftrace_event_class *class;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7d85429..16f3236 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -391,6 +391,23 @@ static void __get_system_dir(struct ftrace_subsystem_dir *dir)
 	__get_system(dir->subsystem);
 }
 
+static int ftrace_event_call_get(struct ftrace_event_call *call)
+{
+	if ((call->flags & TRACE_EVENT_FL_REF_MASK) == TRACE_EVENT_FL_REF_MAX - 1)
+		return -EBUSY;
+
+	call->flags++;
+	return 0;
+}
+
+static void ftrace_event_call_put(struct ftrace_event_call *call)
+{
+	if (WARN_ON_ONCE(!(call->flags & TRACE_EVENT_FL_REF_MASK)))
+		return;
+
+	call->flags--;
+}
+
 static void __put_system_dir(struct ftrace_subsystem_dir *dir)
 {
 	WARN_ON_ONCE(dir->ref_count == 0);
@@ -424,7 +441,15 @@ static int tracing_open_generic_file(struct inode *inode, struct file *filp)
 
 	ret = tracing_open_generic(inode, filp);
 	if (ret < 0)
-		trace_array_put(tr);
+		goto fail;
+
+	ret = ftrace_event_call_get(file->event_call);
+	if (ret < 0)
+		goto fail;
+
+	return 0;
+ fail:
+	trace_array_put(tr);
 	return ret;
 }
 
@@ -433,12 +458,40 @@ static int tracing_release_generic_file(struct inode *inode, struct file *filp)
 	struct ftrace_event_file *file = inode->i_private;
 	struct trace_array *tr = file->tr;
 
+	ftrace_event_call_put(file->event_call);
 	trace_array_put(tr);
 
 	return 0;
 }
 
 /*
+ * Open and update call ref count.
+ * Must have ftrace_event_call passed in.
+ */
+static int tracing_open_generic_call(struct inode *inode, struct file *filp)
+{
+	struct ftrace_event_call *call = inode->i_private;
+
+	return ftrace_event_call_get(call);
+}
+
+static int tracing_release_generic_call(struct inode *inode, struct file *filp)
+{
+	struct ftrace_event_call *call = inode->i_private;
+
+	ftrace_event_call_put(call);
+	return 0;
+}
+
+static int tracing_seq_release_call(struct inode *inode, struct file *filp)
+{
+	struct ftrace_event_call *call = inode->i_private;
+
+	ftrace_event_call_put(call);
+	return seq_release(inode, filp);
+}
+
+/*
  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
  */
 static int
@@ -949,10 +1002,16 @@ static int trace_format_open(struct inode *inode, struct file *file)
 	struct seq_file *m;
 	int ret;
 
-	ret = seq_open(file, &trace_format_seq_ops);
+	ret = ftrace_event_call_get(call);
 	if (ret < 0)
 		return ret;
 
+	ret = seq_open(file, &trace_format_seq_ops);
+	if (ret < 0) {
+		ftrace_event_call_put(call);
+		return ret;
+	}
+
 	m = file->private_data;
 	m->private = call;
 
@@ -1260,20 +1319,22 @@ static const struct file_operations ftrace_event_format_fops = {
 	.open = trace_format_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release,
+	.release = tracing_seq_release_call,
 };
 
 static const struct file_operations ftrace_event_id_fops = {
-	.open = tracing_open_generic,
+	.open = tracing_open_generic_call,
 	.read = event_id_read,
 	.llseek = default_llseek,
+	.release = tracing_release_generic_call,
 };
 
 static const struct file_operations ftrace_event_filter_fops = {
-	.open = tracing_open_generic,
+	.open = tracing_open_generic_call,
 	.read = event_filter_read,
 	.write = event_filter_write,
 	.llseek = default_llseek,
+	.release = tracing_release_generic_call,
 };
 
 static const struct file_operations ftrace_subsystem_filter_fops = {


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/