linux-kernel - Re: [for-next][PATCH 2/5] tracing: Add set_event_pid directory for future use

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20151119232427.GZ5184@linux.vnet.ibm.com>
Date:	Thu, 19 Nov 2015 15:24:27 -0800
From:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To:	Steven Rostedt <rostedt@...dmis.org>
Cc:	linux-kernel@...r.kernel.org, Ingo Molnar <mingo@...nel.org>,
	Andrew Morton <akpm@...ux-foundation.org>
Subject: Re: [for-next][PATCH 2/5] tracing: Add set_event_pid directory for
 future use

On Thu, Oct 29, 2015 at 03:07:56AM -0400, Steven Rostedt wrote:
> From: "Steven Rostedt (Red Hat)" <rostedt@...dmis.org>
> 
> Create a tracing directory called set_event_pid, which currently has no
> function, but will be used to filter all events for the tracing instance or
> the pids that are added to the file.
> 
> The reason no functionality is added with this commit is that this commit
> focuses on the creation and removal of the pids in a safe manner. And tests
> can be made against this change to make sure things are correct before
> hooking features to the list of pids.
> 
> Cc: "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
> Signed-off-by: Steven Rostedt <rostedt@...dmis.org>
> ---
>  kernel/trace/trace.h        |   7 ++
>  kernel/trace/trace_events.c | 287 ++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 294 insertions(+)
> 
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index fb8a61c710ea..250481043bb5 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -176,6 +176,12 @@ struct trace_options {
>  	struct trace_option_dentry	*topts;
>  };
> 
> +struct trace_pid_list {
> +	unsigned int			nr_pids;
> +	int				order;
> +	pid_t				*pids;
> +};
> +
>  /*
>   * The trace array - an array of per-CPU trace arrays. This is the
>   * highest level data structure that individual tracers deal with.
> @@ -201,6 +207,7 @@ struct trace_array {
>  	bool			allocated_snapshot;
>  	unsigned long		max_latency;
>  #endif
> +	struct trace_pid_list	__rcu *filtered_pids;
>  	/*
>  	 * max_lock is used to protect the swapping of buffers
>  	 * when taking a max snapshot. The buffers themselves are
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index d120cfe3cca7..2ad7014707ee 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -15,8 +15,10 @@
>  #include <linux/kthread.h>
>  #include <linux/tracefs.h>
>  #include <linux/uaccess.h>
> +#include <linux/bsearch.h>
>  #include <linux/module.h>
>  #include <linux/ctype.h>
> +#include <linux/sort.h>
>  #include <linux/slab.h>
>  #include <linux/delay.h>
> 
> @@ -445,6 +447,43 @@ static void ftrace_clear_events(struct trace_array *tr)
>  	mutex_unlock(&event_mutex);
>  }
> 
> +static int cmp_pid(const void *key, const void *elt)
> +{
> +	const pid_t *search_pid = key;
> +	const pid_t *pid = elt;
> +
> +	if (*search_pid == *pid)
> +		return 0;
> +	if (*search_pid < *pid)
> +		return -1;
> +	return 1;
> +}
> +
> +static void __ftrace_clear_event_pids(struct trace_array *tr)
> +{
> +	struct trace_pid_list *pid_list;
> +
> +	pid_list = rcu_dereference_protected(tr->filtered_pids,
> +					     lockdep_is_held(&event_mutex));
> +	if (!pid_list)
> +		return;
> +
> +	rcu_assign_pointer(tr->filtered_pids, NULL);
> +
> +	/* Wait till all users are no longer using pid filtering */
> +	synchronize_sched();
> +
> +	free_pages((unsigned long)pid_list->pids, pid_list->order);
> +	kfree(pid_list);
> +}
> +
> +static void ftrace_clear_event_pids(struct trace_array *tr)
> +{
> +	mutex_lock(&event_mutex);
> +	__ftrace_clear_event_pids(tr);
> +	mutex_unlock(&event_mutex);
> +}
> +
>  static void __put_system(struct event_subsystem *system)
>  {
>  	struct event_filter *filter = system->filter;
> @@ -777,6 +816,56 @@ static void t_stop(struct seq_file *m, void *p)
>  	mutex_unlock(&event_mutex);
>  }
> 
> +static void *p_start(struct seq_file *m, loff_t *pos)
> +{
> +	struct trace_pid_list *pid_list;
> +	struct trace_array *tr = m->private;
> +
> +	/*
> +	 * Grab the mutex, to keep calls to p_next() having the same
> +	 * tr->filtered_pids as p_start() has.
> +	 * If we just passed the tr->filtered_pids around, then RCU would
> +	 * have been enough, but doing that makes things more complex.
> +	 */
> +	mutex_lock(&event_mutex);
> +	rcu_read_lock_sched();

This looks interesting...  You hold the mutex, which I am guessing
blocks changes.  Then why the need for rcu_read_lock_sched()?

						Thanx, Paul

> +
> +	pid_list = rcu_dereference_sched(tr->filtered_pids);
> +
> +	if (!pid_list || *pos >= pid_list->nr_pids)
> +		return NULL;
> +
> +	return (void *)&pid_list->pids[*pos];
> +}
> +
> +static void p_stop(struct seq_file *m, void *p)
> +{
> +	rcu_read_unlock_sched();
> +	mutex_unlock(&event_mutex);
> +}
> +
> +static void *
> +p_next(struct seq_file *m, void *v, loff_t *pos)
> +{
> +	struct trace_array *tr = m->private;
> +	struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
> +
> +	(*pos)++;
> +
> +	if (*pos >= pid_list->nr_pids)
> +		return NULL;
> +
> +	return (void *)&pid_list->pids[*pos];
> +}
> +
> +static int p_show(struct seq_file *m, void *v)
> +{
> +	pid_t *pid = v;
> +
> +	seq_printf(m, "%d\n", *pid);
> +	return 0;
> +}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/