lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 1 Dec 2010 19:29:16 +0100
From:	Frederic Weisbecker <fweisbec@...il.com>
To:	Peter Zijlstra <peterz@...radead.org>
Cc:	Corey Ashford <cjashfor@...ux.vnet.ibm.com>,
	Ingo Molnar <mingo@...e.hu>,
	LKML <linux-kernel@...r.kernel.org>,
	Stephane Eranian <eranian@...gle.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Steven Rostedt <rostedt@...dmis.org>
Subject: Re: [BUG(?)] perf_events: combining multiple tracepoint events
 into a group produces no counts on member events

On Wed, Dec 01, 2010 at 01:04:37PM +0100, Peter Zijlstra wrote:
> @@ -4833,7 +5101,8 @@ static int perf_event_set_filter(struct 
>  	char *filter_str;
>  	int ret;
>  
> -	if (event->attr.type != PERF_TYPE_TRACEPOINT)
> +	if (event->attr.type != PERF_TYPE_TRACEPOINT ||
> +	    event->attr.config == ~0ULL)
>  		return -EINVAL;
>  
>  	filter_str = strndup_user(arg, PAGE_SIZE);
> @@ -4851,6 +5120,74 @@ static void perf_event_free_filter(struc
>  	ftrace_profile_free_filter(event);
>  }
>  
> +static int perf_event_add_tp(struct perf_event *event, int tp_id)
> +{
> +	if (event->attr.type != PERF_TYPE_TRACEPOINT &&

Should it be || instead?

> +	    event->attr.config != ~0ULL)
> +		return -EINVAL;
> +
> +	return __perf_event_add_tp(event, tp_id);
> +}
> +
> +/*
> + * Called from the exit path, _after_ all events have been detached from it.
> + */
> +static void perf_tp_event_exit(struct task_struct *tsk)
> +{
> +	struct perf_tp_idr *idr = tsk->perf_tp_idr;
> +
> +	if (!idr)
> +		return;
> +
> +	idr_remove_all(&idr->idr);
> +	idr_destroy(&idr->idr);
> +}
> +
> +static void perf_tp_event_delayed_put(struct task_struct *tsk)
> +{
> +	struct perf_tp_idr *idr = tsk->perf_tp_idr;
> +
> +	tsk->perf_tp_idr = NULL;
> +	kfree(idr);
> +}
> +
> +static int perf_tp_inherit_idr(int id, void *p, void *data)
> +{
> +	struct perf_event *child = data;
> +
> +	return __perf_event_add_tp(child, id);
> +}
> +
> +static int perf_tp_event_inherit(struct perf_event *parent_event,
> +				 struct perf_event *child_event)
> +{
> +	int ret;
> +
> +	if (parent_event->attr.type != PERF_TYPE_TRACEPOINT ||
> +	    parent_event->attr.config != ~0ULL)
> +		return 0;
> +
> +	/*
> +	 * The child is not yet exposed, hence no need to serialize things
> +	 * on that side.
> +	 */
> +	mutex_lock(&parent_event->tp_idr.lock);
> +	ret = idr_for_each(&parent_event->tp_idr.idr,
> +			perf_tp_inherit_idr,
> +			child_event);
> +	mutex_unlock(&parent_event->tp_idr.lock);
> +
> +	return ret;
> +}
> +
> +static void perf_tp_event_init_task(struct task_struct *child)
> +{
> +	/*
> +	 * Clear the idr pointer copied from the parent.
> +	 */
> +	child->perf_tp_idr = NULL;
> +}
> +
>  #else
>  
>  static inline void perf_tp_register(void)
> @@ -4866,6 +5203,29 @@ static void perf_event_free_filter(struc
>  {
>  }
>  
> +static int perf_event_add_tp(struct perf_event *event, int tp_id)
> +{
> +	return -ENOENT;
> +}
> +
> +static void perf_tp_event_exit(struct task_struct *tsk)
> +{
> +}
> +
> +static void perf_tp_event_delayed_put(struct task_struct *tsk)
> +{
> +}
> +
> +static int perf_tp_event_inherit(struct perf_event *parent_event,
> +				 struct perf_event *child_event)
> +{
> +	return 0;
> +}
> +
> +static void perf_tp_event_init_task(struct task_struct *child)
> +{
> +}
> +
>  #endif /* CONFIG_EVENT_TRACING */
>  
>  #ifdef CONFIG_HAVE_HW_BREAKPOINT
> @@ -5290,6 +5650,9 @@ perf_event_alloc(struct perf_event_attr 
>  	INIT_LIST_HEAD(&event->sibling_list);
>  	init_waitqueue_head(&event->waitq);
>  	init_irq_work(&event->pending, perf_pending_event);
> +#ifdef CONFIG_EVENT_TRACING
> +	perf_tp_idr_init(&event->tp_idr);
> +#endif
>  
>  	mutex_init(&event->mmap_mutex);
>  
> @@ -5308,6 +5671,7 @@ perf_event_alloc(struct perf_event_attr 
>  
>  	if (task) {
>  		event->attach_state = PERF_ATTACH_TASK;
> +		event->hw.event_target = task;
>  #ifdef CONFIG_HAVE_HW_BREAKPOINT
>  		/*
>  		 * hw_breakpoint is a bit difficult here..
> @@ -5353,7 +5717,7 @@ perf_event_alloc(struct perf_event_attr 
>  	if (err) {
>  		if (event->ns)
>  			put_pid_ns(event->ns);
> -		kfree(event);
> +		free_event(event);
>  		return ERR_PTR(err);
>  	}
>  
> @@ -5694,7 +6058,6 @@ SYSCALL_DEFINE5(perf_event_open,
>  	}
>  
>  	perf_install_in_context(ctx, event, cpu);
> -	++ctx->generation;
>  	mutex_unlock(&ctx->mutex);
>  
>  	event->owner = current;
> @@ -5763,7 +6126,6 @@ perf_event_create_kernel_counter(struct 
>  	WARN_ON_ONCE(ctx->parent_ctx);
>  	mutex_lock(&ctx->mutex);
>  	perf_install_in_context(ctx, event, cpu);
> -	++ctx->generation;
>  	mutex_unlock(&ctx->mutex);
>  
>  	return event;
> @@ -5936,6 +6298,8 @@ void perf_event_exit_task(struct task_st
>  
>  	for_each_task_context_nr(ctxn)
>  		perf_event_exit_task_context(child, ctxn);
> +
> +	perf_tp_event_exit(child);
>  }
>  
>  static void perf_free_event(struct perf_event *event,
> @@ -5998,6 +6362,8 @@ void perf_event_delayed_put(struct task_
>  
>  	for_each_task_context_nr(ctxn)
>  		WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
> +
> +	perf_tp_event_delayed_put(task);
>  }
>  
>  /*
> @@ -6013,6 +6379,7 @@ inherit_event(struct perf_event *parent_
>  {
>  	struct perf_event *child_event;
>  	unsigned long flags;
> +	int ret;
>  
>  	/*
>  	 * Instead of creating recursive hierarchies of events,
> @@ -6030,6 +6397,13 @@ inherit_event(struct perf_event *parent_
>  					   NULL);
>  	if (IS_ERR(child_event))
>  		return child_event;
> +
> +	ret = perf_tp_event_inherit(parent_event, child_event);
> +	if (ret) {
> +		free_event(child_event);
> +		return ERR_PTR(ret);
> +	}
> +
>  	get_ctx(child_ctx);
>  
>  	/*
> @@ -6134,9 +6508,7 @@ inherit_task_group(struct perf_event *ev
>  		child->perf_event_ctxp[ctxn] = child_ctx;
>  	}
>  
> -	ret = inherit_group(event, parent, parent_ctx,
> -			    child, child_ctx);
> -
> +	ret = inherit_group(event, parent, parent_ctx, child, child_ctx);
>  	if (ret)
>  		*inherited_all = 0;
>  
> @@ -6157,9 +6529,6 @@ int perf_event_init_context(struct task_
>  
>  	child->perf_event_ctxp[ctxn] = NULL;
>  
> -	mutex_init(&child->perf_event_mutex);
> -	INIT_LIST_HEAD(&child->perf_event_list);
> -
>  	if (likely(!parent->perf_event_ctxp[ctxn]))
>  		return 0;
>  
> @@ -6236,6 +6605,11 @@ int perf_event_init_task(struct task_str
>  {
>  	int ctxn, ret;
>  
> +	mutex_init(&child->perf_event_mutex);
> +	INIT_LIST_HEAD(&child->perf_event_list);
> +
> +	perf_tp_event_init_task(child);
> +
>  	for_each_task_context_nr(ctxn) {
>  		ret = perf_event_init_context(child, ctxn);
>  		if (ret)
> Index: linux-2.6/kernel/trace/trace_event_perf.c
> ===================================================================
> --- linux-2.6.orig/kernel/trace/trace_event_perf.c
> +++ linux-2.6/kernel/trace/trace_event_perf.c
> @@ -8,6 +8,7 @@
>  #include <linux/module.h>
>  #include <linux/kprobes.h>
>  #include "trace.h"
> +#include "trace_output.h"
>  
>  static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
>  
> @@ -47,9 +48,7 @@ static int perf_trace_event_perm(struct 
>  static int perf_trace_event_init(struct ftrace_event_call *tp_event,
>  				 struct perf_event *p_event)
>  {
> -	struct hlist_head __percpu *list;
>  	int ret;
> -	int cpu;
>  
>  	ret = perf_trace_event_perm(tp_event, p_event);
>  	if (ret)
> @@ -61,15 +60,6 @@ static int perf_trace_event_init(struct 
>  
>  	ret = -ENOMEM;
>  
> -	list = alloc_percpu(struct hlist_head);
> -	if (!list)
> -		goto fail;
> -
> -	for_each_possible_cpu(cpu)
> -		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
> -
> -	tp_event->perf_events = list;
> -
>  	if (!total_ref_count) {
>  		char __percpu *buf;
>  		int i;
> @@ -100,63 +90,40 @@ static int perf_trace_event_init(struct 
>  		}
>  	}
>  
> -	if (!--tp_event->perf_refcount) {
> -		free_percpu(tp_event->perf_events);
> -		tp_event->perf_events = NULL;
> -	}
> +	--tp_event->perf_refcount;
>  
>  	return ret;
>  }
>  
> -int perf_trace_init(struct perf_event *p_event)
> +int perf_trace_init(struct perf_event *p_event, int event_id)
>  {
>  	struct ftrace_event_call *tp_event;
> -	int event_id = p_event->attr.config;
> +	struct trace_event *t_event;
>  	int ret = -EINVAL;
>  
> +	trace_event_read_lock();
> +	t_event = ftrace_find_event(event_id);
> +	if (!t_event)
> +		goto out;
> +
> +	tp_event = container_of(t_event, struct ftrace_event_call, event);
> +
>  	mutex_lock(&event_mutex);
> -	list_for_each_entry(tp_event, &ftrace_events, list) {
> -		if (tp_event->event.type == event_id &&
> -		    tp_event->class && tp_event->class->reg &&
> -		    try_module_get(tp_event->mod)) {
> -			ret = perf_trace_event_init(tp_event, p_event);
> -			if (ret)
> -				module_put(tp_event->mod);
> -			break;
> -		}
> +	if (tp_event->class && tp_event->class->reg &&
> +			try_module_get(tp_event->mod)) {
> +		ret = perf_trace_event_init(tp_event, p_event);
> +		if (ret)
> +			module_put(tp_event->mod);
>  	}
>  	mutex_unlock(&event_mutex);
> +out:
> +	trace_event_read_unlock();
>  
>  	return ret;
>  }
>  
> -int perf_trace_add(struct perf_event *p_event, int flags)
> -{
> -	struct ftrace_event_call *tp_event = p_event->tp_event;
> -	struct hlist_head __percpu *pcpu_list;
> -	struct hlist_head *list;
> -
> -	pcpu_list = tp_event->perf_events;
> -	if (WARN_ON_ONCE(!pcpu_list))
> -		return -EINVAL;
> -
> -	if (!(flags & PERF_EF_START))
> -		p_event->hw.state = PERF_HES_STOPPED;
> -
> -	list = this_cpu_ptr(pcpu_list);
> -	hlist_add_head_rcu(&p_event->hlist_entry, list);
> -
> -	return 0;
> -}
> -
> -void perf_trace_del(struct perf_event *p_event, int flags)
> -{
> -	hlist_del_rcu(&p_event->hlist_entry);
> -}
> -
> -void perf_trace_destroy(struct perf_event *p_event)
> +static void __perf_trace_destroy(struct ftrace_event_call *tp_event)
>  {
> -	struct ftrace_event_call *tp_event = p_event->tp_event;
>  	int i;
>  
>  	mutex_lock(&event_mutex);
> @@ -171,9 +138,6 @@ void perf_trace_destroy(struct perf_even
>  	 */
>  	tracepoint_synchronize_unregister();
>  
> -	free_percpu(tp_event->perf_events);
> -	tp_event->perf_events = NULL;
> -
>  	if (!--total_ref_count) {
>  		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
>  			free_percpu(perf_trace_buf[i]);
> @@ -185,6 +149,27 @@ void perf_trace_destroy(struct perf_even
>  	mutex_unlock(&event_mutex);
>  }
>  
> +void perf_trace_destroy(struct perf_event *p_event)
> +{
> +	__perf_trace_destroy(p_event->tp_event);
> +}

Is this a leftover? It doesn't seem to be used anymore.

Other than that, the whole thing looks good. Maybe I need to have
one more look at the trace_output.c changes, but the conversion
from hlist to idr seemed fine at a glance.

Adding Steve in Cc.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ