lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 11 Oct 2017 17:02:28 +0800
From:   zhouchengming <zhouchengming1@...wei.com>
To:     Peter Zijlstra <peterz@...radead.org>
CC:     <rostedt@...dmis.org>, <mingo@...nel.org>,
        <linux-kernel@...r.kernel.org>, <jolsa@...nel.org>
Subject: Re: [PATCH 2/4] perf/ftrace: Fix function trace events

On 2017/10/11 15:45, Peter Zijlstra wrote:
> The function-trace <-> perf interface is a tad messed up. Where all
> the other trace <-> perf interfaces use a single trace hook
> registration and use per-cpu RCU based hlist to iterate the events,
> function-trace actually needs multiple hook registrations in order to
> minimize function entry patching when filters are present.
>
> The end result is that we iterate events both on the trace hook and on
> the hlist, which results in reporting events multiple times.
>
> Since function-trace cannot use the regular scheme, fix it the other
> way around, use singleton hlists.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> ---
>   include/linux/trace_events.h    |    5 ++
>   kernel/trace/trace_event_perf.c |   82 ++++++++++++++++++++++++----------------
>   2 files changed, 55 insertions(+), 32 deletions(-)
>
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -173,6 +173,11 @@ enum trace_reg {
>   	TRACE_REG_PERF_UNREGISTER,
>   	TRACE_REG_PERF_OPEN,
>   	TRACE_REG_PERF_CLOSE,
> +	/*
> +	 * These (ADD/DEL) use a 'boolean' return value, where 1 (true) means a
> +	 * custom action was taken and the default action is not to be
> +	 * performed.
> +	 */
>   	TRACE_REG_PERF_ADD,
>   	TRACE_REG_PERF_DEL,
>   #endif
> --- a/kernel/trace/trace_event_perf.c
> +++ b/kernel/trace/trace_event_perf.c
> @@ -240,27 +240,41 @@ void perf_trace_destroy(struct perf_even
>   int perf_trace_add(struct perf_event *p_event, int flags)
>   {
>   	struct trace_event_call *tp_event = p_event->tp_event;
> -	struct hlist_head __percpu *pcpu_list;
> -	struct hlist_head *list;
>
> -	pcpu_list = tp_event->perf_events;
> -	if (WARN_ON_ONCE(!pcpu_list))
> -		return -EINVAL;
> +	/*
> +	 * If TRACE_REG_PERF_ADD returns false; no custom action was performed
> +	 * and we need to take the default action of enqueueing our event on
> +	 * the right per-cpu hlist.
> +	 */
> +	if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event)) {
> +		struct hlist_head __percpu *pcpu_list;
> +		struct hlist_head *list;
> +
> +		pcpu_list = tp_event->perf_events;
> +		if (WARN_ON_ONCE(!pcpu_list))
> +			return -EINVAL;
>
> -	if (!(flags & PERF_EF_START))
> -		p_event->hw.state = PERF_HES_STOPPED;
> +		if (!(flags & PERF_EF_START))
> +			p_event->hw.state = PERF_HES_STOPPED;

Don't we need to check the flags for the ftrace perf_event as well?
If so, should we move this outside the if (!tp_event->class->reg()) block?

>
> -	list = this_cpu_ptr(pcpu_list);
> -	hlist_add_head_rcu(&p_event->hlist_entry, list);
> +		list = this_cpu_ptr(pcpu_list);
> +		hlist_add_head_rcu(&p_event->hlist_entry, list);
> +	}

Now that we don't add the perf_event to the pcpu_list, we can also avoid
allocating the pcpu_list for the function tp_event in perf_trace_event_reg().

Thanks.

>
> -	return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
> +	return 0;
>   }
>
>   void perf_trace_del(struct perf_event *p_event, int flags)
>   {
>   	struct trace_event_call *tp_event = p_event->tp_event;
> -	hlist_del_rcu(&p_event->hlist_entry);
> -	tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
> +
> +	/*
> +	 * If TRACE_REG_PERF_DEL returns false; no custom action was performed
> +	 * and we need to take the default action of dequeueing our event from
> +	 * the right per-cpu hlist.
> +	 */
> +	if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event))
> +		hlist_del_rcu(&p_event->hlist_entry);
>   }
>
>   void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp)
> @@ -307,14 +321,24 @@ perf_ftrace_function_call(unsigned long
>   			  struct ftrace_ops *ops, struct pt_regs *pt_regs)
>   {
>   	struct ftrace_entry *entry;
> -	struct hlist_head *head;
> +	struct perf_event *event;
> +	struct hlist_head head;
>   	struct pt_regs regs;
>   	int rctx;
>
> -	head = this_cpu_ptr(event_function.perf_events);
> -	if (hlist_empty(head))
> +	if ((unsigned long)ops->private != smp_processor_id())
>   		return;
>
> +	event = container_of(ops, struct perf_event, ftrace_ops);
> +
> +	/*
> +	 * @event->hlist entry is NULL (per INIT_HLIST_NODE), and all
> +	 * the perf code does is hlist_for_each_entry_rcu(), so we can
> +	 * get away with simply setting the @head.first pointer in order
> +	 * to create a singular list.
> +	 */
> +	head.first = &event->hlist_entry;
> +
>   #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
>   		    sizeof(u64)) - sizeof(u32))
>
> @@ -330,7 +354,7 @@ perf_ftrace_function_call(unsigned long
>   	entry->ip = ip;
>   	entry->parent_ip = parent_ip;
>   	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
> -			      1, &regs, head, NULL);
> +			      1, &regs, &head, NULL);
>
>   #undef ENTRY_SIZE
>   }
> @@ -339,8 +363,10 @@ static int perf_ftrace_function_register
>   {
>   	struct ftrace_ops *ops = &event->ftrace_ops;
>
> -	ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU;
> -	ops->func = perf_ftrace_function_call;
> +	ops->flags   |= FTRACE_OPS_FL_RCU;
> +	ops->func    = perf_ftrace_function_call;
> +	ops->private = (void *)(unsigned long)nr_cpu_ids;
> +
>   	return register_ftrace_function(ops);
>   }
>
> @@ -352,19 +378,11 @@ static int perf_ftrace_function_unregist
>   	return ret;
>   }
>
> -static void perf_ftrace_function_enable(struct perf_event *event)
> -{
> -	ftrace_function_local_enable(&event->ftrace_ops);
> -}
> -
> -static void perf_ftrace_function_disable(struct perf_event *event)
> -{
> -	ftrace_function_local_disable(&event->ftrace_ops);
> -}
> -
>   int perf_ftrace_event_register(struct trace_event_call *call,
>   			       enum trace_reg type, void *data)
>   {
> +	struct perf_event *event = data;
> +
>   	switch (type) {
>   	case TRACE_REG_REGISTER:
>   	case TRACE_REG_UNREGISTER:
> @@ -377,11 +395,11 @@ int perf_ftrace_event_register(struct tr
>   	case TRACE_REG_PERF_CLOSE:
>   		return perf_ftrace_function_unregister(data);
>   	case TRACE_REG_PERF_ADD:
> -		perf_ftrace_function_enable(data);
> -		return 0;
> +		event->ftrace_ops.private = (void *)(unsigned long)smp_processor_id();
> +		return 1;
>   	case TRACE_REG_PERF_DEL:
> -		perf_ftrace_function_disable(data);
> -		return 0;
> +		event->ftrace_ops.private = (void *)(unsigned long)nr_cpu_ids;
> +		return 1;
>   	}
>
>   	return -EINVAL;
>
>
>
> .
>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ