[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150721151309.GC10689@danjae.kornet>
Date: Wed, 22 Jul 2015 00:13:09 +0900
From: Namhyung Kim <namhyung@...nel.org>
To: He Kuang <hekuang@...wei.com>
Cc: rostedt@...dmis.org, ast@...mgrid.com,
masami.hiramatsu.pt@...achi.com, acme@...nel.org,
a.p.zijlstra@...llo.nl, mingo@...hat.com, jolsa@...nel.org,
wangnan0@...wei.com, pi3orama@....com, linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH v6 2/2] bpf: Introduce function for outputing data to
perf event
On Tue, Jul 21, 2015 at 03:08:50AM +0000, He Kuang wrote:
> There are scenarios where we need an eBPF program to record not only
> kprobe point args, but also the PMU counters, time latencies or the
> number of cache misses between two probe points and other information
> when the probe point is entered.
>
> This patch adds a new trace event to establish infrastructure for bpf to
> output data to perf. Userspace perf tools can detect and use this event
> in the same way as the existing tracepoint events.
>
> New bpf trace event entry in debugfs:
>
> /sys/kernel/debug/tracing/events/bpf/bpf_output_data
>
> Userspace perf tools detect the new tracepoint event as:
>
> bpf:bpf_output_data [Tracepoint event]
>
> Data in the ring buffer of perf events added to this event will be polled
> out; sample types and other attributes can be adjusted on those events
> directly without touching the original kprobe events.
>
> The bpf helper function gives an eBPF program the ability to output data as
> a perf sample event. This helper simply calls the new trace event, and
> userspace perf tools can record the BPF ftrace event to collect those
> records.
>
> Signed-off-by: He Kuang <hekuang@...wei.com>
> Acked-by: Alexei Starovoitov <ast@...mgrid.com>
Acked-by: Namhyung Kim <namhyung@...nel.org>
Thanks,
Namhyung
> ---
> include/trace/events/bpf.h | 30 ++++++++++++++++++++++++++++++
> include/uapi/linux/bpf.h | 7 +++++++
> kernel/trace/bpf_trace.c | 23 +++++++++++++++++++++++
> samples/bpf/bpf_helpers.h | 2 ++
> 4 files changed, 62 insertions(+)
> create mode 100644 include/trace/events/bpf.h
>
> diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
> new file mode 100644
> index 0000000..6b739b8
> --- /dev/null
> +++ b/include/trace/events/bpf.h
> @@ -0,0 +1,30 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM bpf
> +
> +#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_BPF_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(bpf_output_data,
> +
> + TP_PROTO(u64 *src, int size),
> +
> + TP_ARGS(src, size),
> +
> + TP_STRUCT__entry(
> + __dynamic_array(u8, buf, size)
> + ),
> +
> + TP_fast_assign(
> + memcpy(__get_dynamic_array(buf), src, size);
> + ),
> +
> + TP_printk("%s", __print_hex(__get_dynamic_array(buf),
> + __get_dynamic_array_len(buf)))
> +);
> +
> +#endif /* _TRACE_BPF_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 29ef6f9..5068ab1 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -249,6 +249,13 @@ enum bpf_func_id {
> * Return: 0 on success
> */
> BPF_FUNC_get_current_comm,
> +
> + /**
> + * int bpf_output_trace_data(void *src, int size)
> + * Return: 0 on success
> + */
> + BPF_FUNC_output_trace_data,
> +
> __BPF_FUNC_MAX_ID,
> };
>
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 88a041a..219f670 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -11,7 +11,10 @@
> #include <linux/filter.h>
> #include <linux/uaccess.h>
> #include <linux/ctype.h>
> +
> #include "trace.h"
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/bpf.h>
>
> static DEFINE_PER_CPU(int, bpf_prog_active);
>
> @@ -79,6 +82,24 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
> .arg3_type = ARG_ANYTHING,
> };
>
> +static u64 bpf_output_trace_data(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
> +{
> + void *src = (void *) (long) r1;
> + int size = (int) r2;
> +
> + trace_bpf_output_data(src, size);
> +
> + return 0;
> +}
> +
> +static const struct bpf_func_proto bpf_output_trace_data_proto = {
> + .func = bpf_output_trace_data,
> + .gpl_only = true,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_STACK,
> + .arg2_type = ARG_CONST_STACK_SIZE,
> +};
> +
> /*
> * limited trace_printk()
> * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed
> @@ -169,6 +190,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
> return &bpf_map_delete_elem_proto;
> case BPF_FUNC_probe_read:
> return &bpf_probe_read_proto;
> + case BPF_FUNC_output_trace_data:
> + return &bpf_output_trace_data_proto;
> case BPF_FUNC_ktime_get_ns:
> return &bpf_ktime_get_ns_proto;
> case BPF_FUNC_tail_call:
> diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
> index bdf1c16..0aeaebe 100644
> --- a/samples/bpf/bpf_helpers.h
> +++ b/samples/bpf/bpf_helpers.h
> @@ -59,5 +59,7 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
> (void *) BPF_FUNC_l3_csum_replace;
> static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
> (void *) BPF_FUNC_l4_csum_replace;
> +static int (*bpf_output_trace_data)(void *src, int size) =
> + (void *) BPF_FUNC_output_trace_data;
>
> #endif
> --
> 1.8.5.2
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists