[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <55B9670F.4060908@iogearbox.net>
Date: Thu, 30 Jul 2015 01:51:43 +0200
From: Daniel Borkmann <daniel@...earbox.net>
To: Kaixu Xia <xiakaixu@...wei.com>, ast@...mgrid.com,
davem@...emloft.net, acme@...nel.org, mingo@...hat.com,
a.p.zijlstra@...llo.nl, masami.hiramatsu.pt@...achi.com,
jolsa@...nel.org
CC: wangnan0@...wei.com, linux-kernel@...r.kernel.org,
pi3orama@....com, hekuang@...wei.com
Subject: Re: [PATCH v4 3/4] bpf: Implement function bpf_perf_event_read()
that get the selected hardware PMU conuter
On 07/28/2015 01:17 PM, Kaixu Xia wrote:
> According to the perf_event_map_fd and index, the function
> bpf_perf_event_read() can convert the corresponding map
> value to the pointer to struct perf_event and return the
> Hardware PMU counter value.
>
> Signed-off-by: Kaixu Xia <xiakaixu@...wei.com>
> ---
> include/linux/bpf.h | 1 +
> include/linux/perf_event.h | 3 ++-
> include/uapi/linux/bpf.h | 1 +
> kernel/bpf/helpers.c | 36 ++++++++++++++++++++++++++++++++++++
> kernel/events/core.c | 4 ++--
> kernel/trace/bpf_trace.c | 2 ++
> 6 files changed, 44 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 3c9c0eb..516992c 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -190,6 +190,7 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
> extern const struct bpf_func_proto bpf_map_update_elem_proto;
> extern const struct bpf_func_proto bpf_map_delete_elem_proto;
>
> +extern const struct bpf_func_proto bpf_perf_event_read_proto;
> extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
> extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
> extern const struct bpf_func_proto bpf_tail_call_proto;
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 2ea4067..899abcb 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -662,7 +662,8 @@ extern void perf_pmu_migrate_context(struct pmu *pmu,
> int src_cpu, int dst_cpu);
> extern u64 perf_event_read_value(struct perf_event *event,
> u64 *enabled, u64 *running);
> -
> +extern void __perf_event_read(void *info);
> +extern u64 perf_event_count(struct perf_event *event);
>
> struct perf_sample_data {
> /*
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 69a1f6b..b9b13ce 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -250,6 +250,7 @@ enum bpf_func_id {
> * Return: 0 on success
> */
> BPF_FUNC_get_current_comm,
> + BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */
> __BPF_FUNC_MAX_ID,
> };
>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 1447ec0..c40c5ea 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -182,3 +182,39 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
> .arg1_type = ARG_PTR_TO_STACK,
> .arg2_type = ARG_CONST_STACK_SIZE,
> };
> +
> +static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
> +{
> + struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
> + struct bpf_array *array = container_of(map, struct bpf_array, map);
> + struct perf_event *event;
> +
> + if (index >= array->map.max_entries)
> + return -E2BIG;
Maybe wrap this condition in unlikely(...); likewise for the checks below.
> + event = (struct perf_event *)array->ptrs[index];
> + if (!event)
> + return -ENOENT;
> +
> + if (event->state != PERF_EVENT_STATE_ACTIVE)
> + return -EINVAL;
> +
> + if (event->oncpu != raw_smp_processor_id() &&
> + event->ctx->task != current)
> + return -EINVAL;
> +
> + if (event->attr.inherit)
> + return -EINVAL;
> +
> + __perf_event_read(event);
> +
> + return perf_event_count(event);
I believe this helper should rather go somewhere such as bpf_trace.c
(or under kernel/events/ ?). Wouldn't we otherwise get a build error
when perf events are compiled out?
Anyway, I'll let the perf folks comment on that (and on the helper in general).
> +}
> +
> +const struct bpf_func_proto bpf_perf_event_read_proto = {
> + .func = bpf_perf_event_read,
> + .gpl_only = false,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_CONST_MAP_PTR,
> + .arg2_type = ARG_ANYTHING,
> +};
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 58f0d47..c926c6d 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -3177,7 +3177,7 @@ void perf_event_exec(void)
> /*
> * Cross CPU call to read the hardware event
> */
> -static void __perf_event_read(void *info)
> +void __perf_event_read(void *info)
Doesn't this need to be declared in a header file?
> {
> struct perf_event *event = info;
> struct perf_event_context *ctx = event->ctx;
> @@ -3204,7 +3204,7 @@ static void __perf_event_read(void *info)
> raw_spin_unlock(&ctx->lock);
> }
>
> -static inline u64 perf_event_count(struct perf_event *event)
> +u64 perf_event_count(struct perf_event *event)
Likewise? Should the inlining be preserved?
> {
> if (event->pmu->count)
> return event->pmu->count(event);
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 88a041a..9cf094f 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -183,6 +183,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
Btw, I'm wondering whether the perf event map portions would actually be
better placed here, in bpf_trace.c ...
> return bpf_get_trace_printk_proto();
> case BPF_FUNC_get_smp_processor_id:
> return &bpf_get_smp_processor_id_proto;
> + case BPF_FUNC_perf_event_read:
> + return &bpf_perf_event_read_proto;
> default:
> return NULL;
> }
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists