linux-kernel - Re: [PATCH v4 3/4] bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <55B9670F.4060908@iogearbox.net>
Date:	Thu, 30 Jul 2015 01:51:43 +0200
From:	Daniel Borkmann <daniel@...earbox.net>
To:	Kaixu Xia <xiakaixu@...wei.com>, ast@...mgrid.com,
	davem@...emloft.net, acme@...nel.org, mingo@...hat.com,
	a.p.zijlstra@...llo.nl, masami.hiramatsu.pt@...achi.com,
	jolsa@...nel.org
CC:	wangnan0@...wei.com, linux-kernel@...r.kernel.org,
	pi3orama@....com, hekuang@...wei.com
Subject: Re: [PATCH v4 3/4] bpf: Implement function bpf_perf_event_read()
 that get the selected hardware PMU conuter

On 07/28/2015 01:17 PM, Kaixu Xia wrote:
> According to the perf_event_map_fd and index, the function
> bpf_perf_event_read() can convert the corresponding map
> value to the pointer to struct perf_event and return the
> Hardware PMU counter value.
>
> Signed-off-by: Kaixu Xia <xiakaixu@...wei.com>
> ---
>   include/linux/bpf.h        |  1 +
>   include/linux/perf_event.h |  3 ++-
>   include/uapi/linux/bpf.h   |  1 +
>   kernel/bpf/helpers.c       | 36 ++++++++++++++++++++++++++++++++++++
>   kernel/events/core.c       |  4 ++--
>   kernel/trace/bpf_trace.c   |  2 ++
>   6 files changed, 44 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 3c9c0eb..516992c 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -190,6 +190,7 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
>   extern const struct bpf_func_proto bpf_map_update_elem_proto;
>   extern const struct bpf_func_proto bpf_map_delete_elem_proto;
>
> +extern const struct bpf_func_proto bpf_perf_event_read_proto;
>   extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
>   extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
>   extern const struct bpf_func_proto bpf_tail_call_proto;
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 2ea4067..899abcb 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -662,7 +662,8 @@ extern void perf_pmu_migrate_context(struct pmu *pmu,
>   				int src_cpu, int dst_cpu);
>   extern u64 perf_event_read_value(struct perf_event *event,
>   				 u64 *enabled, u64 *running);
> -
> +extern void __perf_event_read(void *info);
> +extern u64 perf_event_count(struct perf_event *event);
>
>   struct perf_sample_data {
>   	/*
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 69a1f6b..b9b13ce 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -250,6 +250,7 @@ enum bpf_func_id {
>   	 * Return: 0 on success
>   	 */
>   	BPF_FUNC_get_current_comm,
> +	BPF_FUNC_perf_event_read,	/* u64 bpf_perf_event_read(&map, index) */
>   	__BPF_FUNC_MAX_ID,
>   };
>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 1447ec0..c40c5ea 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -182,3 +182,39 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
>   	.arg1_type	= ARG_PTR_TO_STACK,
>   	.arg2_type	= ARG_CONST_STACK_SIZE,
>   };
> +
> +static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
> +{
> +	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
> +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> +	struct perf_event *event;
> +
> +	if (index >= array->map.max_entries)
> +		return -E2BIG;

Maybe unlikely(...), likewise below.

> +	event = (struct perf_event *)array->ptrs[index];
> +	if (!event)
> +		return -ENOENT;
> +
> +	if (event->state != PERF_EVENT_STATE_ACTIVE)
> +		return -EINVAL;
> +
> +	if (event->oncpu != raw_smp_processor_id() &&
> +	    event->ctx->task != current)
> +		return -EINVAL;
> +
> +	if (event->attr.inherit)
> +		return -EINVAL;
> +
> +	__perf_event_read(event);
> +
> +	return perf_event_count(event);

I believe this helper should rather go somewhere such as bpf_trace.c
(or under kernel/events/ ?), wouldn't we otherwise get a build error
when perf events are compiled out?

Anyway, I let perf folks comment on that (and the helper in general).

> +}
> +
> +const struct bpf_func_proto bpf_perf_event_read_proto = {
> +	.func		= bpf_perf_event_read,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_CONST_MAP_PTR,
> +	.arg2_type	= ARG_ANYTHING,
> +};
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 58f0d47..c926c6d 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -3177,7 +3177,7 @@ void perf_event_exec(void)
>   /*
>    * Cross CPU call to read the hardware event
>    */
> -static void __perf_event_read(void *info)
> +void __perf_event_read(void *info)

Does this need to be declared in a header file, no?

>   {
>   	struct perf_event *event = info;
>   	struct perf_event_context *ctx = event->ctx;
> @@ -3204,7 +3204,7 @@ static void __perf_event_read(void *info)
>   	raw_spin_unlock(&ctx->lock);
>   }
>
> -static inline u64 perf_event_count(struct perf_event *event)
> +u64 perf_event_count(struct perf_event *event)

Likewise? Should the inlining be preserved?

>   {
>   	if (event->pmu->count)
>   		return event->pmu->count(event);
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 88a041a..9cf094f 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -183,6 +183,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func

Btw, I'm wondering if the perf event map portions should actually better go
here into bpf_trace.c ...

>   		return bpf_get_trace_printk_proto();
>   	case BPF_FUNC_get_smp_processor_id:
>   		return &bpf_get_smp_processor_id_proto;
> +	case BPF_FUNC_perf_event_read:
> +		return &bpf_perf_event_read_proto;
>   	default:
>   		return NULL;
>   	}
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/