[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <0a11c37f-ea13-593d-ef5d-d1aa843d6a84@fb.com>
Date: Thu, 30 Nov 2017 11:12:50 -0800
From: Yonghong Song <yhs@...com>
To: Daniel Borkmann <daniel@...earbox.net>, <peterz@...radead.org>,
<rostedt@...dmis.org>, <ast@...com>, <kafai@...com>,
<netdev@...r.kernel.org>
CC: <kernel-team@...com>
Subject: Re: [PATCH net-next 1/2] bpf/tracing: allow user space to query prog
array on the same tp
On 11/30/17 9:27 AM, Daniel Borkmann wrote:
> On 11/29/2017 08:20 AM, Yonghong Song wrote:
>> Commit e87c6bc3852b ("bpf: permit multiple bpf attachments
>> for a single perf event") added support to attach multiple
>> bpf programs to a single perf event.
>> Commit 2541517c32be ("tracing, perf: Implement BPF programs
>> attached to kprobes") utilized the existing perf ioctl
>> interface and added the command PERF_EVENT_IOC_SET_BPF
>> to attach a bpf program to a tracepoint.
>>
>> This patch adds a new ioctl
>> command, given a perf event fd, to query the bpf program array
>> attached to the same perf tracepoint event.
>>
>> The new uapi ioctl command:
>> PERF_EVENT_IOC_QUERY_BPF
>>
>> The new uapi/linux/perf_event.h structure:
>> struct perf_event_query_bpf {
>> __u64 prog_ids;
>> __u32 prog_cnt;
>> };
>>
>> The usage:
>> struct perf_event_query_bpf query;
>> query.prog_ids = (__u64)usr_prog_ids_buf;
>> query.prog_cnt = usr_prog_ids_buf_len;
>> err = ioctl(pmu_efd, PERF_EVENT_IOC_QUERY_BPF, &query);
>>
>> Signed-off-by: Yonghong Song <yhs@...com>
>> ---
>> include/linux/bpf.h | 4 ++++
>> include/uapi/linux/perf_event.h | 6 ++++++
>> kernel/bpf/core.c | 24 ++++++++++++++++++++++++
>> kernel/events/core.c | 3 +++
>> kernel/trace/bpf_trace.c | 23 +++++++++++++++++++++++
>> 5 files changed, 60 insertions(+)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index e55e425..f812ac5 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -254,6 +254,7 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
>>
>> u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
>> void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
>> +int bpf_event_query_prog_array(struct perf_event *event, void __user *info);
>>
>> int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
>> union bpf_attr __user *uattr);
>> @@ -285,6 +286,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
>>
>> void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
>> struct bpf_prog *old_prog);
>> +int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
>> + __u32 __user *prog_ids, u32 request_cnt,
>> + __u32 __user *prog_cnt);
>> int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
>> struct bpf_prog *exclude_prog,
>> struct bpf_prog *include_prog,
>> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
>> index b9a4953..fee0b43 100644
>> --- a/include/uapi/linux/perf_event.h
>> +++ b/include/uapi/linux/perf_event.h
>> @@ -418,6 +418,11 @@ struct perf_event_attr {
>> __u16 __reserved_2; /* align to __u64 */
>> };
>>
>> +struct perf_event_query_bpf {
>> + __u64 prog_ids;
>> + __u32 prog_cnt;
>> +};
>> +
>> #define perf_flags(attr) (*(&(attr)->read_format + 1))
>>
>> /*
>> @@ -433,6 +438,7 @@ struct perf_event_attr {
>> #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
>> #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
>> #define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
>> +#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
>>
>> enum perf_event_ioc_flags {
>> PERF_IOC_FLAG_GROUP = 1U << 0,
>> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
>> index b9f8686..40e3b8d 100644
>> --- a/kernel/bpf/core.c
>> +++ b/kernel/bpf/core.c
>> @@ -1461,6 +1461,8 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
>> rcu_read_lock();
>> prog = rcu_dereference(progs)->progs;
>> for (; *prog; prog++) {
>> + if (*prog == &dummy_bpf_prog.prog)
>> + continue;
>> id = (*prog)->aux->id;
>> if (copy_to_user(prog_ids + i, &id, sizeof(id))) {
>> rcu_read_unlock();
>> @@ -1544,6 +1546,28 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
>> return 0;
>> }
>>
>> +int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
>> + __u32 __user *prog_ids, u32 request_cnt,
>> + __u32 __user *prog_cnt)
>> +{
>> + struct bpf_prog **prog;
>> + u32 cnt = 0;
>> +
>> + if (array) {
>> + for (prog = array->progs; *prog; prog++)
>> + if (*prog != &dummy_bpf_prog.prog)
>> + cnt++;
>> + }
>> +
>> + if (copy_to_user(prog_cnt, &cnt, sizeof(cnt)))
>> + return -EFAULT;
>> +
>> + if (cnt == 0)
>> + return 0;
>
> One minor thing I still noticed in bpf_prog_array_copy_info() was
> that potentially we could return 0 as well if request_cnt was 0 if
> users only want to query if progs are present (resp. how many attached)
> but don't care which ones.
>
> Otherwise, in bpf_prog_array_copy_to_user() it tries to copy as much
> prog ids as present if request_cnt == 0. Can we handle this in user
> space e.g. by having an exposed max upper limit where user space can
> define the prog id array with?
For cgroup program array, in kernel/bpf/cgroup.c, we have
if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
return -EFAULT;
if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
/* return early if user requested only program count +
flags */
return 0;
If user requested prog_cnt is 0, or user provided prog_ids pointer is
NULL, or the number of actual programs is 0, we will just return fine.
I guess I can use the same logic here.
Thanks for spotting this issue! Will add additional tests as well for
this and send v2.
>
>> + return bpf_prog_array_copy_to_user(array, prog_ids, request_cnt);
>> +}
>> +
>> static void bpf_prog_free_deferred(struct work_struct *work)
>> {
>> struct bpf_prog_aux *aux;
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index 9404c63..93aec2c 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -4723,6 +4723,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
>> rcu_read_unlock();
>> return 0;
>> }
>> +
>> + case PERF_EVENT_IOC_QUERY_BPF:
>> + return bpf_event_query_prog_array(event, (void __user *)arg);
>> default:
>> return -ENOTTY;
>> }
>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>> index 27d1f4f..7fb7f74 100644
>> --- a/kernel/trace/bpf_trace.c
>> +++ b/kernel/trace/bpf_trace.c
>> @@ -812,3 +812,26 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
>> unlock:
>> mutex_unlock(&bpf_event_mutex);
>> }
>> +
>> +int bpf_event_query_prog_array(struct perf_event *event, void __user *info)
>> +{
>> + struct perf_event_query_bpf __user *uquery = info;
>> + struct perf_event_query_bpf query = {};
>> + int ret;
>> +
>> + if (!capable(CAP_SYS_ADMIN))
>> + return -EPERM;
>> + if (event->attr.type != PERF_TYPE_TRACEPOINT)
>> + return -EINVAL;
>> + if (copy_from_user(&query, uquery, sizeof(query)))
>> + return -EFAULT;
>> +
>> + mutex_lock(&bpf_event_mutex);
>> + ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
>> + u64_to_user_ptr(query.prog_ids),
>> + query.prog_cnt,
>> + &uquery->prog_cnt);
>> + mutex_unlock(&bpf_event_mutex);
>> +
>> + return ret;
>> +}
>>
>
Powered by blists - more mailing lists