netdev - Re: [PATCH net-next 1/2] bpf/tracing: allow user space to query prog array on the same tp

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <0a11c37f-ea13-593d-ef5d-d1aa843d6a84@fb.com>
Date:   Thu, 30 Nov 2017 11:12:50 -0800
From:   Yonghong Song <yhs@...com>
To:     Daniel Borkmann <daniel@...earbox.net>, <peterz@...radead.org>,
        <rostedt@...dmis.org>, <ast@...com>, <kafai@...com>,
        <netdev@...r.kernel.org>
CC:     <kernel-team@...com>
Subject: Re: [PATCH net-next 1/2] bpf/tracing: allow user space to query prog
 array on the same tp



On 11/30/17 9:27 AM, Daniel Borkmann wrote:
> On 11/29/2017 08:20 AM, Yonghong Song wrote:
>> Commit e87c6bc3852b ("bpf: permit multiple bpf attachments
>> for a single perf event") added support to attach multiple
>> bpf programs to a single perf event.
>> Commit 2541517c32be ("tracing, perf: Implement BPF programs
>> attached to kprobes") utilized the existing perf ioctl
>> interface and added the command PERF_EVENT_IOC_SET_BPF
>> to attach a bpf program to a tracepoint.
>>
>> This patch adds a new ioctl
>> command, given a perf event fd, to query the bpf program array
>> attached to the same perf tracepoint event.
>>
>> The new uapi ioctl command:
>>    PERF_EVENT_IOC_QUERY_BPF
>>
>> The new uapi/linux/perf_event.h structure:
>>    struct perf_event_query_bpf {
>>         __u64	prog_ids;
>>         __u32	prog_cnt;
>>    };
>>
>> The usage:
>>    struct perf_event_query_bpf query;
>>    query.prog_ids = (__u64)usr_prog_ids_buf;
>>    query.prog_cnt = usr_prog_ids_buf_len;
>>    err = ioctl(pmu_efd, PERF_EVENT_IOC_QUERY_BPF, &query);
>>
>> Signed-off-by: Yonghong Song <yhs@...com>
>> ---
>>   include/linux/bpf.h             |  4 ++++
>>   include/uapi/linux/perf_event.h |  6 ++++++
>>   kernel/bpf/core.c               | 24 ++++++++++++++++++++++++
>>   kernel/events/core.c            |  3 +++
>>   kernel/trace/bpf_trace.c        | 23 +++++++++++++++++++++++
>>   5 files changed, 60 insertions(+)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index e55e425..f812ac5 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -254,6 +254,7 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
>>   
>>   u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
>>   		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
>> +int bpf_event_query_prog_array(struct perf_event *event, void __user *info);
>>   
>>   int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
>>   			  union bpf_attr __user *uattr);
>> @@ -285,6 +286,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
>>   
>>   void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
>>   				struct bpf_prog *old_prog);
>> +int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
>> +			     __u32 __user *prog_ids, u32 request_cnt,
>> +			     __u32 __user *prog_cnt);
>>   int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
>>   			struct bpf_prog *exclude_prog,
>>   			struct bpf_prog *include_prog,
>> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
>> index b9a4953..fee0b43 100644
>> --- a/include/uapi/linux/perf_event.h
>> +++ b/include/uapi/linux/perf_event.h
>> @@ -418,6 +418,11 @@ struct perf_event_attr {
>>   	__u16	__reserved_2;	/* align to __u64 */
>>   };
>>   
>> +struct perf_event_query_bpf {
>> +	__u64	prog_ids;
>> +	__u32	prog_cnt;
>> +};
>> +
>>   #define perf_flags(attr)	(*(&(attr)->read_format + 1))
>>   
>>   /*
>> @@ -433,6 +438,7 @@ struct perf_event_attr {
>>   #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
>>   #define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
>>   #define PERF_EVENT_IOC_PAUSE_OUTPUT	_IOW('$', 9, __u32)
>> +#define PERF_EVENT_IOC_QUERY_BPF	_IOWR('$', 10, struct perf_event_query_bpf *)
>>   
>>   enum perf_event_ioc_flags {
>>   	PERF_IOC_FLAG_GROUP		= 1U << 0,
>> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
>> index b9f8686..40e3b8d 100644
>> --- a/kernel/bpf/core.c
>> +++ b/kernel/bpf/core.c
>> @@ -1461,6 +1461,8 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
>>   	rcu_read_lock();
>>   	prog = rcu_dereference(progs)->progs;
>>   	for (; *prog; prog++) {
>> +		if (*prog == &dummy_bpf_prog.prog)
>> +			continue;
>>   		id = (*prog)->aux->id;
>>   		if (copy_to_user(prog_ids + i, &id, sizeof(id))) {
>>   			rcu_read_unlock();
>> @@ -1544,6 +1546,28 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
>>   	return 0;
>>   }
>>   
>> +int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
>> +			     __u32 __user *prog_ids, u32 request_cnt,
>> +			     __u32 __user *prog_cnt)
>> +{
>> +	struct bpf_prog **prog;
>> +	u32 cnt = 0;
>> +
>> +	if (array) {
>> +		for (prog = array->progs; *prog; prog++)
>> +			if (*prog != &dummy_bpf_prog.prog)
>> +				cnt++;
>> +	}
>> +
>> +	if (copy_to_user(prog_cnt, &cnt, sizeof(cnt)))
>> +		return -EFAULT;
>> +
>> +	if (cnt == 0)
>> +		return 0;
> 
> One minor thing I still noticed in bpf_prog_array_copy_info() is
> that we could also return 0 when request_cnt is 0, for users who
> only want to query whether progs are present (resp. how many are
> attached) but don't care which ones.
> 
> Otherwise, bpf_prog_array_copy_to_user() tries to copy as many
> prog ids as are present if request_cnt == 0. Can we handle this in
> user space, e.g. by exposing a max upper limit that user space can
> use to size the prog id array?

For cgroup program array, in kernel/bpf/cgroup.c, we have
         if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
                 return -EFAULT;
         if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
                 /* return early if user requested only program count + flags */
                 return 0;

If the user-requested prog_cnt is 0, the user-provided prog_ids pointer is
NULL, or the actual number of programs is 0, we simply return success.
I guess I can use the same logic here.

Thanks for spotting this issue! Will add additional tests as well for 
this and send v2.

> 
>> +	return bpf_prog_array_copy_to_user(array, prog_ids, request_cnt);
>> +}
>> +
>>   static void bpf_prog_free_deferred(struct work_struct *work)
>>   {
>>   	struct bpf_prog_aux *aux;
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index 9404c63..93aec2c 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -4723,6 +4723,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
>>   		rcu_read_unlock();
>>   		return 0;
>>   	}
>> +
>> +	case PERF_EVENT_IOC_QUERY_BPF:
>> +		return bpf_event_query_prog_array(event, (void __user *)arg);
>>   	default:
>>   		return -ENOTTY;
>>   	}
>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>> index 27d1f4f..7fb7f74 100644
>> --- a/kernel/trace/bpf_trace.c
>> +++ b/kernel/trace/bpf_trace.c
>> @@ -812,3 +812,26 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
>>   unlock:
>>   	mutex_unlock(&bpf_event_mutex);
>>   }
>> +
>> +int bpf_event_query_prog_array(struct perf_event *event, void __user *info)
>> +{
>> +	struct perf_event_query_bpf __user *uquery = info;
>> +	struct perf_event_query_bpf query = {};
>> +	int ret;
>> +
>> +	if (!capable(CAP_SYS_ADMIN))
>> +		return -EPERM;
>> +	if (event->attr.type != PERF_TYPE_TRACEPOINT)
>> +		return -EINVAL;
>> +	if (copy_from_user(&query, uquery, sizeof(query)))
>> +		return -EFAULT;
>> +
>> +	mutex_lock(&bpf_event_mutex);
>> +	ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
>> +				       u64_to_user_ptr(query.prog_ids),
>> +				       query.prog_cnt,
>> +				       &uquery->prog_cnt);
>> +	mutex_unlock(&bpf_event_mutex);
>> +
>> +	return ret;
>> +}
>>
>