lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9ea9f74a-f0db-9ecb-acba-ff4f4c198615@fb.com>
Date:   Tue, 28 Apr 2020 18:02:40 -0700
From:   Yonghong Song <yhs@...com>
To:     Martin KaFai Lau <kafai@...com>
CC:     Andrii Nakryiko <andriin@...com>, <bpf@...r.kernel.org>,
        <netdev@...r.kernel.org>, Alexei Starovoitov <ast@...com>,
        Daniel Borkmann <daniel@...earbox.net>, <kernel-team@...com>
Subject: Re: [PATCH bpf-next v1 03/19] bpf: add bpf_map iterator



On 4/28/20 5:37 PM, Martin KaFai Lau wrote:
> On Mon, Apr 27, 2020 at 01:12:37PM -0700, Yonghong Song wrote:
>> The bpf_map iterator is implemented.
>> The bpf program is called at seq_ops show() and stop() functions.
>> bpf_iter_get_prog() will retrieve bpf program and other
>> parameters during seq_file object traversal. In show() function,
>> bpf program will traverse every valid object, and in stop()
>> function, bpf program will be called one more time after all
>> objects are traversed.
>>
>> The first member of the bpf context contains the meta data, namely,
>> the seq_file, session_id and seq_num. Here, the session_id is
>> a unique id for one specific seq_file session. The seq_num is
>> the number of bpf prog invocations in the current session.
>> The bpf_iter_get_prog(), which will be implemented in subsequent
>> patches, will have more information on how meta data are computed.
>>
>> The second member of the bpf context is a struct bpf_map pointer,
>> which bpf program can examine.
>>
>> The target implementation also provided the structure definition
>> for bpf program and the function definition for verifier to
>> verify the bpf program. Specifically for bpf_map iterator,
>> the structure is "bpf_iter__bpf_map" and the function is
>> "__bpf_iter__bpf_map".
>>
>> More targets will be implemented later, all of which will include
>> the following, similar to bpf_map iterator:
>>    - seq_ops() implementation
>>    - function definition for verifier to verify the bpf program
>>    - seq_file private data size
>>    - additional target feature
>>
>> Signed-off-by: Yonghong Song <yhs@...com>
>> ---
>>   include/linux/bpf.h   |  10 ++++
>>   kernel/bpf/Makefile   |   2 +-
>>   kernel/bpf/bpf_iter.c |  19 ++++++++
>>   kernel/bpf/map_iter.c | 107 ++++++++++++++++++++++++++++++++++++++++++
>>   kernel/bpf/syscall.c  |  13 +++++
>>   5 files changed, 150 insertions(+), 1 deletion(-)
>>   create mode 100644 kernel/bpf/map_iter.c
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 5e56abc1e2f1..4ac8d61f7c3e 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1078,6 +1078,7 @@ int  generic_map_update_batch(struct bpf_map *map,
>>   int  generic_map_delete_batch(struct bpf_map *map,
>>   			      const union bpf_attr *attr,
>>   			      union bpf_attr __user *uattr);
>> +struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
>>   
>>   extern int sysctl_unprivileged_bpf_disabled;
>>   
>> @@ -1118,7 +1119,16 @@ struct bpf_iter_reg {
>>   	u32 target_feature;
>>   };
>>   
>> +struct bpf_iter_meta {
>> +	__bpf_md_ptr(struct seq_file *, seq);
>> +	u64 session_id;
>> +	u64 seq_num;
>> +};
>> +
>>   int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
>> +struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size,
>> +				   u64 *session_id, u64 *seq_num, bool is_last);
>> +int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
>>   
>>   int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
>>   int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
>> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
>> index 6a8b0febd3f6..b2b5eefc5254 100644
>> --- a/kernel/bpf/Makefile
>> +++ b/kernel/bpf/Makefile
>> @@ -2,7 +2,7 @@
>>   obj-y := core.o
>>   CFLAGS_core.o += $(call cc-disable-warning, override-init)
>>   
>> -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o
>> +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o
>>   obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
>>   obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
>>   obj-$(CONFIG_BPF_SYSCALL) += disasm.o
>> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
>> index 1115b978607a..284c95587803 100644
>> --- a/kernel/bpf/bpf_iter.c
>> +++ b/kernel/bpf/bpf_iter.c
>> @@ -48,3 +48,22 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
>>   
>>   	return 0;
>>   }
>> +
>> +struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size,
>> +				   u64 *session_id, u64 *seq_num, bool is_last)
>> +{
>> +	return NULL;
> Can this patch be moved after this function is implemented?

I tried to have an example of how registration looks,
so I put the bpf_map iterator implementation patch immediately
after the bpf_iter_reg_target() patch. Unfortunately, to make
the iterator implementation complete so the compiler can pass,
I need this function to be implemented as above.

I guess I can delay this patch until I can properly
implement it, just like my RFC v2.

> 
>> +}
>> +
>> +int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
>> +{
>> +	int ret;
>> +
>> +	migrate_disable();
>> +	rcu_read_lock();
>> +	ret = BPF_PROG_RUN(prog, ctx);
>> +	rcu_read_unlock();
>> +	migrate_enable();
>> +
>> +	return ret;
>> +}
>> diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
>> new file mode 100644
>> index 000000000000..bb3ad4c3bde5
>> --- /dev/null
>> +++ b/kernel/bpf/map_iter.c
>> @@ -0,0 +1,107 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/* Copyright (c) 2020 Facebook */
>> +#include <linux/bpf.h>
>> +#include <linux/fs.h>
>> +#include <linux/filter.h>
>> +#include <linux/kernel.h>
>> +
>> +struct bpf_iter_seq_map_info {
>> +	struct bpf_map *map;
>> +	u32 id;
>> +};
>> +
>> +static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos)
>> +{
>> +	struct bpf_iter_seq_map_info *info = seq->private;
>> +	struct bpf_map *map;
>> +	u32 id = info->id;
>> +
>> +	map = bpf_map_get_curr_or_next(&id);
>> +	if (IS_ERR_OR_NULL(map))
>> +		return NULL;
>> +
>> +	++*pos;
> Does pos always need to be incremented here?

Yes, I skipped passing SEQ_START_TOKEN to show(). To put it another way,
the bpf program won't be called for SEQ_START_TOKEN, so I took a shortcut here.

> 
>> +	info->map = map;
>> +	info->id = id;
>> +	return map;
>> +}
>> +
>> +static void *bpf_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
>> +{
>> +	struct bpf_iter_seq_map_info *info = seq->private;
>> +	struct bpf_map *map;
>> +
>> +	++*pos;
>> +	++info->id;
>> +	map = bpf_map_get_curr_or_next(&info->id);
>> +	if (IS_ERR_OR_NULL(map))
>> +		return NULL;
>> +
>> +	bpf_map_put(info->map);
>> +	info->map = map;
>> +	return map;
>> +}
>> +
>> +struct bpf_iter__bpf_map {
>> +	__bpf_md_ptr(struct bpf_iter_meta *, meta);
>> +	__bpf_md_ptr(struct bpf_map *, map);
>> +};
>> +
>> +int __init __bpf_iter__bpf_map(struct bpf_iter_meta *meta, struct bpf_map *map)
>> +{
>> +	return 0;
>> +}
>> +
>> +static int bpf_map_seq_show(struct seq_file *seq, void *v)
>> +{
>> +	struct bpf_iter_meta meta;
>> +	struct bpf_iter__bpf_map ctx;
>> +	struct bpf_prog *prog;
>> +	int ret = 0;
>> +
>> +	ctx.meta = &meta;
>> +	ctx.map = v;
>> +	meta.seq = seq;
>> +	prog = bpf_iter_get_prog(seq, sizeof(struct bpf_iter_seq_map_info),
>> +				 &meta.session_id, &meta.seq_num,
>> +				 v == (void *)0);
>  From looking at seq_file.c, when will show() be called with "v == NULL"?

In the stop() function.

> 
>> +	if (prog)
>> +		ret = bpf_iter_run_prog(prog, &ctx);
>> +
>> +	return ret == 0 ? 0 : -EINVAL;
> The verifier change in patch 4 should have ensured that prog
> can only return 0?

Yes. I forgot to update this after I added the verifier
enforcement at the last minute. I can do
	if (prog)
		bpf_iter_run_prog(prog, &ctx);

	return 0;

> 
>> +}
>> +
>> +static void bpf_map_seq_stop(struct seq_file *seq, void *v)
>> +{
>> +	struct bpf_iter_seq_map_info *info = seq->private;
>> +
>> +	if (!v)
>> +		bpf_map_seq_show(seq, v);

bpf program for NULL object is called here.

>> +
>> +	if (info->map) {
>> +		bpf_map_put(info->map);
>> +		info->map = NULL;
>> +	}
>> +}
>> +
>> +static const struct seq_operations bpf_map_seq_ops = {
>> +	.start	= bpf_map_seq_start,
>> +	.next	= bpf_map_seq_next,
>> +	.stop	= bpf_map_seq_stop,
>> +	.show	= bpf_map_seq_show,
>> +};
>> +
>> +static int __init bpf_map_iter_init(void)
>> +{
>> +	struct bpf_iter_reg reg_info = {
>> +		.target			= "bpf_map",
>> +		.target_func_name	= "__bpf_iter__bpf_map",
>> +		.seq_ops		= &bpf_map_seq_ops,
>> +		.seq_priv_size		= sizeof(struct bpf_iter_seq_map_info),
>> +		.target_feature		= 0,
>> +	};
>> +
>> +	return bpf_iter_reg_target(&reg_info);
>> +}
>> +
>> +late_initcall(bpf_map_iter_init);
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index 7626b8024471..022187640943 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -2800,6 +2800,19 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr,
>>   	return err;
>>   }
>>   
>> +struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
>> +{
>> +	struct bpf_map *map;
>> +
>> +	spin_lock_bh(&map_idr_lock);
>> +	map = idr_get_next(&map_idr, id);
>> +	if (map)
>> +		map = __bpf_map_inc_not_zero(map, false);
> nit. For the !map case, set "map = ERR_PTR(-ENOENT)" so that
> the _OR_NULL() test is not needed.  It will be more consistent
> with other error checking codes in syscall.c.

Good point, will do that.

> 
>> +	spin_unlock_bh(&map_idr_lock);
>> +
>> +	return map;
>> +}
>> +
>>   #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
>>   
>>   struct bpf_prog *bpf_prog_by_id(u32 id)
>> -- 
>> 2.24.1
>>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ