[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ad36fdca-5e65-e574-027c-ae363b5f95d4@fb.com>
Date: Thu, 9 Aug 2018 10:54:12 -0700
From: Yonghong Song <yhs@...com>
To: Daniel Borkmann <daniel@...earbox.net>,
Alexei Starovoitov <alexei.starovoitov@...il.com>
CC: <ast@...com>, <netdev@...r.kernel.org>, <kernel-team@...com>,
<jakub.kicinski@...ronome.com>
Subject: Re: [PATCH bpf] bpf: fix bpffs non-array map seq_show issue
On 8/9/18 10:02 AM, Daniel Borkmann wrote:
> On 08/09/2018 06:55 PM, Yonghong Song wrote:
>> On 8/9/18 8:59 AM, Daniel Borkmann wrote:
>>> On 08/09/2018 05:15 PM, Yonghong Song wrote:
>>>> On 8/9/18 7:24 AM, Daniel Borkmann wrote:
>>>>> On 08/09/2018 05:55 AM, Yonghong Song wrote:
>>>>>> On 8/8/18 7:25 PM, Alexei Starovoitov wrote:
>>>>>>> On Wed, Aug 08, 2018 at 06:25:19PM -0700, Yonghong Song wrote:
>>>>>>>> In function map_seq_next() of kernel/bpf/inode.c,
>>>>>>>> the first key will be the "0" regardless of the map type.
>>>>>>>> This works for array. But for hash type, if it happens
>>>>>>>> key "0" is in the map, the bpffs map show will miss
>>>>>>>> some items if the key "0" is not the first element of
>>>>>>>> the first bucket.
>>>>>>>>
>>>>>>>> This patch fixed the issue by guaranteeing to get
>>>>>>>> the first element, if the seq_show is just started,
>>>>>>>> by passing NULL pointer key to map_get_next_key() callback.
>>>>>>>> This way, no missing elements will occur for
>>>>>>>> bpffs hash table show even if key "0" is in the map.
>>>>>>
>>>>>> Currently, map_seq_show_elem callback is only implemented
>>>>>> for arraymap. So the problem actually is not exposed.
>>>>>>
>>>>>> The issue is discovered when I tried to implement
>>>>>> map_seq_show_elem for hash maps, and I will have followup
>>>>>> patches for it.
>>>
>>> Btw, on that note, I would also prefer if we could decouple
>>> BTF from the map_seq_show_elem() as there is really no reason
>>> to have it on a per-map. I had a patch below which would enable
>>> it for all map types generically, and bpftool works out of the
>>> box for it. Also, array doesn't really have to be 'int' type
>>> enforced as long as it's some data structure with 4 bytes, it's
>>> all fine, so this can be made fully generic (we only eventually
>>> care about the match in size).
>>
>> I agree with a generic map_check_btf as mostly we only care about size
>> and this change should enable btftool btf based pretty print for hash/lru_hash tables.
>
> Yep, agree, the below output from bpftool is from test_xdp_noinline.o
> where both work with it.
>
>>> From 0a8be27cbc2ac0c6fc2632865b5afe37222a1fc7 Mon Sep 17 00:00:00 2001
>>> Message-Id: <0a8be27cbc2ac0c6fc2632865b5afe37222a1fc7.1533830053.git.daniel@...earbox.net>
>>> From: Daniel Borkmann <daniel@...earbox.net>
>>> Date: Thu, 9 Aug 2018 16:50:21 +0200
>>> Subject: [PATCH bpf-next] bpf, btf: enable for all maps
>>>
>>> # bpftool m dump id 19
>>> [{
>>> "key": {
>>> "": {
>>> "vip": 0,
>>> "vipv6": []
>>> },
>>> "port": 0,
>>> "family": 0,
>>> "proto": 0
>>> },
>>> "value": {
>>> "flags": 0,
>>> "vip_num": 0
>>> }
>>> }
>>> ]
>>>
>>> Signed-off-by: Daniel Borkmann <daniel@...earbox.net>
>>> ---
>>> include/linux/bpf.h | 4 +---
>>> kernel/bpf/arraymap.c | 27 ---------------------------
>>> kernel/bpf/inode.c | 3 ++-
>>> kernel/bpf/syscall.c | 24 ++++++++++++++++++++----
>>> 4 files changed, 23 insertions(+), 35 deletions(-)
>>>
>>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>>> index cd8790d..91aa4be 100644
>>> --- a/include/linux/bpf.h
>>> +++ b/include/linux/bpf.h
>>> @@ -48,8 +48,6 @@ struct bpf_map_ops {
>>> u32 (*map_fd_sys_lookup_elem)(void *ptr);
>>> void (*map_seq_show_elem)(struct bpf_map *map, void *key,
>>> struct seq_file *m);
>>> - int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf,
>>> - u32 key_type_id, u32 value_type_id);
>>> };
>>>
>>> struct bpf_map {
>>> @@ -118,7 +116,7 @@ static inline bool bpf_map_offload_neutral(const struct bpf_map *map)
>>>
>>> static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
>>> {
>>> - return map->ops->map_seq_show_elem && map->ops->map_check_btf;
>>> + return map->ops->map_seq_show_elem;
>>> }
>>>
>>> extern const struct bpf_map_ops bpf_map_offload_ops;
>>> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
>>> index 2aa55d030..67f0bdf 100644
>>> --- a/kernel/bpf/arraymap.c
>>> +++ b/kernel/bpf/arraymap.c
>>> @@ -358,32 +358,6 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
>>> rcu_read_unlock();
>>> }
>>>
>>> -static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf,
>>> - u32 btf_key_id, u32 btf_value_id)
>>> -{
>>> - const struct btf_type *key_type, *value_type;
>>> - u32 key_size, value_size;
>>> - u32 int_data;
>>> -
>>> - key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
>>> - if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
>>> - return -EINVAL;
>>> -
>>> - int_data = *(u32 *)(key_type + 1);
>>> - /* bpf array can only take a u32 key. This check makes
>>> - * sure that the btf matches the attr used during map_create.
>>> - */
>>> - if (BTF_INT_BITS(int_data) != 32 || key_size != 4 ||
>>> - BTF_INT_OFFSET(int_data))
>>> - return -EINVAL;
>>> -
>>> - value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
>>> - if (!value_type || value_size != map->value_size)
>>> - return -EINVAL;
>>> -
>>> - return 0;
>>> -}
>>> -
>>> const struct bpf_map_ops array_map_ops = {
>>> .map_alloc_check = array_map_alloc_check,
>>> .map_alloc = array_map_alloc,
>>> @@ -394,7 +368,6 @@ const struct bpf_map_ops array_map_ops = {
>>> .map_delete_elem = array_map_delete_elem,
>>> .map_gen_lookup = array_map_gen_lookup,
>>> .map_seq_show_elem = array_map_seq_show_elem,
>>> - .map_check_btf = array_map_check_btf,
>>> };
>>>
>>> const struct bpf_map_ops percpu_array_map_ops = {
>>> diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
>>> index 76efe9a..400f27d 100644
>>> --- a/kernel/bpf/inode.c
>>> +++ b/kernel/bpf/inode.c
>>> @@ -332,7 +332,8 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
>>> struct bpf_map *map = arg;
>>>
>>> return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops,
>>> - map->btf ? &bpffs_map_fops : &bpffs_obj_fops);
>>> + bpf_map_support_seq_show(map) ?
>>> + &bpffs_map_fops : &bpffs_obj_fops);
>>
>> There are an issue here, the condition bpf_map_support_seq_show(map) may not be enough since the map specific implementation assumes availability of btf and proper map key/value btf_type_id's.
>> We can either use
>> map->btf && bpf_map_support_seq_show(map)
>> condition here, or check map->btf in each individual implementation
>> of map_support_seq_show().
>
> Good, point, agree. Will fix and cook proper patch later today.
Sounds good. i will wait until this gets merged and then resubmit.
> Thanks Yonghong!
>
>>> }
>>>
>>> static struct dentry *
>>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>>> index 5af4e9e..0b6f6e8 100644
>>> --- a/kernel/bpf/syscall.c
>>> +++ b/kernel/bpf/syscall.c
>>> @@ -455,6 +455,23 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
>>> return 0;
>>> }
>>>
>>> +static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
>>> + u32 btf_key_id, u32 btf_value_id)
>>> +{
>>> + const struct btf_type *key_type, *value_type;
>>> + u32 key_size, value_size;
>>> +
>>> + key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
>>> + if (!key_type || key_size != map->key_size)
>>> + return -EINVAL;
>>> +
>>> + value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
>>> + if (!value_type || value_size != map->value_size)
>>> + return -EINVAL;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> #define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
>>> /* called via syscall */
>>> static int map_create(union bpf_attr *attr)
>>> @@ -489,8 +506,7 @@ static int map_create(union bpf_attr *attr)
>>> atomic_set(&map->refcnt, 1);
>>> atomic_set(&map->usercnt, 1);
>>>
>>> - if (bpf_map_support_seq_show(map) &&
>>> - (attr->btf_key_type_id || attr->btf_value_type_id)) {
>>> + if (attr->btf_key_type_id || attr->btf_value_type_id) {
>>> struct btf *btf;
>>>
>>> if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
>>> @@ -504,8 +520,8 @@ static int map_create(union bpf_attr *attr)
>>> goto free_map_nouncharge;
>>> }
>>>
>>> - err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
>>> - attr->btf_value_type_id);
>>> + err = map_check_btf(map, btf, attr->btf_key_type_id,
>>> + attr->btf_value_type_id);
>>> if (err) {
>>> btf_put(btf);
>>> goto free_map_nouncharge;
>>>
>
Powered by blists - more mailing lists