Message-ID: <155136022930.3174.12316814645664378190.stgit@alrua-x1>
Date: Thu, 28 Feb 2019 14:23:49 +0100
From: Toke Høiland-Jørgensen <toke@...hat.com>
To: David Miller <davem@...emloft.net>
Cc: netdev@...r.kernel.org, Jesper Dangaard Brouer <brouer@...hat.com>,
 Daniel Borkmann <daniel@...earbox.net>, Alexei Starovoitov <ast@...nel.org>,
 Jakub Kicinski <jakub.kicinski@...ronome.com>
Subject: [PATCH net-next v2 1/3] xdp: Refactor devmap code in preparation for
 subsequent additions

The subsequent commits introducing default maps and a hash-based ifindex
devmap require a bit of refactoring of the devmap code. Perform this first
so the subsequent commits become easier to read.

Signed-off-by: Toke Høiland-Jørgensen <toke@...hat.com>
---
 kernel/bpf/devmap.c | 177 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 109 insertions(+), 68 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 191b79948424..1037fc08c504 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -75,6 +75,7 @@ struct bpf_dtab {
 	struct bpf_dtab_netdev **netdev_map;
 	unsigned long __percpu *flush_needed;
 	struct list_head list;
+	struct rcu_head rcu;
 };
 
 static DEFINE_SPINLOCK(dev_map_lock);
@@ -85,23 +86,11 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)
 	return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
 }
 
-static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
+static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr,
+			    bool check_memlock)
 {
-	struct bpf_dtab *dtab;
-	int err = -EINVAL;
 	u64 cost;
-
-	if (!capable(CAP_NET_ADMIN))
-		return ERR_PTR(-EPERM);
-
-	/* check sanity of attributes */
-	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
-		return ERR_PTR(-EINVAL);
-
-	dtab = kzalloc(sizeof(*dtab), GFP_USER);
-	if (!dtab)
-		return ERR_PTR(-ENOMEM);
+	int err;
 
 	bpf_map_init_from_attr(&dtab->map, attr);
 
@@ -109,60 +98,72 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
 	cost += dev_map_bitmap_size(attr) * num_possible_cpus();
 	if (cost >= U32_MAX - PAGE_SIZE)
-		goto free_dtab;
+		return -EINVAL;
 
 	dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
-	/* if map size is larger than memlock limit, reject it early */
-	err = bpf_map_precharge_memlock(dtab->map.pages);
-	if (err)
-		goto free_dtab;
-
-	err = -ENOMEM;
+	if (check_memlock) {
+		/* if map size is larger than memlock limit, reject it early */
+		err = bpf_map_precharge_memlock(dtab->map.pages);
+		if (err)
+			return -EINVAL;
+	}
 
 	/* A per cpu bitfield with a bit per possible net device */
 	dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
 						__alignof__(unsigned long),
 						GFP_KERNEL | __GFP_NOWARN);
 	if (!dtab->flush_needed)
-		goto free_dtab;
+		goto err_alloc;
 
 	dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
 					      sizeof(struct bpf_dtab_netdev *),
 					      dtab->map.numa_node);
 	if (!dtab->netdev_map)
-		goto free_dtab;
+		goto err_map;
 
-	spin_lock(&dev_map_lock);
-	list_add_tail_rcu(&dtab->list, &dev_map_list);
-	spin_unlock(&dev_map_lock);
+	return 0;
 
-	return &dtab->map;
-free_dtab:
+err_map:
 	free_percpu(dtab->flush_needed);
-	kfree(dtab);
-	return ERR_PTR(err);
+err_alloc:
+	return -ENOMEM;
 }
 
-static void dev_map_free(struct bpf_map *map)
+static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 {
-	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	int i, cpu;
+	struct bpf_dtab *dtab;
+	int err;
 
-	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
-	 * so the programs (can be more than one that used this map) were
-	 * disconnected from events. Wait for outstanding critical sections in
-	 * these programs to complete. The rcu critical section only guarantees
-	 * no further reads against netdev_map. It does __not__ ensure pending
-	 * flush operations (if any) are complete.
-	 */
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	/* check sanity of attributes */
+	if (attr->max_entries == 0 || attr->key_size != 4 ||
+	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
+		return ERR_PTR(-EINVAL);
+
+	dtab = kzalloc(sizeof(*dtab), GFP_USER);
+	if (!dtab)
+		return ERR_PTR(-ENOMEM);
+
+	err = dev_map_init_map(dtab, attr, true);
+	if (err) {
+		kfree(dtab);
+		return ERR_PTR(err);
+	}
 
 	spin_lock(&dev_map_lock);
-	list_del_rcu(&dtab->list);
+	list_add_tail_rcu(&dtab->list, &dev_map_list);
 	spin_unlock(&dev_map_lock);
 
-	bpf_clear_redirect_map(map);
-	synchronize_rcu();
+	return &dtab->map;
+}
+
+static void __dev_map_free(struct rcu_head *rcu)
+{
+	struct bpf_dtab *dtab = container_of(rcu, struct bpf_dtab, rcu);
+	int i, cpu;
 
 	/* To ensure all pending flush operations have completed wait for flush
 	 * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
@@ -192,6 +193,26 @@ static void dev_map_free(struct bpf_map *map)
 	kfree(dtab);
 }
 
+static void dev_map_free(struct bpf_map *map)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+
+	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+	 * so the programs (can be more than one that used this map) were
+	 * disconnected from events. Wait for outstanding critical sections in
+	 * these programs to complete. The rcu critical section only guarantees
+	 * no further reads against netdev_map. It does __not__ ensure pending
+	 * flush operations (if any) are complete.
+	 */
+
+	spin_lock(&dev_map_lock);
+	list_del_rcu(&dtab->list);
+	spin_unlock(&dev_map_lock);
+
+	bpf_clear_redirect_map(map);
+	call_rcu(&dtab->rcu, __dev_map_free);
+}
+
 static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
@@ -429,12 +450,42 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
 	return 0;
 }
 
-static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
-			       u64 map_flags)
+static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
+						    struct bpf_dtab *dtab,
+						    u32 ifindex,
+						    unsigned int bit)
 {
-	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	struct net *net = current->nsproxy->net_ns;
 	gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+	struct bpf_dtab_netdev *dev;
+
+	dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+
+	dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
+					sizeof(void *), gfp);
+	if (!dev->bulkq) {
+		kfree(dev);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	dev->dev = dev_get_by_index(net, ifindex);
+	if (!dev->dev) {
+		free_percpu(dev->bulkq);
+		kfree(dev);
+		return ERR_PTR(-EINVAL);
+	}
+
+	dev->bit = bit;
+	dev->dtab = dtab;
+
+	return dev;
+}
+
+static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
+				 void *key, void *value, u64 map_flags)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dev, *old_dev;
 	u32 i = *(u32 *)key;
 	u32 ifindex = *(u32 *)value;
@@ -449,26 +500,9 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (!ifindex) {
 		dev = NULL;
 	} else {
-		dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
-		if (!dev)
-			return -ENOMEM;
-
-		dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
-						sizeof(void *), gfp);
-		if (!dev->bulkq) {
-			kfree(dev);
-			return -ENOMEM;
-		}
-
-		dev->dev = dev_get_by_index(net, ifindex);
-		if (!dev->dev) {
-			free_percpu(dev->bulkq);
-			kfree(dev);
-			return -EINVAL;
-		}
-
-		dev->bit = i;
-		dev->dtab = dtab;
+		dev = __dev_map_alloc_node(net, dtab, ifindex, i);
+		if (IS_ERR(dev))
+			return PTR_ERR(dev);
 	}
 
 	/* Use call_rcu() here to ensure rcu critical sections have completed
@@ -482,6 +516,13 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 	return 0;
 }
 
+static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
+			       u64 map_flags)
+{
+	return __dev_map_update_elem(current->nsproxy->net_ns,
+				     map, key, value, map_flags);
+}
+
 const struct bpf_map_ops dev_map_ops = {
 	.map_alloc = dev_map_alloc,
 	.map_free = dev_map_free,
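
For context on how such a map is consumed, a minimal sketch of an XDP program
redirecting packets through a BPF_MAP_TYPE_DEVMAP follows. This is not part of
the patch: the tx_port map, the program name, and the samples/bpf-style
bpf_helpers.h include are illustrative assumptions. Note that both key and
value are 4 bytes wide, matching the attr->key_size/attr->value_size checks
retained in dev_map_alloc() above.

/* Illustrative only; not part of this patch. Redirects every packet out
 * of the netdev stored at index 0 of a devmap: the key is a map index,
 * the value the target ifindex.
 */
#include <linux/bpf.h>
#include "bpf_helpers.h"	/* SEC(), struct bpf_map_def, bpf_redirect_map() */

struct bpf_map_def SEC("maps") tx_port = {
	.type		= BPF_MAP_TYPE_DEVMAP,
	.key_size	= sizeof(__u32),	/* index into the map */
	.value_size	= sizeof(__u32),	/* ifindex of the target netdev */
	.max_entries	= 64,
};

SEC("xdp_redirect_map")
int xdp_redirect_map_prog(struct xdp_md *ctx)
{
	/* Returns XDP_REDIRECT on success; the actual transmit happens
	 * in the devmap flush logic this file implements.
	 */
	return bpf_redirect_map(&tx_port, 0, 0);
}

char _license[] SEC("license") = "GPL";

Userspace fills tx_port with ifindex values via the bpf(BPF_MAP_UPDATE_ELEM)
syscall, which is what ends up in dev_map_update_elem() and, after this
refactor, __dev_map_update_elem() above.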