Message-ID: <59DB7A29.5050906@iogearbox.net>
Date: Mon, 09 Oct 2017 15:31:21 +0200
From: Daniel Borkmann <daniel@...earbox.net>
To: Jesper Dangaard Brouer <brouer@...hat.com>, netdev@...r.kernel.org
CC: jakub.kicinski@...ronome.com,
"Michael S. Tsirkin" <mst@...hat.com>, pavel.odintsov@...il.com,
Jason Wang <jasowang@...hat.com>, mchan@...adcom.com,
John Fastabend <john.fastabend@...il.com>,
peter.waskiewicz.jr@...el.com,
Daniel Borkmann <borkmann@...earbox.net>,
Alexei Starovoitov <alexei.starovoitov@...il.com>,
Andy Gospodarek <andy@...yhouse.net>
Subject: Re: [net-next V5 PATCH 1/5] bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP
On 10/06/2017 06:12 PM, Jesper Dangaard Brouer wrote:
[...]
> +static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
> +{
> + struct bpf_cpu_map *cmap;
> + int err = -ENOMEM;
The err init here is basically not needed since it's overridden later
anyway without being read, but ...
> + u64 cost;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return ERR_PTR(-EPERM);
> +
> + /* check sanity of attributes */
> + if (attr->max_entries == 0 || attr->key_size != 4 ||
> + attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
> + return ERR_PTR(-EINVAL);
> +
> + cmap = kzalloc(sizeof(*cmap), GFP_USER);
> + if (!cmap)
> + return ERR_PTR(-ENOMEM);
> +
> + /* mandatory map attributes */
> + cmap->map.map_type = attr->map_type;
> + cmap->map.key_size = attr->key_size;
> + cmap->map.value_size = attr->value_size;
> + cmap->map.max_entries = attr->max_entries;
> + cmap->map.map_flags = attr->map_flags;
> + cmap->map.numa_node = bpf_map_attr_numa_node(attr);
> +
> + /* Pre-limit array size based on NR_CPUS, not final CPU check */
> + if (cmap->map.max_entries > NR_CPUS)
Nit: needs to be >= NR_CPUS.
> + return ERR_PTR(-E2BIG);
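I.e.:

	if (cmap->map.max_entries >= NR_CPUS)
		return ERR_PTR(-E2BIG);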
> +
> + /* make sure page count doesn't overflow */
> + cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
> + cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
> + if (cost >= U32_MAX - PAGE_SIZE)
> + goto free_cmap;
> + cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
> +
> + /* Notice: returns -EPERM if map size is larger than memlock limit */
> + err = bpf_map_precharge_memlock(cmap->map.pages);
> + if (err)
> + goto free_cmap;
... here, you need to set err = -ENOMEM.
> + /* A per cpu bitfield with a bit per possible CPU in map */
> + cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
> + __alignof__(unsigned long));
> + if (!cmap->flush_needed)
> + goto free_cmap;
Otherwise, when we fail here or in the error case of bpf_map_area_alloc()
below, we still return 0 although it's really -ENOMEM. And returning 0
means that find_and_alloc_map() will miss this, since it only tests for
IS_ERR(), and we'll crash later on thinking we have a valid map pointer.
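Something like this minimal, untested sketch against the quoted code is
what I have in mind:

	err = bpf_map_precharge_memlock(cmap->map.pages);
	if (err)
		goto free_cmap;

	/* From here on, any failure is an allocation failure. */
	err = -ENOMEM;

	/* A per cpu bitfield with a bit per possible CPU in map */
	cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
					    __alignof__(unsigned long));
	if (!cmap->flush_needed)
		goto free_cmap;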
> + /* Alloc array for possible remote "destination" CPUs */
> + cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
> + sizeof(struct bpf_cpu_map_entry *),
> + cmap->map.numa_node);
> + if (!cmap->cpu_map)
> + goto free_cmap;
> +
> + return &cmap->map;
> +free_cmap:
> + free_percpu(cmap->flush_needed);
> + kfree(cmap);
> + return ERR_PTR(err);
> +}
> +
[...]
> +int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
> + u64 map_flags)
> +{
> + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> + struct bpf_cpu_map_entry *rcpu;
> +
> + /* Array index key corresponds to CPU number */
> + u32 key_cpu = *(u32 *)key;
> + /* Value is the queue size */
> + u32 qsize = *(u32 *)value;
> +
> + /* Make sure CPU is a valid possible cpu */
> + if (!cpu_possible(key_cpu))
> + return -ENODEV;
Nit: cpu_possible() expects key_cpu < NR_CPUS, otherwise you'd access
the bitmap out of bounds.
Better move the 'key_cpu >= cmap->map.max_entries' test from below up
front: since map alloc enforces an upper limit of NR_CPUS on max_entries,
the cpu_possible() test above will then be valid, too.
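I.e., roughly (untested):

	if (unlikely(key_cpu >= cmap->map.max_entries))
		return -E2BIG;
	/* key_cpu < max_entries <= NR_CPUS holds here, so the bitmap
	 * access inside cpu_possible() is in bounds.
	 */
	if (!cpu_possible(key_cpu))
		return -ENODEV;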
> + if (unlikely(map_flags > BPF_EXIST))
> + return -EINVAL;
> + if (unlikely(key_cpu >= cmap->map.max_entries))
> + return -E2BIG;
> + if (unlikely(map_flags == BPF_NOEXIST))
> + return -EEXIST;
> + if (unlikely(qsize > 16384)) /* sanity limit on qsize */
> + return -EOVERFLOW;
> +
> + if (qsize == 0) {
> + rcpu = NULL; /* Same as deleting */
> + } else {
> + /* Updating qsize causes re-allocation of bpf_cpu_map_entry */
> + rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id);
> + if (!rcpu)
> + return -ENOMEM;
> + }
> + rcu_read_lock();
> + __cpu_map_entry_replace(cmap, key_cpu, rcpu);
> + rcu_read_unlock();
> + return 0;
> +}
[...]
> +struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
> +{
> + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> + struct bpf_cpu_map_entry *rcpu;
> +
> + if (key >= map->max_entries)
> + return NULL;
> +
> + rcpu = READ_ONCE(cmap->cpu_map[key]);
> + return rcpu;
> +}
> +
> +static void *cpu_map_lookup_elem(struct bpf_map *map, void *key)
> +{
> + struct bpf_cpu_map_entry *rcpu =
> + __cpu_map_lookup_elem(map, *(u32 *)key);
> +
> + return rcpu ? &rcpu->qsize : NULL;
I still think, as per my prior email/comment, that we should use a
per-cpu scratch buffer here. Would be nice to keep the guarantee that
no one can modify it, and it's just a tiny change.
> +}
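Roughly like this, as an untested sketch (the per-cpu variable name is
made up):

	/* Per-cpu scratch buffer, so callers only ever see a copy and
	 * cannot modify rcpu->qsize through the returned pointer.
	 */
	static DEFINE_PER_CPU(u32, cpumap_lookup_val);

	static void *cpu_map_lookup_elem(struct bpf_map *map, void *key)
	{
		struct bpf_cpu_map_entry *rcpu =
			__cpu_map_lookup_elem(map, *(u32 *)key);
		u32 *val = this_cpu_ptr(&cpumap_lookup_val);

		if (!rcpu)
			return NULL;

		*val = READ_ONCE(rcpu->qsize);
		return val;
	}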
> +
> +static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
> +{
> + struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> + u32 index = key ? *(u32 *)key : U32_MAX;
> + u32 *next = next_key;
> +
> + if (index >= cmap->map.max_entries) {
> + *next = 0;
> + return 0;
> + }
> +
> + if (index == cmap->map.max_entries - 1)
> + return -ENOENT;
> + *next = index + 1;
> + return 0;
> +}
> +
> +const struct bpf_map_ops cpu_map_ops = {
> + .map_alloc = cpu_map_alloc,
> + .map_free = cpu_map_free,
> + .map_delete_elem = cpu_map_delete_elem,
> + .map_update_elem = cpu_map_update_elem,
> + .map_lookup_elem = cpu_map_lookup_elem,
> + .map_get_next_key = cpu_map_get_next_key,
> +};