[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1db29c53-1568-a3b5-f2d3-1c830aefe33b@iogearbox.net>
Date: Wed, 23 May 2018 11:54:38 +0200
From: Daniel Borkmann <daniel@...earbox.net>
To: Jesper Dangaard Brouer <brouer@...hat.com>, netdev@...r.kernel.org,
Daniel Borkmann <borkmann@...earbox.net>,
Alexei Starovoitov <alexei.starovoitov@...il.com>
Cc: Christoph Hellwig <hch@...radead.org>,
BjörnTöpel <bjorn.topel@...el.com>,
Magnus Karlsson <magnus.karlsson@...el.com>,
makita.toshiaki@....ntt.co.jp
Subject: Re: [bpf-next V4 PATCH 2/8] bpf: devmap prepare xdp frames for
bulking
On 05/18/2018 03:34 PM, Jesper Dangaard Brouer wrote:
> Like cpumap create queue for xdp frames that will be bulked. For now,
> this patch simply invoke ndo_xdp_xmit foreach frame. This happens,
> either when the map flush operation is envoked, or when the limit
> DEV_MAP_BULK_SIZE is reached.
>
> Signed-off-by: Jesper Dangaard Brouer <brouer@...hat.com>
> ---
> kernel/bpf/devmap.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 73 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index 808808bf2bf2..cab72c100bb5 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -54,11 +54,18 @@
> #define DEV_CREATE_FLAG_MASK \
> (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
>
> +#define DEV_MAP_BULK_SIZE 16
> +struct xdp_bulk_queue {
> + struct xdp_frame *q[DEV_MAP_BULK_SIZE];
> + unsigned int count;
> +};
> +
> /* objects in the map */
> struct bpf_dtab_netdev {
> struct net_device *dev; /* must be first member, due to tracepoint */
> struct bpf_dtab *dtab;
> unsigned int bit;
> + struct xdp_bulk_queue __percpu *bulkq;
> struct rcu_head rcu;
> };
>
> @@ -209,6 +216,38 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
> __set_bit(bit, bitmap);
> }
>
> +static int bq_xmit_all(struct bpf_dtab_netdev *obj,
> + struct xdp_bulk_queue *bq)
> +{
> + unsigned int processed = 0, drops = 0;
> + struct net_device *dev = obj->dev;
> + int i;
> +
> + if (unlikely(!bq->count))
> + return 0;
> +
> + for (i = 0; i < bq->count; i++) {
> + struct xdp_frame *xdpf = bq->q[i];
> +
> + prefetch(xdpf);
> + }
> +
> + for (i = 0; i < bq->count; i++) {
> + struct xdp_frame *xdpf = bq->q[i];
> + int err;
> +
> + err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
> + if (err) {
> + drops++;
> + xdp_return_frame(xdpf);
> + }
> + processed++;
This sort of thing makes it really hard to review. 'processed' and
'drops' are not read anywhere in this function. So I need to go and
check all the other patches whether there's further logic involved here
or not. I had to review your series after applying all patches in a
local branch, please never do this. Add the logic in a patch where it's
self-contained and obvious to review.
> + }
> + bq->count = 0;
> +
> + return 0;
> +}
> +
> /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
> * from the driver before returning from its napi->poll() routine. The poll()
> * routine is called either from busy_poll context or net_rx_action signaled
> @@ -224,6 +263,7 @@ void __dev_map_flush(struct bpf_map *map)
>
> for_each_set_bit(bit, bitmap, map->max_entries) {
> struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
> + struct xdp_bulk_queue *bq;
> struct net_device *netdev;
>
> /* This is possible if the dev entry is removed by user space
> @@ -233,6 +273,9 @@ void __dev_map_flush(struct bpf_map *map)
> continue;
>
> __clear_bit(bit, bitmap);
> +
> + bq = this_cpu_ptr(dev->bulkq);
> + bq_xmit_all(dev, bq);
> netdev = dev->dev;
> if (likely(netdev->netdev_ops->ndo_xdp_flush))
> netdev->netdev_ops->ndo_xdp_flush(netdev);
> @@ -255,6 +298,20 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
> return obj;
> }
>
> +/* Runs under RCU-read-side, plus in softirq under NAPI protection.
> + * Thus, safe percpu variable access.
> + */
> +static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
> +{
> + struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
> +
> + if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
> + bq_xmit_all(obj, bq);
> +
> + bq->q[bq->count++] = xdpf;
> + return 0;
> +}
> +
> int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
> {
> struct net_device *dev = dst->dev;
> @@ -268,8 +325,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
> if (unlikely(!xdpf))
> return -EOVERFLOW;
>
> - /* TODO: implement a bulking/enqueue step later */
> - err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
> + err = bq_enqueue(dst, xdpf);
> if (err)
> return err;
>
> @@ -288,13 +344,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
> {
> if (dev->dev->netdev_ops->ndo_xdp_flush) {
> struct net_device *fl = dev->dev;
> + struct xdp_bulk_queue *bq;
> unsigned long *bitmap;
> +
> int cpu;
Please remove the newline from above.
> for_each_online_cpu(cpu) {
> bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
> __clear_bit(dev->bit, bitmap);
>
> + bq = per_cpu_ptr(dev->bulkq, cpu);
> + bq_xmit_all(dev, bq);
> +
> fl->netdev_ops->ndo_xdp_flush(dev->dev);
> }
> }
> @@ -306,6 +367,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
>
> dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
> dev_map_flush_old(dev);
> + free_percpu(dev->bulkq);
> dev_put(dev->dev);
> kfree(dev);
> }
> @@ -338,6 +400,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
> {
> struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
> struct net *net = current->nsproxy->net_ns;
> + gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
> struct bpf_dtab_netdev *dev, *old_dev;
> u32 i = *(u32 *)key;
> u32 ifindex = *(u32 *)value;
> @@ -352,11 +415,17 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
> if (!ifindex) {
> dev = NULL;
> } else {
> - dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
> - map->numa_node);
> + dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
> if (!dev)
> return -ENOMEM;
>
> + dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
> + sizeof(void *), gfp);
> + if (!dev->bulkq) {
> + kfree(dev);
> + return -ENOMEM;
> + }
> +
> dev->dev = dev_get_by_index(net, ifindex);
> if (!dev->dev) {
> kfree(dev);
Ahh well, and I just realized that here you are leaking memory in the error path. :(
A free_percpu(dev->bulkq) is missing.
Please fix this bug up and send a fresh series, so we can fix this right away without
needing a follow-up in bpf-next.
Thanks,
Daniel
Powered by blists - more mailing lists