lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJ+HfNgkouU8=T2+Of1nAfwBQ-eqCKKAqrNzhhEafw5qW8bO_w@mail.gmail.com>
Date:   Fri, 10 Jan 2020 16:03:47 +0100
From:   Björn Töpel <bjorn.topel@...il.com>
To:     Toke Høiland-Jørgensen <toke@...hat.com>
Cc:     Netdev <netdev@...r.kernel.org>, bpf <bpf@...r.kernel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Alexei Starovoitov <ast@...nel.org>,
        David Miller <davem@...emloft.net>,
        Jesper Dangaard Brouer <brouer@...hat.com>,
        John Fastabend <john.fastabend@...il.com>
Subject: Re: [PATCH bpf-next 1/2] xdp: Move devmap bulk queue into struct net_device

On Fri, 10 Jan 2020 at 15:22, Toke Høiland-Jørgensen <toke@...hat.com> wrote:
>
> From: Toke Høiland-Jørgensen <toke@...hat.com>
>
> Commit 96360004b862 ("xdp: Make devmap flush_list common for all map
> instances"), changed devmap flushing to be a global operation instead of a
> per-map operation. However, the queue structure used for bulking was still
> allocated as part of the containing map.
>
> This patch moves the devmap bulk queue into struct net_device. The
> motivation for this is reusing it for the non-map variant of XDP_REDIRECT,
> which will be changed in a subsequent commit.
>
> We defer the actual allocation of the bulk queue structure until the
> NETDEV_REGISTER notification devmap.c. This makes it possible to check for
> ndo_xdp_xmit support before allocating the structure, which is not possible
> at the time struct net_device is allocated. However, we keep the freeing in
> free_netdev() to avoid adding another RCU callback on NETDEV_UNREGISTER.
>
> Because of this change, we lose the reference back to the map that
> originated the redirect, so change the tracepoint to always return 0 as the
> map ID and index. Otherwise no functional change is intended with this
> patch.
>

Nice work, Toke!

I'm getting some checkpatch warnings (>80 char lines), other than that:

Acked-by: Björn Töpel <bjorn.topel@...el.com>

> Signed-off-by: Toke Høiland-Jørgensen <toke@...hat.com>
> ---
>  include/linux/netdevice.h  |    3 ++
>  include/trace/events/xdp.h |    2 +
>  kernel/bpf/devmap.c        |   61 ++++++++++++++++++--------------------------
>  net/core/dev.c             |    2 +
>  4 files changed, 31 insertions(+), 37 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 2741aa35bec6..1b2bc2a7522e 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -876,6 +876,7 @@ enum bpf_netdev_command {
>  struct bpf_prog_offload_ops;
>  struct netlink_ext_ack;
>  struct xdp_umem;
> +struct xdp_dev_bulk_queue;
>
>  struct netdev_bpf {
>         enum bpf_netdev_command command;
> @@ -1993,6 +1994,8 @@ struct net_device {
>         spinlock_t              tx_global_lock;
>         int                     watchdog_timeo;
>
> +       struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
> +
>  #ifdef CONFIG_XPS
>         struct xps_dev_maps __rcu *xps_cpus_map;
>         struct xps_dev_maps __rcu *xps_rxqs_map;
> diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
> index a7378bcd9928..72bad13d4a3c 100644
> --- a/include/trace/events/xdp.h
> +++ b/include/trace/events/xdp.h
> @@ -278,7 +278,7 @@ TRACE_EVENT(xdp_devmap_xmit,
>         ),
>
>         TP_fast_assign(
> -               __entry->map_id         = map->id;
> +               __entry->map_id         = map ? map->id : 0;
>                 __entry->act            = XDP_REDIRECT;
>                 __entry->map_index      = map_index;
>                 __entry->drops          = drops;
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index da9c832fc5c8..bcb05cb6b728 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -53,13 +53,11 @@
>         (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
>
>  #define DEV_MAP_BULK_SIZE 16
> -struct bpf_dtab_netdev;
> -
> -struct xdp_bulk_queue {
> +struct xdp_dev_bulk_queue {
>         struct xdp_frame *q[DEV_MAP_BULK_SIZE];
>         struct list_head flush_node;
> +       struct net_device *dev;
>         struct net_device *dev_rx;
> -       struct bpf_dtab_netdev *obj;
>         unsigned int count;
>  };
>
> @@ -67,9 +65,8 @@ struct bpf_dtab_netdev {
>         struct net_device *dev; /* must be first member, due to tracepoint */
>         struct hlist_node index_hlist;
>         struct bpf_dtab *dtab;
> -       struct xdp_bulk_queue __percpu *bulkq;
>         struct rcu_head rcu;
> -       unsigned int idx; /* keep track of map index for tracepoint */
> +       unsigned int idx;
>  };
>
>  struct bpf_dtab {
> @@ -219,7 +216,6 @@ static void dev_map_free(struct bpf_map *map)
>
>                         hlist_for_each_entry_safe(dev, next, head, index_hlist) {
>                                 hlist_del_rcu(&dev->index_hlist);
> -                               free_percpu(dev->bulkq);
>                                 dev_put(dev->dev);
>                                 kfree(dev);
>                         }
> @@ -234,7 +230,6 @@ static void dev_map_free(struct bpf_map *map)
>                         if (!dev)
>                                 continue;
>
> -                       free_percpu(dev->bulkq);
>                         dev_put(dev->dev);
>                         kfree(dev);
>                 }
> @@ -320,10 +315,9 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
>         return -ENOENT;
>  }
>
> -static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
> +static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
>  {
> -       struct bpf_dtab_netdev *obj = bq->obj;
> -       struct net_device *dev = obj->dev;
> +       struct net_device *dev = bq->dev;
>         int sent = 0, drops = 0, err = 0;
>         int i;
>
> @@ -346,8 +340,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
>  out:
>         bq->count = 0;
>
> -       trace_xdp_devmap_xmit(&obj->dtab->map, obj->idx,
> -                             sent, drops, bq->dev_rx, dev, err);
> +       trace_xdp_devmap_xmit(NULL, 0, sent, drops, bq->dev_rx, dev, err);
>         bq->dev_rx = NULL;
>         __list_del_clearprev(&bq->flush_node);
>         return 0;
> @@ -374,7 +367,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
>  void __dev_map_flush(void)
>  {
>         struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
> -       struct xdp_bulk_queue *bq, *tmp;
> +       struct xdp_dev_bulk_queue *bq, *tmp;
>
>         rcu_read_lock();
>         list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
> @@ -401,12 +394,12 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
>  /* Runs under RCU-read-side, plus in softirq under NAPI protection.
>   * Thus, safe percpu variable access.
>   */
> -static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
> +static int bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
>                       struct net_device *dev_rx)
>
>  {
>         struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
> -       struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
> +       struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
>
>         if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
>                 bq_xmit_all(bq, 0);
> @@ -444,7 +437,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
>         if (unlikely(!xdpf))
>                 return -EOVERFLOW;
>
> -       return bq_enqueue(dst, xdpf, dev_rx);
> +       return bq_enqueue(dev, xdpf, dev_rx);
>  }
>
>  int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
> @@ -483,7 +476,6 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
>         struct bpf_dtab_netdev *dev;
>
>         dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
> -       free_percpu(dev->bulkq);
>         dev_put(dev->dev);
>         kfree(dev);
>  }
> @@ -538,30 +530,14 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
>                                                     u32 ifindex,
>                                                     unsigned int idx)
>  {
> -       gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
>         struct bpf_dtab_netdev *dev;
> -       struct xdp_bulk_queue *bq;
> -       int cpu;
>
> -       dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node);
> +       dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN, dtab->map.numa_node);
>         if (!dev)
>                 return ERR_PTR(-ENOMEM);
>
> -       dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
> -                                       sizeof(void *), gfp);
> -       if (!dev->bulkq) {
> -               kfree(dev);
> -               return ERR_PTR(-ENOMEM);
> -       }
> -
> -       for_each_possible_cpu(cpu) {
> -               bq = per_cpu_ptr(dev->bulkq, cpu);
> -               bq->obj = dev;
> -       }
> -
>         dev->dev = dev_get_by_index(net, ifindex);
>         if (!dev->dev) {
> -               free_percpu(dev->bulkq);
>                 kfree(dev);
>                 return ERR_PTR(-EINVAL);
>         }
> @@ -721,9 +697,22 @@ static int dev_map_notification(struct notifier_block *notifier,
>  {
>         struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
>         struct bpf_dtab *dtab;
> -       int i;
> +       int i, cpu;
>
>         switch (event) {
> +       case NETDEV_REGISTER:
> +               if (!netdev->netdev_ops->ndo_xdp_xmit || netdev->xdp_bulkq)
> +                       break;
> +
> +               /* will be freed in free_netdev() */
> +               netdev->xdp_bulkq = __alloc_percpu_gfp(sizeof(struct xdp_dev_bulk_queue),
> +                                                      sizeof(void *), GFP_ATOMIC);
> +               if (!netdev->xdp_bulkq)
> +                       return NOTIFY_BAD;
> +
> +               for_each_possible_cpu(cpu)
> +                       per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev;
> +               break;
>         case NETDEV_UNREGISTER:
>                 /* This rcu_read_lock/unlock pair is needed because
>                  * dev_map_list is an RCU list AND to ensure a delete
> diff --git a/net/core/dev.c b/net/core/dev.c
> index d99f88c58636..e7802a41ae7f 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -9847,6 +9847,8 @@ void free_netdev(struct net_device *dev)
>
>         free_percpu(dev->pcpu_refcnt);
>         dev->pcpu_refcnt = NULL;
> +       free_percpu(dev->xdp_bulkq);
> +       dev->xdp_bulkq = NULL;
>
>         netdev_unregister_lockdep_key(dev);
>
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ