lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Tue, 20 Sep 2016 15:02:19 -0700
From:   Tom Herbert <tom@...bertland.com>
To:     Jesper Dangaard Brouer <brouer@...hat.com>
Cc:     Linux Kernel Network Developers <netdev@...r.kernel.org>,
        Tariq Toukan <tariqt@...lanox.com>,
        Brenden Blanco <bblanco@...mgrid.com>,
        Alexei Starovoitov <alexei.starovoitov@...il.com>,
        Eric Dumazet <eric.dumazet@...il.com>
Subject: Re: [RFC PATCH] xdp: separate struct xdp_prog as container for bpf_prog

On Tue, Sep 20, 2016 at 12:55 PM, Jesper Dangaard Brouer
<brouer@...hat.com> wrote:
> Currently the XDP program is simply a bpf_prog pointer.  While it
> is good for simplicity, it is limiting extendability for upcoming
> features.
>
Hi Jesper,

Can you take a look (or try) the RFC patches I just posted to
generalize XDP. I believe that should subsume most of what you're
doing here!

Thanks,
Tom

> Introducing a new struct xdp_prog, that can carry information
> related to the XDP program.  Notice this approach does not affect
> performance (tested and benchmarked), because the extra dereference
> for the eBPF program only happens once per 64 packets in the poll
> function.
>
> The features that need this is:
>
> * Multi-port TX:
>   Need to know own port index and port lookup table.
>
> * XDP program per RX queue:
>   Need setup info about program type, global or specific, due to
>   replace semantics.
>
> * Capabilities negotiation:
>   Need to store information about features program want to use,
>   in-order to validate this.
>
> I do realize this new struct xdp_prog features cannot go into the
> kernel before one of the three users of the struct is also implemented.
>
> Signed-off-by: Jesper Dangaard Brouer <brouer@...hat.com>
> ---
>  drivers/net/ethernet/mellanox/mlx4/en_netdev.c |   12 +++---
>  drivers/net/ethernet/mellanox/mlx4/en_rx.c     |   10 +++--
>  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |    2 -
>  include/linux/filter.h                         |   14 +++++++
>  include/linux/netdevice.h                      |    2 -
>  net/core/dev.c                                 |   15 +++++--
>  net/core/filter.c                              |   50 ++++++++++++++++++++++++
>  7 files changed, 89 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> index 62516f8369ba..f86f65b170f7 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> @@ -2622,11 +2622,11 @@ static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 m
>         return err;
>  }
>
> -static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
> +static int mlx4_xdp_set(struct net_device *dev, struct xdp_prog *prog)
>  {
>         struct mlx4_en_priv *priv = netdev_priv(dev);
>         struct mlx4_en_dev *mdev = priv->mdev;
> -       struct bpf_prog *old_prog;
> +       struct xdp_prog *old_prog;
>         int xdp_ring_num;
>         int port_up = 0;
>         int err;
> @@ -2639,7 +2639,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
>          */
>         if (priv->xdp_ring_num == xdp_ring_num) {
>                 if (prog) {
> -                       prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
> +                       prog = xdp_prog_add(prog, priv->rx_ring_num);
>                         if (IS_ERR(prog))
>                                 return PTR_ERR(prog);
>                 }
> @@ -2650,7 +2650,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
>                                         lockdep_is_held(&mdev->state_lock));
>                         rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
>                         if (old_prog)
> -                               bpf_prog_put(old_prog);
> +                               xdp_prog_put(old_prog);
>                 }
>                 mutex_unlock(&mdev->state_lock);
>                 return 0;
> @@ -2669,7 +2669,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
>         }
>
>         if (prog) {
> -               prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
> +               prog = xdp_prog_add(prog, priv->rx_ring_num);
>                 if (IS_ERR(prog))
>                         return PTR_ERR(prog);
>         }
> @@ -2690,7 +2690,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
>                                         lockdep_is_held(&mdev->state_lock));
>                 rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
>                 if (old_prog)
> -                       bpf_prog_put(old_prog);
> +                       xdp_prog_put(old_prog);
>         }
>
>         if (port_up) {
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> index c46355bce613..e1182879ea6f 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> @@ -535,13 +535,13 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
>  {
>         struct mlx4_en_dev *mdev = priv->mdev;
>         struct mlx4_en_rx_ring *ring = *pring;
> -       struct bpf_prog *old_prog;
> +       struct xdp_prog *old_prog;
>
>         old_prog = rcu_dereference_protected(
>                                         ring->xdp_prog,
>                                         lockdep_is_held(&mdev->state_lock));
>         if (old_prog)
> -               bpf_prog_put(old_prog);
> +               xdp_prog_put(old_prog);
>         mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
>         vfree(ring->rx_info);
>         ring->rx_info = NULL;
> @@ -783,7 +783,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
>         struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
>         struct mlx4_en_rx_alloc *frags;
>         struct mlx4_en_rx_desc *rx_desc;
> -       struct bpf_prog *xdp_prog;
> +       struct xdp_prog *xdp_prog;
> +       struct bpf_prog *bpf_prog;
>         int doorbell_pending;
>         struct sk_buff *skb;
>         int tx_index;
> @@ -805,6 +806,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
>         /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
>         rcu_read_lock();
>         xdp_prog = rcu_dereference(ring->xdp_prog);
> +       bpf_prog = rcu_dereference(xdp_prog->bpf);
>         doorbell_pending = 0;
>         tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
>
> @@ -897,7 +899,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
>                                                         frags[0].page_offset;
>                         xdp.data_end = xdp.data + length;
>
> -                       act = bpf_prog_run_xdp(xdp_prog, &xdp);
> +                       act = bpf_prog_run_xdp(bpf_prog, &xdp);
>                         switch (act) {
>                         case XDP_PASS:
>                                 break;
> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> index a3528dd1e72e..8942201bcc38 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> @@ -340,7 +340,7 @@ struct mlx4_en_rx_ring {
>         u8  fcs_del;
>         void *buf;
>         void *rx_info;
> -       struct bpf_prog __rcu *xdp_prog;
> +       struct xdp_prog __rcu *xdp_prog;
>         struct mlx4_en_page_cache page_cache;
>         unsigned long bytes;
>         unsigned long packets;
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 1f09c521adfe..f1eee2f0f70c 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -437,6 +437,20 @@ struct xdp_buff {
>         void *data_end;
>  };
>
> +struct xdp_prog {
> +       u64             flags;
> +       struct bpf_prog *bpf;
> +       atomic_t        refcnt;
> +       struct rcu_head rcu; // Do we need RCU freeing? likely right?
> +
> +       /* Data associated with XDP program goes here */
> +
> +} ____cacheline_aligned_in_smp;
> +
> +struct xdp_prog *xdp_prog_alloc(struct bpf_prog *bpf);
> +struct xdp_prog *xdp_prog_add(struct xdp_prog *xdp, int users);
> +void xdp_prog_put(struct xdp_prog *prog);
> +
>  /* compute the linear packet data range [data, data_end) which
>   * will be accessed by cls_bpf and act_bpf programs
>   */
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index a10d8d18ce19..f9d900e62cd5 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -825,7 +825,7 @@ struct netdev_xdp {
>         enum xdp_netdev_command command;
>         union {
>                 /* XDP_SETUP_PROG */
> -               struct bpf_prog *prog;
> +               struct xdp_prog *prog;
>                 /* XDP_QUERY_PROG */
>                 bool prog_attached;
>         };
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 9dbece2f1296..df3f7d3cf62e 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6643,23 +6643,30 @@ EXPORT_SYMBOL(dev_change_proto_down);
>  int dev_change_xdp_fd(struct net_device *dev, int fd)
>  {
>         const struct net_device_ops *ops = dev->netdev_ops;
> -       struct bpf_prog *prog = NULL;
> +       struct xdp_prog *prog = NULL;
> +       struct bpf_prog *bpf;
>         struct netdev_xdp xdp = {};
>         int err;
>
>         if (!ops->ndo_xdp)
>                 return -EOPNOTSUPP;
>         if (fd >= 0) {
> -               prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
> -               if (IS_ERR(prog))
> +               bpf = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); /* inc refcnt */
> +               if (IS_ERR(bpf))
> +                       return PTR_ERR(bpf);
> +
> +               prog = xdp_prog_alloc(bpf);
> +               if (IS_ERR(prog)) {
> +                       bpf_prog_put(prog->bpf);
>                         return PTR_ERR(prog);
> +               }
>         }
>
>         xdp.command = XDP_SETUP_PROG;
>         xdp.prog = prog;
>         err = ops->ndo_xdp(dev, &xdp);
>         if (err < 0 && prog)
> -               bpf_prog_put(prog);
> +               bpf_prog_put(prog->bpf);
>
>         return err;
>  }
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 298b146b47e7..a61ca13b8eaa 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2912,3 +2912,53 @@ out:
>         release_sock(sk);
>         return ret;
>  }
> +
> +struct xdp_prog *xdp_prog_alloc(struct bpf_prog *bpf)
> +{
> +       struct xdp_prog *xdp;
> +
> +       xdp = kzalloc(sizeof(*xdp), GFP_KERNEL);
> +       if (!xdp)
> +               return ERR_PTR(-ENOMEM);
> +
> +       /* Note dev_change_xdp_fd() already refcnt inc on bpf prog */
> +       xdp->bpf = bpf;
> +
> +       return xdp;
> +}
> +EXPORT_SYMBOL(xdp_prog_alloc);
> +
> +struct xdp_prog *xdp_prog_add(struct xdp_prog *xdp, int users)
> +{
> +       struct bpf_prog *bpf;
> +
> +       bpf = bpf_prog_add(xdp->bpf, users);
> +       if (IS_ERR(bpf)) {
> +               return (void*)bpf; /* it is already a PTR_ERR */
> +       }
> +       atomic_add(users, &xdp->refcnt);
> +
> +       return xdp;
> +}
> +EXPORT_SYMBOL(xdp_prog_add);
> +
> +void __xdp_prog_put_rcu(struct rcu_head *rcu)
> +{
> +       struct xdp_prog *xdp = container_of(rcu, struct xdp_prog, rcu);
> +
> +       /* Release reference to (future) xdp_port_table here */
> +
> +       /* Release refcnt from dev_change_xdp_fd() calling bpf_prog_get_type()*/
> +       bpf_prog_put(xdp->bpf);
> +
> +       kfree(xdp);
> +}
> +
> +void xdp_prog_put(struct xdp_prog *prog)
> +{
> +       bpf_prog_put(prog->bpf);
> +       if (atomic_dec_and_test(&prog->refcnt)) {
> +               call_rcu(&prog->rcu, __xdp_prog_put_rcu);
> +       }
> +}
> +EXPORT_SYMBOL(xdp_prog_put);
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ