[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CALx6S37Dq3PxdWX4dZdGu6X0RJPkpAJtGKdr60wPuo5LG369sw@mail.gmail.com>
Date: Tue, 20 Sep 2016 15:02:19 -0700
From: Tom Herbert <tom@...bertland.com>
To: Jesper Dangaard Brouer <brouer@...hat.com>
Cc: Linux Kernel Network Developers <netdev@...r.kernel.org>,
Tariq Toukan <tariqt@...lanox.com>,
Brenden Blanco <bblanco@...mgrid.com>,
Alexei Starovoitov <alexei.starovoitov@...il.com>,
Eric Dumazet <eric.dumazet@...il.com>
Subject: Re: [RFC PATCH] xdp: separate struct xdp_prog as container for bpf_prog
On Tue, Sep 20, 2016 at 12:55 PM, Jesper Dangaard Brouer
<brouer@...hat.com> wrote:
> Currently the XDP program is simply a bpf_prog pointer. While it
> is good for simplicity, it is limiting extendability for upcoming
> features.
>
Hi Jesper,
Can you take a look (or try) the RFC patches I just posted to
generalize XDP. I believe that should subsume most of what you're
doing here!
Thanks,
Tom
> Introducing a new struct xdp_prog, that can carry information
> related to the XDP program. Notice this approach does not affect
> performance (tested and benchmarked), because the extra dereference
> for the eBPF program only happens once per 64 packets in the poll
> function.
>
> The features that need this is:
>
> * Multi-port TX:
> Need to know own port index and port lookup table.
>
> * XDP program per RX queue:
> Need setup info about program type, global or specific, due to
> replace semantics.
>
> * Capabilities negotiation:
> Need to store information about features program want to use,
> in-order to validate this.
>
> I do realize this new struct xdp_prog features cannot go into the
> kernel before one of the three users of the struct is also implemented.
>
> Signed-off-by: Jesper Dangaard Brouer <brouer@...hat.com>
> ---
> drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 12 +++---
> drivers/net/ethernet/mellanox/mlx4/en_rx.c | 10 +++--
> drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 -
> include/linux/filter.h | 14 +++++++
> include/linux/netdevice.h | 2 -
> net/core/dev.c | 15 +++++--
> net/core/filter.c | 50 ++++++++++++++++++++++++
> 7 files changed, 89 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> index 62516f8369ba..f86f65b170f7 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> @@ -2622,11 +2622,11 @@ static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 m
> return err;
> }
>
> -static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
> +static int mlx4_xdp_set(struct net_device *dev, struct xdp_prog *prog)
> {
> struct mlx4_en_priv *priv = netdev_priv(dev);
> struct mlx4_en_dev *mdev = priv->mdev;
> - struct bpf_prog *old_prog;
> + struct xdp_prog *old_prog;
> int xdp_ring_num;
> int port_up = 0;
> int err;
> @@ -2639,7 +2639,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
> */
> if (priv->xdp_ring_num == xdp_ring_num) {
> if (prog) {
> - prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
> + prog = xdp_prog_add(prog, priv->rx_ring_num);
> if (IS_ERR(prog))
> return PTR_ERR(prog);
> }
> @@ -2650,7 +2650,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
> lockdep_is_held(&mdev->state_lock));
> rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
> if (old_prog)
> - bpf_prog_put(old_prog);
> + xdp_prog_put(old_prog);
> }
> mutex_unlock(&mdev->state_lock);
> return 0;
> @@ -2669,7 +2669,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
> }
>
> if (prog) {
> - prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
> + prog = xdp_prog_add(prog, priv->rx_ring_num);
> if (IS_ERR(prog))
> return PTR_ERR(prog);
> }
> @@ -2690,7 +2690,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
> lockdep_is_held(&mdev->state_lock));
> rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
> if (old_prog)
> - bpf_prog_put(old_prog);
> + xdp_prog_put(old_prog);
> }
>
> if (port_up) {
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> index c46355bce613..e1182879ea6f 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> @@ -535,13 +535,13 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
> {
> struct mlx4_en_dev *mdev = priv->mdev;
> struct mlx4_en_rx_ring *ring = *pring;
> - struct bpf_prog *old_prog;
> + struct xdp_prog *old_prog;
>
> old_prog = rcu_dereference_protected(
> ring->xdp_prog,
> lockdep_is_held(&mdev->state_lock));
> if (old_prog)
> - bpf_prog_put(old_prog);
> + xdp_prog_put(old_prog);
> mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
> vfree(ring->rx_info);
> ring->rx_info = NULL;
> @@ -783,7 +783,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
> struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
> struct mlx4_en_rx_alloc *frags;
> struct mlx4_en_rx_desc *rx_desc;
> - struct bpf_prog *xdp_prog;
> + struct xdp_prog *xdp_prog;
> + struct bpf_prog *bpf_prog;
> int doorbell_pending;
> struct sk_buff *skb;
> int tx_index;
> @@ -805,6 +806,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
> /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
> rcu_read_lock();
> xdp_prog = rcu_dereference(ring->xdp_prog);
> + bpf_prog = rcu_dereference(xdp_prog->bpf);
> doorbell_pending = 0;
> tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
>
> @@ -897,7 +899,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
> frags[0].page_offset;
> xdp.data_end = xdp.data + length;
>
> - act = bpf_prog_run_xdp(xdp_prog, &xdp);
> + act = bpf_prog_run_xdp(bpf_prog, &xdp);
> switch (act) {
> case XDP_PASS:
> break;
> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> index a3528dd1e72e..8942201bcc38 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> @@ -340,7 +340,7 @@ struct mlx4_en_rx_ring {
> u8 fcs_del;
> void *buf;
> void *rx_info;
> - struct bpf_prog __rcu *xdp_prog;
> + struct xdp_prog __rcu *xdp_prog;
> struct mlx4_en_page_cache page_cache;
> unsigned long bytes;
> unsigned long packets;
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 1f09c521adfe..f1eee2f0f70c 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -437,6 +437,20 @@ struct xdp_buff {
> void *data_end;
> };
>
> +struct xdp_prog {
> + u64 flags;
> + struct bpf_prog *bpf;
> + atomic_t refcnt;
> + struct rcu_head rcu; // Do we need RCU freeing? likely right?
> +
> + /* Data associated with XDP program goes here */
> +
> +} ____cacheline_aligned_in_smp;
> +
> +struct xdp_prog *xdp_prog_alloc(struct bpf_prog *bpf);
> +struct xdp_prog *xdp_prog_add(struct xdp_prog *xdp, int users);
> +void xdp_prog_put(struct xdp_prog *prog);
> +
> /* compute the linear packet data range [data, data_end) which
> * will be accessed by cls_bpf and act_bpf programs
> */
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index a10d8d18ce19..f9d900e62cd5 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -825,7 +825,7 @@ struct netdev_xdp {
> enum xdp_netdev_command command;
> union {
> /* XDP_SETUP_PROG */
> - struct bpf_prog *prog;
> + struct xdp_prog *prog;
> /* XDP_QUERY_PROG */
> bool prog_attached;
> };
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 9dbece2f1296..df3f7d3cf62e 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6643,23 +6643,30 @@ EXPORT_SYMBOL(dev_change_proto_down);
> int dev_change_xdp_fd(struct net_device *dev, int fd)
> {
> const struct net_device_ops *ops = dev->netdev_ops;
> - struct bpf_prog *prog = NULL;
> + struct xdp_prog *prog = NULL;
> + struct bpf_prog *bpf;
> struct netdev_xdp xdp = {};
> int err;
>
> if (!ops->ndo_xdp)
> return -EOPNOTSUPP;
> if (fd >= 0) {
> - prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
> - if (IS_ERR(prog))
> + bpf = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); /* inc refcnt */
> + if (IS_ERR(bpf))
> + return PTR_ERR(bpf);
> +
> + prog = xdp_prog_alloc(bpf);
> + if (IS_ERR(prog)) {
> + bpf_prog_put(prog->bpf);
> return PTR_ERR(prog);
> + }
> }
>
> xdp.command = XDP_SETUP_PROG;
> xdp.prog = prog;
> err = ops->ndo_xdp(dev, &xdp);
> if (err < 0 && prog)
> - bpf_prog_put(prog);
> + bpf_prog_put(prog->bpf);
>
> return err;
> }
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 298b146b47e7..a61ca13b8eaa 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2912,3 +2912,53 @@ out:
> release_sock(sk);
> return ret;
> }
> +
> +struct xdp_prog *xdp_prog_alloc(struct bpf_prog *bpf)
> +{
> + struct xdp_prog *xdp;
> +
> + xdp = kzalloc(sizeof(*xdp), GFP_KERNEL);
> + if (!xdp)
> + return ERR_PTR(-ENOMEM);
> +
> + /* Note dev_change_xdp_fd() already refcnt inc on bpf prog */
> + xdp->bpf = bpf;
> +
> + return xdp;
> +}
> +EXPORT_SYMBOL(xdp_prog_alloc);
> +
> +struct xdp_prog *xdp_prog_add(struct xdp_prog *xdp, int users)
> +{
> + struct bpf_prog *bpf;
> +
> + bpf = bpf_prog_add(xdp->bpf, users);
> + if (IS_ERR(bpf)) {
> + return (void*)bpf; /* it is already a PTR_ERR */
> + }
> + atomic_add(users, &xdp->refcnt);
> +
> + return xdp;
> +}
> +EXPORT_SYMBOL(xdp_prog_add);
> +
> +void __xdp_prog_put_rcu(struct rcu_head *rcu)
> +{
> + struct xdp_prog *xdp = container_of(rcu, struct xdp_prog, rcu);
> +
> + /* Release reference to (future) xdp_port_table here */
> +
> + /* Release refcnt from dev_change_xdp_fd() calling bpf_prog_get_type()*/
> + bpf_prog_put(xdp->bpf);
> +
> + kfree(xdp);
> +}
> +
> +void xdp_prog_put(struct xdp_prog *prog)
> +{
> + bpf_prog_put(prog->bpf);
> + if (atomic_dec_and_test(&prog->refcnt)) {
> + call_rcu(&prog->rcu, __xdp_prog_put_rcu);
> + }
> +}
> +EXPORT_SYMBOL(xdp_prog_put);
>
Powered by blists - more mailing lists