lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACGkMEtgWw9J4Y8fQCWY4ED_=Bdi6ArmOMMA2OjyGbOOu90OTg@mail.gmail.com>
Date: Fri, 13 Sep 2024 11:21:21 +0800
From: Jason Wang <jasowang@...hat.com>
To: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>
Cc: netdev@...r.kernel.org, "Michael S. Tsirkin" <mst@...hat.com>, 
	Eugenio Pérez <eperezma@...hat.com>, 
	"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, 
	Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, Alexei Starovoitov <ast@...nel.org>, 
	Daniel Borkmann <daniel@...earbox.net>, Jesper Dangaard Brouer <hawk@...nel.org>, 
	John Fastabend <john.fastabend@...il.com>, virtualization@...ts.linux.dev, 
	bpf@...r.kernel.org
Subject: Re: [PATCH net-next 10/13] virtio_net: xsk: tx: support xmit xsk buffer

On Thu, Sep 12, 2024 at 4:50 PM Xuan Zhuo <xuanzhuo@...ux.alibaba.com> wrote:
>
> On Wed, 11 Sep 2024 12:31:32 +0800, Jason Wang <jasowang@...hat.com> wrote:
> > On Tue, Aug 20, 2024 at 3:33 PM Xuan Zhuo <xuanzhuo@...ux.alibaba.com> wrote:
> > >
> > > The driver's tx napi is very important for XSK. It is responsible for
> > > obtaining data from the XSK queue and sending it out.
> > >
> > > At the beginning, we need to trigger tx napi.
> > >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>
> > > ---
> > >  drivers/net/virtio_net.c | 127 ++++++++++++++++++++++++++++++++++++++-
> > >  1 file changed, 125 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index 221681926d23..3743694d3c3b 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -516,10 +516,13 @@ enum virtnet_xmit_type {
> > >         VIRTNET_XMIT_TYPE_SKB,
> > >         VIRTNET_XMIT_TYPE_ORPHAN,
> > >         VIRTNET_XMIT_TYPE_XDP,
> > > +       VIRTNET_XMIT_TYPE_XSK,
> > >  };
> > >
> > >  #define VIRTNET_XMIT_TYPE_MASK (VIRTNET_XMIT_TYPE_SKB | VIRTNET_XMIT_TYPE_ORPHAN \
> > > -                               | VIRTNET_XMIT_TYPE_XDP)
> > > +                               | VIRTNET_XMIT_TYPE_XDP | VIRTNET_XMIT_TYPE_XSK)
> > > +
> > > +#define VIRTIO_XSK_FLAG_OFFSET 4
> > >
> > >  static enum virtnet_xmit_type virtnet_xmit_ptr_strip(void **ptr)
> > >  {
> > > @@ -543,6 +546,11 @@ static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data,
> > >                                     GFP_ATOMIC);
> > >  }
> > >
> > > +static u32 virtnet_ptr_to_xsk(void *ptr)
> > > +{
> > > +       return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
> > > +}
> > > +
> >
> > This needs a better name, otherwise readers might be confused.
> >
> > E.g., something like virtnet_ptr_to_xsk_buff_len()?
> >
> > >  static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > >  {
> > >         sg_assign_page(sg, NULL);
> > > @@ -584,6 +592,10 @@ static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> > >                         stats->bytes += xdp_get_frame_len(frame);
> > >                         xdp_return_frame(frame);
> > >                         break;
> > > +
> > > +               case VIRTNET_XMIT_TYPE_XSK:
> > > +                       stats->bytes += virtnet_ptr_to_xsk(ptr);
> > > +                       break;
> >
> > Are we missing an xsk_tx_completed() call here?
> >
> > >                 }
> > >         }
> > >         netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
> > > @@ -1393,6 +1405,97 @@ static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
> > >         return 0;
> > >  }
> > >
> > > +static void *virtnet_xsk_to_ptr(u32 len)
> > > +{
> > > +       unsigned long p;
> > > +
> > > +       p = len << VIRTIO_XSK_FLAG_OFFSET;
> > > +
> > > +       return virtnet_xmit_ptr_mix((void *)p, VIRTNET_XMIT_TYPE_XSK);
> > > +}
> > > +
> > > +static int virtnet_xsk_xmit_one(struct send_queue *sq,
> > > +                               struct xsk_buff_pool *pool,
> > > +                               struct xdp_desc *desc)
> > > +{
> > > +       struct virtnet_info *vi;
> > > +       dma_addr_t addr;
> > > +
> > > +       vi = sq->vq->vdev->priv;
> > > +
> > > +       addr = xsk_buff_raw_get_dma(pool, desc->addr);
> > > +       xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);
> > > +
> > > +       sg_init_table(sq->sg, 2);
> > > +
> > > +       sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len);
> > > +       sg_fill_dma(sq->sg + 1, addr, desc->len);
> > > +
> > > +       return virtqueue_add_outbuf(sq->vq, sq->sg, 2,
> > > +                                   virtnet_xsk_to_ptr(desc->len), GFP_ATOMIC);
> > > +}
> > > +
> > > +static int virtnet_xsk_xmit_batch(struct send_queue *sq,
> > > +                                 struct xsk_buff_pool *pool,
> > > +                                 unsigned int budget,
> > > +                                 u64 *kicks)
> > > +{
> > > +       struct xdp_desc *descs = pool->tx_descs;
> > > +       bool kick = false;
> > > +       u32 nb_pkts, i;
> > > +       int err;
> > > +
> > > +       budget = min_t(u32, budget, sq->vq->num_free);
> > > +
> > > +       nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);
> > > +       if (!nb_pkts)
> > > +               return 0;
> > > +
> > > +       for (i = 0; i < nb_pkts; i++) {
> > > +               err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
> > > +               if (unlikely(err)) {
> > > +                       xsk_tx_completed(sq->xsk_pool, nb_pkts - i);
> >
> > Should we kick in this condition?
> >
> > > +                       break;
> > > +               }
> > > +
> > > +               kick = true;
> > > +       }
> > > +
> > > +       if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
> >
> > Can we simply use virtqueue_kick() here?
> >
> > > +               (*kicks)++;
> > > +
> > > +       return i;
> > > +}
> > > +
> > > +static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
> > > +                            int budget)
> > > +{
> > > +       struct virtnet_info *vi = sq->vq->vdev->priv;
> > > +       struct virtnet_sq_free_stats stats = {};
> > > +       struct net_device *dev = vi->dev;
> > > +       u64 kicks = 0;
> > > +       int sent;
> > > +
> > > +       __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);
> >
> > I haven't checked in depth, but I wonder if we have some side effects
> > when using non NAPI tx mode:
> >
> >         if (napi->weight)
> >                 virtqueue_napi_schedule(napi, vq);
> >         else
> >                 /* We were probably waiting for more output buffers. */
> >                 netif_wake_subqueue(vi->dev, vq2txq(vq));
> >
> > Does this mean xsk will suffer from the same issue: when there is no
> > xsk xmit request, we could end up with no way to reclaim transmitted
> > xsk buffers? (Or should we disallow AF_XDP from being used in non
> > TX-NAPI mode?)
>
> We disallow AF_XDP from being used in non TX-NAPI mode.
>
> The previous patch, #9, does this.
>
> #9 [PATCH net-next 09/13] virtio_net: xsk: prevent disable tx napi

Great, for some reason I missed that.

Thanks

>
> Thanks.
> >
> > > +
> > > +       sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);
> > > +
> > > +       if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
> > > +               check_sq_full_and_disable(vi, vi->dev, sq);
> > > +
> > > +       u64_stats_update_begin(&sq->stats.syncp);
> > > +       u64_stats_add(&sq->stats.packets, stats.packets);
> > > +       u64_stats_add(&sq->stats.bytes,   stats.bytes);
> > > +       u64_stats_add(&sq->stats.kicks,   kicks);
> > > +       u64_stats_add(&sq->stats.xdp_tx,  sent);
> > > +       u64_stats_update_end(&sq->stats.syncp);
> > > +
> > > +       if (xsk_uses_need_wakeup(pool))
> > > +               xsk_set_tx_need_wakeup(pool);
> > > +
> > > +       return sent == budget;
> > > +}
> > > +
> > >  static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
> > >                                    struct send_queue *sq,
> > >                                    struct xdp_frame *xdpf)
> > > @@ -2949,6 +3052,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> > >         struct virtnet_info *vi = sq->vq->vdev->priv;
> > >         unsigned int index = vq2txq(sq->vq);
> > >         struct netdev_queue *txq;
> > > +       bool xsk_busy = false;
> > >         int opaque;
> > >         bool done;
> > >
> > > @@ -2961,7 +3065,11 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> > >         txq = netdev_get_tx_queue(vi->dev, index);
> > >         __netif_tx_lock(txq, raw_smp_processor_id());
> > >         virtqueue_disable_cb(sq->vq);
> > > -       free_old_xmit(sq, txq, !!budget);
> > > +
> > > +       if (sq->xsk_pool)
> > > +               xsk_busy = virtnet_xsk_xmit(sq, sq->xsk_pool, budget);
> > > +       else
> > > +               free_old_xmit(sq, txq, !!budget);
> > >
> > >         if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
> > >                 if (netif_tx_queue_stopped(txq)) {
> > > @@ -2972,6 +3080,11 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> > >                 netif_tx_wake_queue(txq);
> > >         }
> > >
> > > +       if (xsk_busy) {
> > > +               __netif_tx_unlock(txq);
> > > +               return budget;
> > > +       }
> > > +
> > >         opaque = virtqueue_enable_cb_prepare(sq->vq);
> > >
> > >         done = napi_complete_done(napi, 0);
> > > @@ -5974,6 +6087,12 @@ static void free_receive_page_frags(struct virtnet_info *vi)
> > >
> > >  static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
> > >  {
> > > +       struct virtnet_info *vi = vq->vdev->priv;
> > > +       struct send_queue *sq;
> > > +       int i = vq2rxq(vq);
> > > +
> > > +       sq = &vi->sq[i];
> > > +
> > >         switch (virtnet_xmit_ptr_strip(&buf)) {
> > >         case VIRTNET_XMIT_TYPE_SKB:
> > >         case VIRTNET_XMIT_TYPE_ORPHAN:
> > > @@ -5983,6 +6102,10 @@ static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
> > >         case VIRTNET_XMIT_TYPE_XDP:
> > >                 xdp_return_frame(buf);
> > >                 break;
> > > +
> > > +       case VIRTNET_XMIT_TYPE_XSK:
> > > +               xsk_tx_completed(sq->xsk_pool, 1);
> > > +               break;
> > >         }
> > >  }
> > >
> > > --
> > > 2.32.0.3.g01195cf9f
> > >
> >
> > Thanks
> >
>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ