[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACGkMEvbxs4AK+xCW0i-ZMo4B5WEKMLmFHBu_7ZRa+4Pv+-44w@mail.gmail.com>
Date: Tue, 24 Sep 2024 15:35:08 +0800
From: Jason Wang <jasowang@...hat.com>
To: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>
Cc: netdev@...r.kernel.org, "Michael S. Tsirkin" <mst@...hat.com>,
Eugenio Pérez <eperezma@...hat.com>,
"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>, Jesper Dangaard Brouer <hawk@...nel.org>,
John Fastabend <john.fastabend@...il.com>, virtualization@...ts.linux.dev,
bpf@...r.kernel.org
Subject: Re: [RFC net-next v1 10/12] virtio_net: xsk: tx: support xmit xsk buffer
On Tue, Sep 24, 2024 at 9:32 AM Xuan Zhuo <xuanzhuo@...ux.alibaba.com> wrote:
>
> The driver's tx napi is very important for XSK. It is responsible for
> obtaining data from the XSK queue and sending it out.
>
> At the beginning, we need to trigger tx napi.
>
> virtnet_free_old_xmit distinguishes three types of pointer (skb, xdp
> frame, xsk buffer) by the last bits of the pointer.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>
> ---
> drivers/net/virtio_net.c | 176 ++++++++++++++++++++++++++++++++++++---
> 1 file changed, 166 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 3ad4c6e3ef18..1a870f1df910 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -83,6 +83,7 @@ struct virtnet_sq_free_stats {
> u64 bytes;
> u64 napi_packets;
> u64 napi_bytes;
> + u64 xsk;
> };
>
> struct virtnet_sq_stats {
> @@ -514,16 +515,20 @@ static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
> struct sk_buff *curr_skb,
> struct page *page, void *buf,
> int len, int truesize);
> +static void virtnet_xsk_completed(struct send_queue *sq, int num);
>
> enum virtnet_xmit_type {
> VIRTNET_XMIT_TYPE_SKB,
> VIRTNET_XMIT_TYPE_SKB_ORPHAN,
> VIRTNET_XMIT_TYPE_XDP,
> + VIRTNET_XMIT_TYPE_XSK,
> };
>
> /* We use the last two bits of the pointer to distinguish the xmit type. */
> #define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1))
>
> +#define VIRTIO_XSK_FLAG_OFFSET 4
Any reason this is not 2?
> +
> static enum virtnet_xmit_type virtnet_xmit_ptr_strip(void **ptr)
> {
> unsigned long p = (unsigned long)*ptr;
> @@ -546,6 +551,11 @@ static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data,
> GFP_ATOMIC);
> }
>
> +static u32 virtnet_ptr_to_xsk_buff_len(void *ptr)
> +{
> + return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
> +}
> +
> static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> {
> sg_assign_page(sg, NULL);
> @@ -587,11 +597,27 @@ static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> stats->bytes += xdp_get_frame_len(frame);
> xdp_return_frame(frame);
> break;
> +
> + case VIRTNET_XMIT_TYPE_XSK:
> + stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr);
> + stats->xsk++;
> + break;
> }
> }
> netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
Not related to this patch, but this seems unnecessary for AF_XDP.
> }
>
> +static void virtnet_free_old_xmit(struct send_queue *sq,
> + struct netdev_queue *txq,
> + bool in_napi,
> + struct virtnet_sq_free_stats *stats)
> +{
> + __free_old_xmit(sq, txq, in_napi, stats);
> +
> + if (stats->xsk)
> + virtnet_xsk_completed(sq, stats->xsk);
> +}
> +
> /* Converting between virtqueue no. and kernel tx/rx queue no.
> * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
> */
> @@ -1019,7 +1045,7 @@ static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> {
> struct virtnet_sq_free_stats stats = {0};
>
> - __free_old_xmit(sq, txq, in_napi, &stats);
> + virtnet_free_old_xmit(sq, txq, in_napi, &stats);
>
> /* Avoid overhead when no packets have been processed
> * happens when called speculatively from start_xmit.
> @@ -1380,6 +1406,111 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue
> return err;
> }
>
> +static void *virtnet_xsk_to_ptr(u32 len)
> +{
> + unsigned long p;
> +
> + p = len << VIRTIO_XSK_FLAG_OFFSET;
> +
> + return virtnet_xmit_ptr_mix((void *)p, VIRTNET_XMIT_TYPE_XSK);
> +}
> +
> +static int virtnet_xsk_xmit_one(struct send_queue *sq,
> + struct xsk_buff_pool *pool,
> + struct xdp_desc *desc)
> +{
> + struct virtnet_info *vi;
> + dma_addr_t addr;
> +
> + vi = sq->vq->vdev->priv;
> +
> + addr = xsk_buff_raw_get_dma(pool, desc->addr);
> + xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);
> +
> + sg_init_table(sq->sg, 2);
> +
> + sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len);
> + sg_fill_dma(sq->sg + 1, addr, desc->len);
> +
> + return virtqueue_add_outbuf(sq->vq, sq->sg, 2,
> + virtnet_xsk_to_ptr(desc->len), GFP_ATOMIC);
> +}
> +
> +static int virtnet_xsk_xmit_batch(struct send_queue *sq,
> + struct xsk_buff_pool *pool,
> + unsigned int budget,
> + u64 *kicks)
> +{
> + struct xdp_desc *descs = pool->tx_descs;
> + bool kick = false;
> + u32 nb_pkts, i;
> + int err;
> +
> + budget = min_t(u32, budget, sq->vq->num_free);
> +
> + nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);
> + if (!nb_pkts)
> + return 0;
> +
> + for (i = 0; i < nb_pkts; i++) {
> + err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
> + if (unlikely(err)) {
> + xsk_tx_completed(sq->xsk_pool, nb_pkts - i);
> + break;
> + }
> +
> + kick = true;
> + }
> +
> + if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
> + (*kicks)++;
> +
> + return i;
> +}
> +
> +static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
> + int budget)
> +{
> + struct virtnet_info *vi = sq->vq->vdev->priv;
> + struct virtnet_sq_free_stats stats = {};
> + struct net_device *dev = vi->dev;
> + u64 kicks = 0;
> + int sent;
> +
> + /* Avoid to wakeup napi meanless, so call __free_old_xmit. */
I don't understand the meaning of this comment.
> + __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);
> +
> + if (stats.xsk)
> + xsk_tx_completed(sq->xsk_pool, stats.xsk);
> +
> + sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);
> +
> + if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
> + check_sq_full_and_disable(vi, vi->dev, sq);
> +
> + u64_stats_update_begin(&sq->stats.syncp);
> + u64_stats_add(&sq->stats.packets, stats.packets);
> + u64_stats_add(&sq->stats.bytes, stats.bytes);
> + u64_stats_add(&sq->stats.kicks, kicks);
> + u64_stats_add(&sq->stats.xdp_tx, sent);
> + u64_stats_update_end(&sq->stats.syncp);
> +
> + if (xsk_uses_need_wakeup(pool))
> + xsk_set_tx_need_wakeup(pool);
> +
> + return sent == budget;
> +}
> +
> +static void xsk_wakeup(struct send_queue *sq)
> +{
> + if (napi_if_scheduled_mark_missed(&sq->napi))
> + return;
> +
> + local_bh_disable();
> + virtqueue_napi_schedule(&sq->napi, sq->vq);
> + local_bh_enable();
> +}
> +
> static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
> {
> struct virtnet_info *vi = netdev_priv(dev);
> @@ -1393,14 +1524,19 @@ static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
>
> sq = &vi->sq[qid];
>
> - if (napi_if_scheduled_mark_missed(&sq->napi))
> - return 0;
> + xsk_wakeup(sq);
> + return 0;
> +}
>
> - local_bh_disable();
> - virtqueue_napi_schedule(&sq->napi, sq->vq);
> - local_bh_enable();
> +static void virtnet_xsk_completed(struct send_queue *sq, int num)
> +{
> + xsk_tx_completed(sq->xsk_pool, num);
>
> - return 0;
> + /* If this is called by rx poll, start_xmit and xdp xmit we should
> + * wakeup the tx napi to consume the xsk tx queue, because the tx
> + * interrupt may not be triggered.
> + */
> + xsk_wakeup(sq);
> }
>
> static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
> @@ -1516,8 +1652,8 @@ static int virtnet_xdp_xmit(struct net_device *dev,
> }
>
> /* Free up any pending old buffers before queueing new ones. */
> - __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
> - false, &stats);
> + virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
> + false, &stats);
>
> for (i = 0; i < n; i++) {
> struct xdp_frame *xdpf = frames[i];
> @@ -2961,6 +3097,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> struct virtnet_info *vi = sq->vq->vdev->priv;
> unsigned int index = vq2txq(sq->vq);
> struct netdev_queue *txq;
> + bool xsk_busy = false;
> int opaque;
> bool done;
>
> @@ -2973,7 +3110,11 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> txq = netdev_get_tx_queue(vi->dev, index);
> __netif_tx_lock(txq, raw_smp_processor_id());
> virtqueue_disable_cb(sq->vq);
> - free_old_xmit(sq, txq, !!budget);
> +
> + if (sq->xsk_pool)
> + xsk_busy = virtnet_xsk_xmit(sq, sq->xsk_pool, budget);
I think we need a better name for "xsk_busy"; it looks like it means we
exceeded the quota. Or just return the number of buffers received and
let the caller judge.
Otherwise looks good.
With this fixed.
Acked-by: Jason Wang <jasowang@...hat.com>
Thanks
Powered by blists - more mailing lists