lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACGkMEtBeUnDeD0zYBvpwjhQ4Lv0dz8mBDQ_C-yP1VEaQdv-0A@mail.gmail.com>
Date: Fri, 2 Aug 2024 11:41:57 +0800
From: Jason Wang <jasowang@...hat.com>
To: Heng Qi <hengqi@...ux.alibaba.com>
Cc: "Michael S. Tsirkin" <mst@...hat.com>, netdev@...r.kernel.org, 
	Xuan Zhuo <xuanzhuo@...ux.alibaba.com>, Eugenio Pérez <eperezma@...hat.com>, 
	"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, 
	Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>
Subject: Re: [PATCH net-next] virtio_net: Prevent misidentified spurious
 interrupts from killing the irq

On Thu, Aug 1, 2024 at 9:56 PM Heng Qi <hengqi@...ux.alibaba.com> wrote:
>
> Michael has effectively reduced the number of spurious interrupts in
> commit a7766ef18b33 ("virtio_net: disable cb aggressively") by disabling
> irq callbacks before cleaning old buffers.
>
> But it is still possible that the irq is killed by mistake:
>
>   When a delayed tx interrupt arrives, old buffers has been cleaned in
>   other paths (start_xmit and virtnet_poll_cleantx), then the interrupt is
>   mistakenly identified as a spurious interrupt in vring_interrupt.
>
>   We should refrain from labeling it as a spurious interrupt; otherwise,
>   note_interrupt may inadvertently kill the legitimate irq.

I think the evil came from where we do free_old_xmit() in
start_xmit(). I know it is for performance, but we may need to make
the code work correctly instead of adding endless hacks. Personally, I
think the virtio-net TX path is over-complicated. We probably pay too
much (e.g there's netif_tx_lock in TX NAPI path) to try to "optimize"
the performance.

How about just don't do free_old_xmit and do that solely in the TX NAPI?

>
> Signed-off-by: Heng Qi <hengqi@...ux.alibaba.com>
> ---
>  drivers/net/virtio_net.c     |  9 ++++++
>  drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++
>  include/linux/virtio.h       |  3 ++
>  3 files changed, 65 insertions(+)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 0383a3e136d6..6d8739418203 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2769,6 +2769,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
>                 do {
>                         virtqueue_disable_cb(sq->vq);
>                         free_old_xmit(sq, txq, !!budget);
> +                       virtqueue_set_tx_oldbuf_cleaned(sq->vq, true);
>                 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
>
>                 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
> @@ -3035,6 +3036,9 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>
>                 free_old_xmit(sq, txq, false);
>
> +               if (use_napi)
> +                       virtqueue_set_tx_oldbuf_cleaned(sq->vq, true);
> +
>         } while (use_napi && !xmit_more &&
>                unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
>
> @@ -3044,6 +3048,11 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>         /* Try to transmit */
>         err = xmit_skb(sq, skb, !use_napi);
>
> +       if (use_napi) {
> +               virtqueue_set_tx_newbuf_sent(sq->vq, true);
> +               virtqueue_set_tx_oldbuf_cleaned(sq->vq, false);
> +       }
> +
>         /* This should not happen! */
>         if (unlikely(err)) {
>                 DEV_STATS_INC(dev, tx_fifo_errors);
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index be7309b1e860..fb2afc716371 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -180,6 +180,11 @@ struct vring_virtqueue {
>          */
>         bool do_unmap;
>
> +       /* Has any new data been sent? */
> +       bool is_tx_newbuf_sent;
> +       /* Is the old data recently sent cleaned up? */
> +       bool is_tx_oldbuf_cleaned;
> +
>         /* Head of free buffer list. */
>         unsigned int free_head;
>         /* Number we've added since last sync. */
> @@ -2092,6 +2097,9 @@ static struct virtqueue *vring_create_virtqueue_packed(
>         vq->use_dma_api = vring_use_dma_api(vdev);
>         vq->premapped = false;
>         vq->do_unmap = vq->use_dma_api;
> +       vq->is_tx_newbuf_sent = false; /* Initially, no new buffer to send. */
> +       vq->is_tx_oldbuf_cleaned = true; /* Initially, no old buffer to clean. */
> +
>
>         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
>                 !context;
> @@ -2375,6 +2383,38 @@ bool virtqueue_notify(struct virtqueue *_vq)
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_notify);
>
> +/**
> + * virtqueue_set_tx_newbuf_sent - set whether there is new tx buf to send.
> + * @_vq: the struct virtqueue
> + *
> + * If is_tx_newbuf_sent and is_tx_oldbuf_cleaned are both true, the
> + * spurious interrupt is caused by polling TX vq in other paths outside
> + * the tx irq callback.
> + */
> +void virtqueue_set_tx_newbuf_sent(struct virtqueue *_vq, bool val)
> +{
> +       struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +       vq->is_tx_newbuf_sent = val;
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_set_tx_newbuf_sent);
> +
> +/**
> + * virtqueue_set_tx_oldbuf_cleaned - set whether there is old tx buf to clean.
> + * @_vq: the struct virtqueue
> + *
> + * If is_tx_oldbuf_cleaned and is_tx_newbuf_sent are both true, the
> + * spurious interrupt is caused by polling TX vq in other paths outside
> + * the tx irq callback.
> + */
> +void virtqueue_set_tx_oldbuf_cleaned(struct virtqueue *_vq, bool val)
> +{
> +       struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +       vq->is_tx_oldbuf_cleaned = val;
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_set_tx_oldbuf_cleaned);
> +
>  /**
>   * virtqueue_kick - update after add_buf
>   * @vq: the struct virtqueue
> @@ -2572,6 +2612,16 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
>         struct vring_virtqueue *vq = to_vvq(_vq);
>
>         if (!more_used(vq)) {
> +               /* When the delayed TX interrupt arrives, the old buffers are
> +                * cleaned in other cases(start_xmit and virtnet_poll_cleantx).
> +                * We'd better not identify it as a spurious interrupt,
> +                * otherwise note_interrupt may kill the interrupt.
> +                */
> +               if (unlikely(vq->is_tx_newbuf_sent && vq->is_tx_oldbuf_cleaned)) {
> +                       vq->is_tx_newbuf_sent = false;
> +                       return IRQ_HANDLED;
> +               }

This is the general virtio code, it's better to avoid any device specific logic.

> +
>                 pr_debug("virtqueue interrupt with no work for %p\n", vq);
>                 return IRQ_NONE;
>         }
> @@ -2637,6 +2687,9 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
>         vq->use_dma_api = vring_use_dma_api(vdev);
>         vq->premapped = false;
>         vq->do_unmap = vq->use_dma_api;
> +       vq->is_tx_newbuf_sent = false; /* Initially, no new buffer to send. */
> +       vq->is_tx_oldbuf_cleaned = true; /* Initially, no old buffer to clean. */
> +
>
>         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
>                 !context;
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index ecc5cb7b8c91..ba3be9276c09 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -103,6 +103,9 @@ int virtqueue_resize(struct virtqueue *vq, u32 num,
>  int virtqueue_reset(struct virtqueue *vq,
>                     void (*recycle)(struct virtqueue *vq, void *buf));
>
> +void virtqueue_set_tx_newbuf_sent(struct virtqueue *vq, bool val);
> +void virtqueue_set_tx_oldbuf_cleaned(struct virtqueue *vq, bool val);
> +
>  struct virtio_admin_cmd {
>         __le16 opcode;
>         __le16 group_type;
> --
> 2.32.0.3.g01195cf9f
>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ