linux-kernel - Re: Random guest crashes since 5c34d002dcc7 ("virtio_pci: use shared interrupts for virtqueues")

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170403185501-mutt-send-email-mst@kernel.org>
Date:   Mon, 3 Apr 2017 19:14:22 +0300
From:   "Michael S. Tsirkin" <mst@...hat.com>
To:     Christoph Hellwig <hch@....de>
Cc:     Mike Galbraith <efault@....de>,
        Thorsten Leemhuis <linux@...mhuis.info>,
        virtio-dev@...ts.oasis-open.org,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
        rjones@...hat.com
Subject: Re: Random guest crashes since 5c34d002dcc7 ("virtio_pci: use shared
 interrupts for virtqueues")

On Mon, Apr 03, 2017 at 04:18:23PM +0200, Christoph Hellwig wrote:
> Mike,
> 
> can you try the patch below?
> 
> ---
> >From fe41a30b54878cc631623b7511267125e0da4b15 Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch@....de>
> Date: Mon, 3 Apr 2017 14:51:35 +0200
> Subject: virtio_pci: don't use shared irq for virtqueues
> 
> Reimplement the shared irq feature manually, as we might have a larger
> number of virtqueues than the core shared interrupt code can handle
> in threaded interrupt mode.
> 
> Signed-off-by: Christoph Hellwig <hch@....de>
> ---
>  drivers/virtio/virtio_pci_common.c | 142 +++++++++++++++++++++----------------
>  drivers/virtio/virtio_pci_common.h |   1 +
>  2 files changed, 83 insertions(+), 60 deletions(-)

Well the original patch this is trying to fix is
07ec51480b5eb1233f8c1b0f5d7a7c8d1247c507 which dropped just 40 lines
with documentation. It did this by re-using error handling to switch
from per-vq to non-per-vq mode. Now this has separate flows for errors
and per-vq non-per-vq switch and (I think, as a result) is adding 140
lines which doesn't make me very happy.

> diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
> index 590534910dc6..6dd719543410 100644
> --- a/drivers/virtio/virtio_pci_common.c
> +++ b/drivers/virtio/virtio_pci_common.c
> @@ -137,6 +137,9 @@ void vp_del_vqs(struct virtio_device *vdev)
>  		kfree(vp_dev->msix_vector_map);
>  	}
>  
> +	/* free the shared virtuqueue irq if we don't use per-vq irqs */

typo

> +	if (vp_dev->shared_vq_vec)
> +		free_irq(pci_irq_vector(vp_dev->pci_dev, 1), vp_dev);

So we used to have enums for 1 and 0. I think it was cleaner.


>  	free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
>  	pci_free_irq_vectors(vp_dev->pci_dev);
>  }
> @@ -147,10 +150,10 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
>  {
>  	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
>  	const char *name = dev_name(&vp_dev->vdev.dev);
> -	int i, j, err = -ENOMEM, allocated_vectors, nvectors;
> +	struct pci_dev *pdev = vp_dev->pci_dev;
> +	int i, err = -ENOMEM, nvectors;
>  	unsigned flags = PCI_IRQ_MSIX;
> -	bool shared = false;
> -	u16 msix_vec;
> +	u16 msix_vec = 0;
>  
>  	if (desc) {
>  		flags |= PCI_IRQ_AFFINITY;
> @@ -162,19 +165,18 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
>  		if (callbacks[i])
>  			nvectors++;
>  
> -	/* Try one vector per queue first. */
> -	err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
> -			nvectors, flags, desc);
> +	/* Try one vector for config and one per queue first. */
> +	err = pci_alloc_irq_vectors_affinity(pdev, nvectors, nvectors, flags,
> +			desc);
>  	if (err < 0) {
>  		/* Fallback to one vector for config, one shared for queues. */
> -		shared = true;
> -		err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2,
> +		nvectors = 2;
> +		vp_dev->shared_vq_vec = true;
> +		err = pci_alloc_irq_vectors(pdev, nvectors, nvectors,
>  				PCI_IRQ_MSIX);
>  		if (err < 0)
>  			return err;
>  	}
> -	if (err < 0)
> -		return err;
>  
>  	vp_dev->msix_vectors = nvectors;
>  	vp_dev->msix_names = kmalloc_array(nvectors,
> @@ -194,79 +196,99 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
>  	}
>  
>  	/* Set the vector used for configuration */
> -	snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names),
> +	snprintf(vp_dev->msix_names[msix_vec], sizeof(*vp_dev->msix_names),
>  		 "%s-config", name);
> -	err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed,
> -			0, vp_dev->msix_names[0], vp_dev);
> +	err = request_irq(pci_irq_vector(pdev, msix_vec), vp_config_changed, 0,
> +			  vp_dev->msix_names[msix_vec], vp_dev);
>  	if (err)
>  		goto out_free_msix_affinity_masks;
>  
>  	/* Verify we had enough resources to assign the vector */
> -	if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) {
> +	if (vp_dev->config_vector(vp_dev, msix_vec) == VIRTIO_MSI_NO_VECTOR) {
>  		err = -EBUSY;
>  		goto out_free_config_irq;
>  	}
>  
> -	vp_dev->msix_vector_map = kmalloc_array(nvqs,
> -			sizeof(*vp_dev->msix_vector_map), GFP_KERNEL);
> -	if (!vp_dev->msix_vector_map)
> -		goto out_disable_config_irq;
> -
> -	allocated_vectors = j = 1; /* vector 0 is the config interrupt */
> -	for (i = 0; i < nvqs; ++i) {
> -		if (!names[i]) {
> -			vqs[i] = NULL;
> -			continue;
> -		}
> -
> -		if (callbacks[i])
> -			msix_vec = allocated_vectors;
> -		else
> -			msix_vec = VIRTIO_MSI_NO_VECTOR;
> -
> -		vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i],
> -				msix_vec);
> -		if (IS_ERR(vqs[i])) {
> -			err = PTR_ERR(vqs[i]);
> -			goto out_remove_vqs;
> +	msix_vec++;
> +
> +	/*
> +	 * Use a different vector for each queue if they are available,
> +	 * else share the same vector for all VQs.
> +	 */
> +	if (vp_dev->shared_vq_vec) {
> +		snprintf(vp_dev->msix_names[msix_vec],
> +			 sizeof(vp_dev->msix_names[msix_vec]),
> +			 "%s-virtqueues", name);
> +		err = request_irq(pci_irq_vector(pdev, msix_vec),
> +				vp_vring_interrupt, 0,
> +				vp_dev->msix_names[msix_vec], vp_dev);
> +		if (err)
> +			goto out_disable_config_irq;
> +
> +		for (i = 0; i < nvqs; ++i) {
> +			if (!names[i]) {
> +				vqs[i] = NULL;
> +				continue;
> +			}
> +
> +			vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i],
> +					names[i], callbacks[i] ?
> +					msix_vec : VIRTIO_MSI_NO_VECTOR);
> +			if (IS_ERR(vqs[i])) {
> +				err = PTR_ERR(vqs[i]);
> +				goto out_remove_vqs;
> +			}
>  		}
> +	} else {
> +		vp_dev->msix_vector_map = kmalloc_array(nvqs,
> +				sizeof(*vp_dev->msix_vector_map), GFP_KERNEL);
> +		if (!vp_dev->msix_vector_map)
> +			goto out_disable_config_irq;
>  
> -		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
> -			vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
> -			continue;
> -		}
> +		for (i = 0; i < nvqs; ++i) {
> +			if (!names[i]) {
> +				vqs[i] = NULL;
> +				continue;
> +			}
>  
> -		snprintf(vp_dev->msix_names[j],
> -			 sizeof(*vp_dev->msix_names), "%s-%s",
> -			 dev_name(&vp_dev->vdev.dev), names[i]);
> -		err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
> -				  vring_interrupt, IRQF_SHARED,
> -				  vp_dev->msix_names[j], vqs[i]);
> -		if (err) {
>  			/* don't free this irq on error */
>  			vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
> -			goto out_remove_vqs;
> +
> +			vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i],
> +					names[i], callbacks[i] ?
> +					msix_vec : VIRTIO_MSI_NO_VECTOR);
> +			if (IS_ERR(vqs[i])) {
> +				err = PTR_ERR(vqs[i]);
> +				goto out_remove_vqs;
> +			}
> +
> +			if (!callbacks[i])
> +				continue;
> +
> +			snprintf(vp_dev->msix_names[msix_vec],
> +				 sizeof(*vp_dev->msix_names), "%s-%s",
> +				 dev_name(&vp_dev->vdev.dev), names[i]);
> +			err = request_irq(pci_irq_vector(pdev, msix_vec),
> +					  vring_interrupt, IRQF_SHARED,
> +					  vp_dev->msix_names[msix_vec],
> +					  vqs[i]);
> +			if (err)
> +				goto out_remove_vqs;
> +			vp_dev->msix_vector_map[i] = msix_vec++;
>  		}
> -		vp_dev->msix_vector_map[i] = msix_vec;
> -		j++;
> -
> -		/*
> -		 * Use a different vector for each queue if they are available,
> -		 * else share the same vector for all VQs.
> -		 */
> -		if (!shared)
> -			allocated_vectors++;
>  	}
>  
>  	return 0;
>  
>  out_remove_vqs:
>  	vp_remove_vqs(vdev);
> +	if (vp_dev->shared_vq_vec)
> +		free_irq(pci_irq_vector(pdev, 1), vp_dev);
>  	kfree(vp_dev->msix_vector_map);
>  out_disable_config_irq:
>  	vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
>  out_free_config_irq:
> -	free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
> +	free_irq(pci_irq_vector(pdev, 0), vp_dev);
>  out_free_msix_affinity_masks:
>  	for (i = 0; i < nvectors; i++) {
>  		if (vp_dev->msix_affinity_masks[i])
> @@ -276,7 +298,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
>  out_free_msix_names:
>  	kfree(vp_dev->msix_names);
>  out_free_irq_vectors:
> -	pci_free_irq_vectors(vp_dev->pci_dev);
> +	pci_free_irq_vectors(pdev);
>  	return err;
>  }
>  
> @@ -346,7 +368,7 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
>  	if (!vq->callback)
>  		return -EINVAL;
>  
> -	if (vp_dev->pci_dev->msix_enabled) {
> +	if (vp_dev->msix_vector_map) {
>  		int vec = vp_dev->msix_vector_map[vq->index];
>  		struct cpumask *mask = vp_dev->msix_affinity_masks[vec];
>  		unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec);
> diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
> index ac8c9d788964..d6d7fb99e47f 100644
> --- a/drivers/virtio/virtio_pci_common.h
> +++ b/drivers/virtio/virtio_pci_common.h
> @@ -72,6 +72,7 @@ struct virtio_pci_device {
>  	int msix_vectors;
>  	/* Map of per-VQ MSI-X vectors, may be NULL */
>  	unsigned *msix_vector_map;
> +	bool shared_vq_vec;


Pls add documentation. In fact I'd prefer a counter of vectors
used than a separate mode.
>  
>  	struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev,
>  				      unsigned idx,
> -- 
> 2.11.0