Message-ID: <39b0ed62-58e0-4e71-ac22-6426d9ded2e7@redhat.com>
Date: Mon, 11 Mar 2024 10:15:28 +0100
From: Eric Auger <eric.auger@...hat.com>
To: Alex Williamson <alex.williamson@...hat.com>
Cc: kvm@...r.kernel.org, clg@...hat.com, reinette.chatre@...el.com,
 linux-kernel@...r.kernel.org, kevin.tian@...el.com, stable@...r.kernel.org
Subject: Re: [PATCH v2 4/7] vfio/pci: Create persistent INTx handler



On 3/9/24 00:05, Alex Williamson wrote:
> A vulnerability exists where the eventfd for INTx signaling can be
> deconfigured, which unregisters the IRQ handler but still allows
> eventfds to be signaled with a NULL context through the SET_IRQS ioctl
> or through an unmask irqfd if the device interrupt is pending.
>
> Ideally this could be solved with some additional locking; the igate
> mutex serializes the ioctl and config space accesses, and the interrupt
> handler is unregistered relative to the trigger, but the irqfd path
> runs asynchronously to those.  The igate mutex cannot be acquired from the
> atomic context of the eventfd wake function.  Disabling the irqfd
> relative to the eventfd registration is potentially incompatible with
> existing userspace.
>
> As a result, the solution implemented here moves configuration of the
> INTx interrupt handler to track the lifetime of the INTx context object
> and irq_type configuration, rather than registration of a particular
> trigger eventfd.  Synchronization is added between the ioctl path and
> eventfd_signal() wrapper such that the eventfd trigger can be
> dynamically updated relative to in-flight interrupts or irqfd callbacks.
>
> Cc: stable@...r.kernel.org
> Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver")
> Reported-by: Reinette Chatre <reinette.chatre@...el.com>
> Reviewed-by: Kevin Tian <kevin.tian@...el.com>
> Reviewed-by: Reinette Chatre <reinette.chatre@...el.com>
> Signed-off-by: Alex Williamson <alex.williamson@...hat.com>
Reviewed-by: Eric Auger <eric.auger@...hat.com>

Eric
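
For anyone following along without the full series: the pattern the commit
message describes boils down to keeping the INTx handler registered for the
lifetime of the context, having the handler consume only a READ_ONCE()
snapshot of ctx->trigger, and having the ioctl path swap the trigger with
WRITE_ONCE() before synchronizing and dropping the old reference.  Below is a
minimal sketch of that discipline using made-up names (demo_intx_ctx,
demo_intx_handler, demo_intx_set_trigger) rather than the actual driver code,
and assuming the one-argument eventfd_signal() this series is written against:

#include <linux/eventfd.h>
#include <linux/interrupt.h>

/* Hypothetical per-device INTx context, standing in for vfio_pci_irq_ctx. */
struct demo_intx_ctx {
	struct eventfd_ctx *trigger;	/* published/retracted by the ioctl path */
};

/* IRQ handler: only ever dereferences a snapshot of the trigger. */
static irqreturn_t demo_intx_handler(int irq, void *dev_id)
{
	struct demo_intx_ctx *ctx = dev_id;
	struct eventfd_ctx *trigger = READ_ONCE(ctx->trigger);

	/* NULL simply means "no eventfd configured right now", not a bug. */
	if (trigger)
		eventfd_signal(trigger);

	return IRQ_HANDLED;
}

/* ioctl path: swap the trigger while the handler stays registered. */
static void demo_intx_set_trigger(struct demo_intx_ctx *ctx, unsigned int irq,
				  struct eventfd_ctx *new_trigger)
{
	struct eventfd_ctx *old = ctx->trigger;

	WRITE_ONCE(ctx->trigger, new_trigger);

	/* Wait out handlers that may still hold the old snapshot, then drop it. */
	if (old) {
		synchronize_irq(irq);
		eventfd_ctx_put(old);
	}
}

The real code additionally flushes the unmask irqfd thread
(vfio_virqfd_flush_thread()) before dropping the old trigger, since that path
can also signal it.
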
> ---
>  drivers/vfio/pci/vfio_pci_intrs.c | 145 ++++++++++++++++--------------
>  1 file changed, 78 insertions(+), 67 deletions(-)
>
> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
> index 75c85eec21b3..fb5392b749ff 100644
> --- a/drivers/vfio/pci/vfio_pci_intrs.c
> +++ b/drivers/vfio/pci/vfio_pci_intrs.c
> @@ -90,11 +90,15 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
>  
>  	if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
>  		struct vfio_pci_irq_ctx *ctx;
> +		struct eventfd_ctx *trigger;
>  
>  		ctx = vfio_irq_ctx_get(vdev, 0);
>  		if (WARN_ON_ONCE(!ctx))
>  			return;
> -		eventfd_signal(ctx->trigger);
> +
> +		trigger = READ_ONCE(ctx->trigger);
> +		if (likely(trigger))
> +			eventfd_signal(trigger);
>  	}
>  }
>  
> @@ -253,100 +257,100 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
>  	return ret;
>  }
>  
> -static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
> +static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
> +			    struct eventfd_ctx *trigger)
>  {
> +	struct pci_dev *pdev = vdev->pdev;
>  	struct vfio_pci_irq_ctx *ctx;
> +	unsigned long irqflags;
> +	char *name;
> +	int ret;
>  
>  	if (!is_irq_none(vdev))
>  		return -EINVAL;
>  
> -	if (!vdev->pdev->irq)
> +	if (!pdev->irq)
>  		return -ENODEV;
>  
> +	name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
> +	if (!name)
> +		return -ENOMEM;
> +
>  	ctx = vfio_irq_ctx_alloc(vdev, 0);
>  	if (!ctx)
>  		return -ENOMEM;
>  
> +	ctx->name = name;
> +	ctx->trigger = trigger;
> +
>  	/*
> -	 * If the virtual interrupt is masked, restore it.  Devices
> -	 * supporting DisINTx can be masked at the hardware level
> -	 * here, non-PCI-2.3 devices will have to wait until the
> -	 * interrupt is enabled.
> +	 * Fill the initial masked state based on virq_disabled.  After
> +	 * enable, changing the DisINTx bit in vconfig directly changes INTx
> +	 * masking.  igate prevents races during setup, once running masked
> +	 * is protected via irqlock.
> +	 *
> +	 * Devices supporting DisINTx also reflect the current mask state in
> +	 * the physical DisINTx bit, which is not affected during IRQ setup.
> +	 *
> +	 * Devices without DisINTx support require an exclusive interrupt.
> +	 * IRQ masking is performed at the IRQ chip.  Again, igate protects
> +	 * against races during setup and IRQ handlers and irqfds are not
> +	 * yet active, therefore masked is stable and can be used to
> +	 * conditionally auto-enable the IRQ.
> +	 *
> +	 * irq_type must be stable while the IRQ handler is registered,
> +	 * therefore it must be set before request_irq().
>  	 */
>  	ctx->masked = vdev->virq_disabled;
> -	if (vdev->pci_2_3)
> -		pci_intx(vdev->pdev, !ctx->masked);
> +	if (vdev->pci_2_3) {
> +		pci_intx(pdev, !ctx->masked);
> +		irqflags = IRQF_SHARED;
> +	} else {
> +		irqflags = ctx->masked ? IRQF_NO_AUTOEN : 0;
> +	}
>  
>  	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
>  
> +	ret = request_irq(pdev->irq, vfio_intx_handler,
> +			  irqflags, ctx->name, vdev);
> +	if (ret) {
> +		vdev->irq_type = VFIO_PCI_NUM_IRQS;
> +		kfree(name);
> +		vfio_irq_ctx_free(vdev, ctx, 0);
> +		return ret;
> +	}
> +
>  	return 0;
>  }
>  
> -static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
> +static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev,
> +				struct eventfd_ctx *trigger)
>  {
>  	struct pci_dev *pdev = vdev->pdev;
> -	unsigned long irqflags = IRQF_SHARED;
>  	struct vfio_pci_irq_ctx *ctx;
> -	struct eventfd_ctx *trigger;
> -	unsigned long flags;
> -	int ret;
> +	struct eventfd_ctx *old;
>  
>  	ctx = vfio_irq_ctx_get(vdev, 0);
>  	if (WARN_ON_ONCE(!ctx))
>  		return -EINVAL;
>  
> -	if (ctx->trigger) {
> -		free_irq(pdev->irq, vdev);
> -		kfree(ctx->name);
> -		eventfd_ctx_put(ctx->trigger);
> -		ctx->trigger = NULL;
> -	}
> -
> -	if (fd < 0) /* Disable only */
> -		return 0;
> -
> -	ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
> -			      pci_name(pdev));
> -	if (!ctx->name)
> -		return -ENOMEM;
> -
> -	trigger = eventfd_ctx_fdget(fd);
> -	if (IS_ERR(trigger)) {
> -		kfree(ctx->name);
> -		return PTR_ERR(trigger);
> -	}
> +	old = ctx->trigger;
>  
> -	ctx->trigger = trigger;
> +	WRITE_ONCE(ctx->trigger, trigger);
>  
> -	/*
> -	 * Devices without DisINTx support require an exclusive interrupt,
> -	 * IRQ masking is performed at the IRQ chip.  The masked status is
> -	 * protected by vdev->irqlock. Setup the IRQ without auto-enable and
> -	 * unmask as necessary below under lock.  DisINTx is unmodified by
> -	 * the IRQ configuration and may therefore use auto-enable.
> -	 */
> -	if (!vdev->pci_2_3)
> -		irqflags = IRQF_NO_AUTOEN;
> -
> -	ret = request_irq(pdev->irq, vfio_intx_handler,
> -			  irqflags, ctx->name, vdev);
> -	if (ret) {
> -		ctx->trigger = NULL;
> -		kfree(ctx->name);
> -		eventfd_ctx_put(trigger);
> -		return ret;
> +	/* Releasing an old ctx requires synchronizing in-flight users */
> +	if (old) {
> +		synchronize_irq(pdev->irq);
> +		vfio_virqfd_flush_thread(&ctx->unmask);
> +		eventfd_ctx_put(old);
>  	}
>  
> -	spin_lock_irqsave(&vdev->irqlock, flags);
> -	if (!vdev->pci_2_3 && !ctx->masked)
> -		enable_irq(pdev->irq);
> -	spin_unlock_irqrestore(&vdev->irqlock, flags);
> -
>  	return 0;
>  }
>  
>  static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
>  {
> +	struct pci_dev *pdev = vdev->pdev;
>  	struct vfio_pci_irq_ctx *ctx;
>  
>  	ctx = vfio_irq_ctx_get(vdev, 0);
> @@ -354,10 +358,13 @@ static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
>  	if (ctx) {
>  		vfio_virqfd_disable(&ctx->unmask);
>  		vfio_virqfd_disable(&ctx->mask);
> +		free_irq(pdev->irq, vdev);
> +		if (ctx->trigger)
> +			eventfd_ctx_put(ctx->trigger);
> +		kfree(ctx->name);
> +		vfio_irq_ctx_free(vdev, ctx, 0);
>  	}
> -	vfio_intx_set_signal(vdev, -1);
>  	vdev->irq_type = VFIO_PCI_NUM_IRQS;
> -	vfio_irq_ctx_free(vdev, ctx, 0);
>  }
>  
>  /*
> @@ -641,19 +648,23 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
>  		return -EINVAL;
>  
>  	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
> +		struct eventfd_ctx *trigger = NULL;
>  		int32_t fd = *(int32_t *)data;
>  		int ret;
>  
> -		if (is_intx(vdev))
> -			return vfio_intx_set_signal(vdev, fd);
> +		if (fd >= 0) {
> +			trigger = eventfd_ctx_fdget(fd);
> +			if (IS_ERR(trigger))
> +				return PTR_ERR(trigger);
> +		}
>  
> -		ret = vfio_intx_enable(vdev);
> -		if (ret)
> -			return ret;
> +		if (is_intx(vdev))
> +			ret = vfio_intx_set_signal(vdev, trigger);
> +		else
> +			ret = vfio_intx_enable(vdev, trigger);
>  
> -		ret = vfio_intx_set_signal(vdev, fd);
> -		if (ret)
> -			vfio_intx_disable(vdev);
> +		if (ret && trigger)
> +			eventfd_ctx_put(trigger);
>  
>  		return ret;
>  	}
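
For completeness, here is roughly how userspace reaches the
vfio_pci_set_intx_trigger() path above through VFIO_DEVICE_SET_IRQS.  This is
only an illustrative sketch (demo_set_intx_trigger and device_fd are invented
names, error handling trimmed); with this series, calling it again with a new
fd swaps the trigger in place via vfio_intx_set_signal(), and passing -1
retracts the trigger while the IRQ handler stays registered:

#include <linux/vfio.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

/* device_fd: an already-open VFIO device fd for a device exposing INTx. */
static int demo_set_intx_trigger(int device_fd, int32_t eventfd_or_minus_one)
{
	struct vfio_irq_set *set;
	size_t argsz = sizeof(*set) + sizeof(int32_t);
	int ret;

	set = calloc(1, argsz);
	if (!set)
		return -1;

	set->argsz = argsz;
	set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	set->index = VFIO_PCI_INTX_IRQ_INDEX;
	set->start = 0;
	set->count = 1;
	/* -1 deconfigures the trigger; >= 0 installs (or swaps in) an eventfd. */
	*(int32_t *)set->data = eventfd_or_minus_one;

	ret = ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);
	free(set);
	return ret;
}

/* Typical use: int efd = eventfd(0, EFD_CLOEXEC); demo_set_intx_trigger(fd, efd); */
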

