lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <670c992437dc83e988cf383b2e4ec4d7f448e151.1567394624.git.luoben@linux.alibaba.com>
Date:   Mon,  2 Sep 2019 12:01:52 +0800
From:   Ben Luo <luoben@...ux.alibaba.com>
To:     tglx@...utronix.de, alex.williamson@...hat.com
Cc:     linux-kernel@...r.kernel.org, tao.ma@...ux.alibaba.com,
        gerry@...ux.alibaba.com, nanhai.zou@...ux.alibaba.com
Subject: [PATCH v6 3/3] vfio/pci: make use of irq_update_devid() and optimize irq ops

When userspace (e.g. qemu) triggers a switch between KVM
irqfd and userspace eventfd, only dev_id of irqaction
(i.e. the "trigger" in this patch's context) will be
changed, but a free-then-request-irq action is taken in
current code. And, interrupt affinity setting in VM will
also trigger a free-then-request-irq action, which actually
changes nothing in irqaction.

This patch makes use of irq_update_devid() and optimize
both cases above, which reduces the risk of losing interrupt
and also cuts some overhead.

Signed-off-by: Ben Luo <luoben@...ux.alibaba.com>
---
 drivers/vfio/pci/vfio_pci_intrs.c | 118 ++++++++++++++++++++++++++------------
 1 file changed, 81 insertions(+), 37 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 3fa3f72..57b2de3 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -284,8 +284,8 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
 static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 				      int vector, int fd, bool msix)
 {
+	struct eventfd_ctx *trigger = NULL;
 	struct pci_dev *pdev = vdev->pdev;
-	struct eventfd_ctx *trigger;
 	int irq, ret;
 
 	if (vector < 0 || vector >= vdev->num_ctx)
@@ -293,61 +293,105 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 
 	irq = pci_irq_vector(pdev, vector);
 
+	if (fd >= 0)
+		trigger = eventfd_ctx_fdget(fd);
+
+	/*
+	 * 'trigger' is NULL or invalid, disable the interrupt
+	 * 'trigger' is same as before, only bounce the bypass registration
+	 * 'trigger' is a new valid one, update it to irqaction and other
+	 * data structures referencing to the old one; fallback to disable
+	 * the interrupt on error
+	 */
 	if (vdev->ctx[vector].trigger) {
-		free_irq(irq, vdev->ctx[vector].trigger);
+		/*
+		 * even if the trigger is unchanged, we need to bounce the
+		 * interrupt bypass connection to allow affinity changes in
+		 * the guest to be realized.
+		 */
 		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
-		kfree(vdev->ctx[vector].name);
-		eventfd_ctx_put(vdev->ctx[vector].trigger);
-		vdev->ctx[vector].trigger = NULL;
+
+		if (vdev->ctx[vector].trigger == trigger) {
+			/* avoid duplicated referencing to the same trigger */
+			eventfd_ctx_put(trigger);
+
+		} else if (trigger && !IS_ERR(trigger)) {
+			ret = irq_update_devid(irq,
+					       vdev->ctx[vector].trigger,
+					       trigger);
+			if (unlikely(ret)) {
+				dev_info(&pdev->dev,
+					 "update devid of %d (token %p) failed: %d\n",
+					 irq, vdev->ctx[vector].trigger, ret);
+				eventfd_ctx_put(trigger);
+				free_irq(irq, vdev->ctx[vector].trigger);
+				kfree(vdev->ctx[vector].name);
+				eventfd_ctx_put(vdev->ctx[vector].trigger);
+				vdev->ctx[vector].trigger = NULL;
+				return ret;
+			}
+			eventfd_ctx_put(vdev->ctx[vector].trigger);
+			vdev->ctx[vector].producer.token = trigger;
+			vdev->ctx[vector].trigger = trigger;
+
+		} else {
+			free_irq(irq, vdev->ctx[vector].trigger);
+			kfree(vdev->ctx[vector].name);
+			eventfd_ctx_put(vdev->ctx[vector].trigger);
+			vdev->ctx[vector].trigger = NULL;
+		}
 	}
 
 	if (fd < 0)
 		return 0;
+	else if (IS_ERR(trigger))
+		return PTR_ERR(trigger);
 
-	vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
-					   msix ? "x" : "", vector,
-					   pci_name(pdev));
-	if (!vdev->ctx[vector].name)
-		return -ENOMEM;
+	if (!vdev->ctx[vector].trigger) {
+		vdev->ctx[vector].name = kasprintf(GFP_KERNEL,
+						   "vfio-msi%s[%d](%s)",
+						   msix ? "x" : "", vector,
+						   pci_name(pdev));
+		if (!vdev->ctx[vector].name) {
+			eventfd_ctx_put(trigger);
+			return -ENOMEM;
+		}
 
-	trigger = eventfd_ctx_fdget(fd);
-	if (IS_ERR(trigger)) {
-		kfree(vdev->ctx[vector].name);
-		return PTR_ERR(trigger);
-	}
+		/*
+		 * The MSIx vector table resides in device memory which may be
+		 * cleared via backdoor resets. We don't allow direct access to
+		 * the vector table so even if a userspace driver attempts to
+		 * save/restore around such a reset it would be unsuccessful.
+		 * To avoid this, restore the cached value of the message prior
+		 * to enabling.
+		 */
+		if (msix) {
+			struct msi_msg msg;
 
-	/*
-	 * The MSIx vector table resides in device memory which may be cleared
-	 * via backdoor resets. We don't allow direct access to the vector
-	 * table so even if a userspace driver attempts to save/restore around
-	 * such a reset it would be unsuccessful. To avoid this, restore the
-	 * cached value of the message prior to enabling.
-	 */
-	if (msix) {
-		struct msi_msg msg;
+			get_cached_msi_msg(irq, &msg);
+			pci_write_msi_msg(irq, &msg);
+		}
 
-		get_cached_msi_msg(irq, &msg);
-		pci_write_msi_msg(irq, &msg);
-	}
+		ret = request_irq(irq, vfio_msihandler, 0,
+				  vdev->ctx[vector].name, trigger);
+		if (ret) {
+			kfree(vdev->ctx[vector].name);
+			eventfd_ctx_put(trigger);
+			return ret;
+		}
 
-	ret = request_irq(irq, vfio_msihandler, 0,
-			  vdev->ctx[vector].name, trigger);
-	if (ret) {
-		kfree(vdev->ctx[vector].name);
-		eventfd_ctx_put(trigger);
-		return ret;
+		vdev->ctx[vector].producer.token = trigger;
+		vdev->ctx[vector].producer.irq = irq;
+		vdev->ctx[vector].trigger = trigger;
 	}
 
-	vdev->ctx[vector].producer.token = trigger;
-	vdev->ctx[vector].producer.irq = irq;
+	/* setup bypass connection and make irte updated */
 	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
 	if (unlikely(ret))
 		dev_info(&pdev->dev,
 		"irq bypass producer (token %p) registration fails: %d\n",
 		vdev->ctx[vector].producer.token, ret);
 
-	vdev->ctx[vector].trigger = trigger;
-
 	return 0;
 }
 
-- 
1.8.3.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ