lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1443977019-1513-3-git-send-email-vladz@cloudius-systems.com>
Date:	Sun,  4 Oct 2015 19:43:38 +0300
From:	Vlad Zolotarov <vladz@...udius-systems.com>
To:	linux-kernel@...r.kernel.org, mst@...hat.com, hjk@...sjkoch.de,
	regkh@...uxfoundation.org, corbet@....net
Cc:	bruce.richardson@...el.com, avi@...udius-systems.com,
	gleb@...udius-systems.com, stephen@...workplumber.org,
	alexander.duyck@...il.com,
	Vlad Zolotarov <vladz@...udius-systems.com>
Subject: [PATCH v1 2/3] uio_pci_generic: add MSI/MSI-X support

Add support for MSI and MSI-X interrupt modes:
   - Interrupt mode selection order is:
        INT#X (for backward compatibility) -> MSI-X -> MSI.
   - Add ioctl() commands:
      - UIO_PCI_GENERIC_INT_MODE_GET: query the current interrupt mode.
      - UIO_PCI_GENERIC_IRQ_NUM_GET: query the maximum number of IRQs.
      - UIO_PCI_GENERIC_IRQ_SET: bind the IRQ to eventfd (similar to vfio).
   - Add mappings to all bars (memory and portio): some devices have
     registers related to MSI/MSI-X handling outside BAR0.

Signed-off-by: Vlad Zolotarov <vladz@...udius-systems.com>
---
 drivers/uio/uio_pci_generic.c   | 409 +++++++++++++++++++++++++++++++++++++---
 include/linux/uio_pci_generic.h |  36 ++++
 2 files changed, 422 insertions(+), 23 deletions(-)
 create mode 100644 include/linux/uio_pci_generic.h

diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
index d0b508b..19365cc 100644
--- a/drivers/uio/uio_pci_generic.c
+++ b/drivers/uio/uio_pci_generic.c
@@ -22,16 +22,31 @@
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/msi.h>
 #include <linux/slab.h>
 #include <linux/uio_driver.h>
+#include <linux/uio_pci_generic.h>
+#include <linux/eventfd.h>
 
 #define DRIVER_VERSION	"0.01.0"
 #define DRIVER_AUTHOR	"Michael S. Tsirkin <mst@...hat.com>"
 #define DRIVER_DESC	"Generic UIO driver for PCI 2.3 devices"
 
+struct msix_info {
+	int num_irqs;
+	struct msix_entry *table;
+	struct uio_msix_irq_ctx {
+		struct eventfd_ctx *trigger;	/* MSI-x vector to eventfd */
+		char *name;			/* name in /proc/interrupts */
+	} *ctx;
+};
+
 struct uio_pci_generic_dev {
 	struct uio_info info;
 	struct pci_dev *pdev;
+	struct mutex msix_state_lock;		/* ioctl mutex */
+	enum uio_int_mode int_mode;
+	struct msix_info msix;
 };
 
 static inline struct uio_pci_generic_dev *
@@ -40,9 +55,177 @@ to_uio_pci_generic_dev(struct uio_info *info)
 	return container_of(info, struct uio_pci_generic_dev, info);
 }
 
-/* Interrupt handler. Read/modify/write the command register to disable
- * the interrupt. */
-static irqreturn_t irqhandler(int irq, struct uio_info *info)
+/* Unmap previously ioremap'd resources */
+static void release_iomaps(struct uio_pci_generic_dev *gdev)
+{
+	int i;
+	struct uio_mem *mem = gdev->info.mem;
+
+	for (i = 0; i < MAX_UIO_MAPS; i++, mem++) {
+		if (mem->internal_addr) {
+			iounmap(mem->internal_addr);
+			mem->internal_addr = NULL;
+		}
+	}
+}
+
+static int setup_maps(struct pci_dev *pdev, struct uio_info *info)
+{
+	int i, m = 0, p = 0, err;
+	static const char * const bar_names[] = {
+		"BAR0",	"BAR1",	"BAR2",	"BAR3",	"BAR4",	"BAR5",
+	};
+
+	for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
+		unsigned long start = pci_resource_start(pdev, i);
+		unsigned long flags = pci_resource_flags(pdev, i);
+		unsigned long len = pci_resource_len(pdev, i);
+
+		if (start == 0 || len == 0)
+			continue;
+
+		if (flags & IORESOURCE_MEM) {
+			void *addr;
+
+			if (m >= MAX_UIO_MAPS)
+				continue;
+
+			addr = ioremap(start, len);
+			if (addr == NULL) {
+				err = -EINVAL;
+				goto fail;
+			}
+
+			info->mem[m].name = bar_names[i];
+			info->mem[m].addr = start;
+			info->mem[m].internal_addr = addr;
+			info->mem[m].size = len;
+			info->mem[m].memtype = UIO_MEM_PHYS;
+			++m;
+		} else if (flags & IORESOURCE_IO) {
+			if (p >= MAX_UIO_PORT_REGIONS)
+				continue;
+
+			info->port[p].name = bar_names[i];
+			info->port[p].start = start;
+			info->port[p].size = len;
+			info->port[p].porttype = UIO_PORT_X86;
+			++p;
+		}
+	}
+
+	return 0;
+fail:
+	for (i = 0; i < m; i++) {
+		iounmap(info->mem[i].internal_addr);
+		info->mem[i].internal_addr = NULL;
+	}
+
+	return err;
+}
+
+static irqreturn_t msix_irqhandler(int irq, void *arg);
+
+/* set the mapping between vector # and existing eventfd. */
+static int set_irq_eventfd(struct uio_pci_generic_dev *gdev, int vec, int fd)
+{
+	struct uio_msix_irq_ctx *ctx;
+	struct eventfd_ctx *trigger;
+	struct pci_dev *pdev = gdev->pdev;
+	int irq, err;
+
+	if (vec >= gdev->msix.num_irqs) {
+		dev_notice(&gdev->pdev->dev, "vec %u >= num_vec %u\n",
+			   vec, gdev->msix.num_irqs);
+		return -ERANGE;
+	}
+
+	irq = gdev->msix.table[vec].vector;
+
+	/* Cleanup existing irq mapping */
+	ctx = &gdev->msix.ctx[vec];
+	if (ctx->trigger) {
+		free_irq(irq, ctx->trigger);
+		eventfd_ctx_put(ctx->trigger);
+		ctx->trigger = NULL;
+	}
+
+	/* Passing -1 is used to disable interrupt */
+	if (fd < 0)
+		return 0;
+
+
+	trigger = eventfd_ctx_fdget(fd);
+	if (IS_ERR(trigger)) {
+		err = PTR_ERR(trigger);
+		dev_notice(&gdev->pdev->dev,
+			   "eventfd ctx get failed: %d\n", err);
+		return err;
+	}
+
+	err = request_irq(irq, msix_irqhandler, 0, ctx->name, trigger);
+	if (err) {
+		dev_notice(&pdev->dev, "request irq failed: %d\n", err);
+		eventfd_ctx_put(trigger);
+		return err;
+	}
+
+	dev_dbg(&pdev->dev, "map vector %u to fd %d trigger %p\n",
+		vec, fd, trigger);
+	ctx->trigger = trigger;
+
+	return 0;
+}
+
+static int uio_pci_generic_ioctl(struct uio_info *info, unsigned int cmd,
+				 unsigned long arg)
+{
+	struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
+	struct uio_pci_generic_irq_set hdr;
+	int err;
+
+	switch (cmd) {
+	case UIO_PCI_GENERIC_IRQ_SET:
+		if (copy_from_user(&hdr, (void __user *)arg, sizeof(hdr)))
+			return -EFAULT;
+
+		/* Locking is needed to ensure two things:
+		 *  1) Two IRQ_SET ioctl()'s are not running in parallel.
+		 *  2) IRQ_SET ioctl() is not running in parallel with remove().
+		 */
+		mutex_lock(&gdev->msix_state_lock);
+		if (gdev->int_mode != UIO_INT_MODE_MSIX) {
+			mutex_unlock(&gdev->msix_state_lock);
+			return -EOPNOTSUPP;
+		}
+
+		err = set_irq_eventfd(gdev, hdr.vec, hdr.fd);
+		mutex_unlock(&gdev->msix_state_lock);
+
+		break;
+	case UIO_PCI_GENERIC_IRQ_NUM_GET:
+		if (gdev->int_mode == UIO_INT_MODE_NONE)
+			err = put_user(0, (u32 __user *)arg);
+		else if (gdev->int_mode != UIO_INT_MODE_MSIX)
+			err = put_user(1, (u32 __user *)arg);
+		else
+			err = put_user(gdev->msix.num_irqs,
+				       (u32 __user *)arg);
+
+		break;
+	case UIO_PCI_GENERIC_INT_MODE_GET:
+		err = put_user(gdev->int_mode, (u32 __user *)arg);
+
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+/* INT#X interrupt handler. */
+static irqreturn_t intx_irqhandler(int irq, struct uio_info *info)
 {
 	struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
 
@@ -53,8 +236,162 @@ static irqreturn_t irqhandler(int irq, struct uio_info *info)
 	return IRQ_HANDLED;
 }
 
-static int probe(struct pci_dev *pdev,
-			   const struct pci_device_id *id)
+/* MSI interrupt handler. */
+static irqreturn_t msi_irqhandler(int irq, struct uio_info *info)
+{
+	/* UIO core will signal the user process. */
+	return IRQ_HANDLED;
+}
+
+/* MSI-X interrupt handler. */
+static irqreturn_t msix_irqhandler(int irq, void *arg)
+{
+	struct eventfd_ctx *trigger = arg;
+
+	pr_devel("irq %u trigger %p\n", irq, trigger);
+
+	eventfd_signal(trigger, 1);
+	return IRQ_HANDLED;
+}
+
+static bool enable_intx(struct uio_pci_generic_dev *gdev)
+{
+	struct pci_dev *pdev = gdev->pdev;
+
+	if (!pdev->irq || !pci_intx_mask_supported(pdev))
+		return false;
+
+	gdev->int_mode = UIO_INT_MODE_INTX;
+	gdev->info.irq = pdev->irq;
+	gdev->info.irq_flags = IRQF_SHARED;
+	gdev->info.handler = intx_irqhandler;
+
+	return true;
+}
+
+static void set_pci_master(struct pci_dev *pdev)
+{
+	pci_set_master(pdev);
+	dev_warn(&pdev->dev, "Enabling PCI bus mastering. Bogus userspace application is able to trash kernel memory using DMA");
+	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+}
+
+static bool enable_msi(struct uio_pci_generic_dev *gdev)
+{
+	struct pci_dev *pdev = gdev->pdev;
+
+	set_pci_master(pdev);
+
+	if (pci_enable_msi(pdev))
+		return false;
+
+	gdev->int_mode = UIO_INT_MODE_MSI;
+	gdev->info.irq = pdev->irq;
+	gdev->info.irq_flags = 0;
+	gdev->info.handler = msi_irqhandler;
+
+	return true;
+}
+
+static bool enable_msix(struct uio_pci_generic_dev *gdev)
+{
+	struct pci_dev *pdev = gdev->pdev;
+	int i, vectors = pci_msix_vec_count(pdev);
+
+	if (vectors <= 0)
+		return false;
+
+	gdev->msix.table = kcalloc(vectors, sizeof(struct msix_entry),
+				   GFP_KERNEL);
+	if (!gdev->msix.table) {
+		dev_err(&pdev->dev, "Failed to allocate memory for MSI-X table");
+		return false;
+	}
+
+	gdev->msix.ctx = kcalloc(vectors, sizeof(struct uio_msix_irq_ctx),
+				 GFP_KERNEL);
+	if (!gdev->msix.ctx) {
+		dev_err(&pdev->dev, "Failed to allocate memory for MSI-X contexts");
+		goto err_ctx_alloc;
+	}
+
+	for (i = 0; i < vectors; i++) {
+		gdev->msix.table[i].entry = i;
+		gdev->msix.ctx[i].name = kasprintf(GFP_KERNEL,
+						   KBUILD_MODNAME "[%d](%s)",
+						   i, pci_name(pdev));
+		if (!gdev->msix.ctx[i].name)
+			goto err_name_alloc;
+	}
+
+	set_pci_master(pdev);
+
+	if (pci_enable_msix(pdev, gdev->msix.table, vectors))
+		goto err_msix_enable;
+
+	gdev->int_mode = UIO_INT_MODE_MSIX;
+	gdev->info.irq = UIO_IRQ_CUSTOM;
+	gdev->msix.num_irqs = vectors;
+
+	return true;
+
+err_msix_enable:
+	pci_clear_master(pdev);
+err_name_alloc:
+	for (i = 0; i < vectors; i++)
+		kfree(gdev->msix.ctx[i].name);
+
+	kfree(gdev->msix.ctx);
+err_ctx_alloc:
+	kfree(gdev->msix.table);
+
+	return false;
+}
+
+/**
+ * Disable interrupts and free related resources.
+ *
+ * @gdev device handle
+ *
+ * This function should be called after the corresponding UIO device has been
+ * unregistered. This will ensure that there are no currently running ioctl()s
+ * and there won't be any new ones until next probe() call.
+ */
+static void disable_intr(struct uio_pci_generic_dev *gdev)
+{
+	struct pci_dev *pdev = gdev->pdev;
+	int i;
+
+	switch (gdev->int_mode) {
+	case UIO_INT_MODE_MSI:
+		pci_disable_msi(pdev);
+		pci_clear_master(pdev);
+
+		break;
+	case UIO_INT_MODE_MSIX:
+		/* No need for locking here since there shouldn't be any
+		 * ioctl()s running by now.
+		 */
+		for (i = 0; i < gdev->msix.num_irqs; i++) {
+			if (gdev->msix.ctx[i].trigger)
+				set_irq_eventfd(gdev, i, -1);
+
+			kfree(gdev->msix.ctx[i].name);
+		}
+
+		pci_disable_msix(pdev);
+		pci_clear_master(pdev);
+		kfree(gdev->msix.ctx);
+		kfree(gdev->msix.table);
+
+		break;
+	default:
+		break;
+	}
+}
+
+
+static int probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct uio_pci_generic_dev *gdev;
 	int err;
@@ -66,42 +403,64 @@ static int probe(struct pci_dev *pdev,
 		return err;
 	}
 
-	if (!pdev->irq) {
-		dev_warn(&pdev->dev, "No IRQ assigned to device: "
-			 "no support for interrupts?\n");
-		pci_disable_device(pdev);
-		return -ENODEV;
-	}
-
-	if (!pci_intx_mask_supported(pdev)) {
-		err = -ENODEV;
-		goto err_verify;
-	}
-
 	gdev = kzalloc(sizeof(struct uio_pci_generic_dev), GFP_KERNEL);
 	if (!gdev) {
 		err = -ENOMEM;
 		goto err_alloc;
 	}
 
+	gdev->pdev = pdev;
 	gdev->info.name = "uio_pci_generic";
 	gdev->info.version = DRIVER_VERSION;
-	gdev->info.irq = pdev->irq;
-	gdev->info.irq_flags = IRQF_SHARED;
-	gdev->info.handler = irqhandler;
-	gdev->pdev = pdev;
+	gdev->info.ioctl = uio_pci_generic_ioctl;
+	mutex_init(&gdev->msix_state_lock);
+
+	err = pci_request_regions(pdev, "uio_pci_generic");
+	if (err != 0) {
+		dev_err(&pdev->dev, "Cannot request regions\n");
+		goto err_request_regions;
+	}
+
+	/* Enable the corresponding interrupt mode. Try to enable INT#X first
+	 * for backward compatibility.
+	 */
+	if (enable_intx(gdev))
+		dev_info(&pdev->dev, "Using INT#x mode: IRQ %ld",
+			 gdev->info.irq);
+	else if (enable_msix(gdev))
+		dev_info(&pdev->dev, "Using MSI-X mode: number of IRQs %d",
+			 gdev->msix.num_irqs);
+	else if (enable_msi(gdev))
+		dev_info(&pdev->dev, "Using MSI mode: IRQ %ld", gdev->info.irq);
+	else {
+		err = -ENODEV;
+		goto err_verify;
+	}
+
+	/* remap resources */
+	err = setup_maps(pdev, &gdev->info);
+	if (err)
+		goto err_maps;
 
 	err = uio_register_device(&pdev->dev, &gdev->info);
 	if (err)
 		goto err_register;
+
 	pci_set_drvdata(pdev, gdev);
 
 	return 0;
+
 err_register:
+	release_iomaps(gdev);
+err_maps:
+	disable_intr(gdev);
+err_verify:
+	pci_release_regions(pdev);
+err_request_regions:
 	kfree(gdev);
 err_alloc:
-err_verify:
 	pci_disable_device(pdev);
+
 	return err;
 }
 
@@ -110,8 +469,12 @@ static void remove(struct pci_dev *pdev)
 	struct uio_pci_generic_dev *gdev = pci_get_drvdata(pdev);
 
 	uio_unregister_device(&gdev->info);
-	pci_disable_device(pdev);
+	disable_intr(gdev);
+	release_iomaps(gdev);
+	pci_release_regions(pdev);
 	kfree(gdev);
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
 }
 
 static struct pci_driver uio_pci_driver = {
diff --git a/include/linux/uio_pci_generic.h b/include/linux/uio_pci_generic.h
new file mode 100644
index 0000000..10716fc
--- /dev/null
+++ b/include/linux/uio_pci_generic.h
@@ -0,0 +1,36 @@
+/*
+ * include/linux/uio_pci_generic.h
+ *
+ * Userspace generic PCI IO driver.
+ *
+ * Licensed under the GPLv2 only.
+ */
+
+#ifndef _UIO_PCI_GENERIC_H_
+#define _UIO_PCI_GENERIC_H_
+
+#include <linux/ioctl.h>
+
+enum uio_int_mode {
+	UIO_INT_MODE_NONE,
+	UIO_INT_MODE_INTX,
+	UIO_INT_MODE_MSI,
+	UIO_INT_MODE_MSIX
+};
+
+/* bind the requested IRQ to the given eventfd */
+struct uio_pci_generic_irq_set {
+	int vec; /* index of the IRQ to connect to starting from 0 */
+	int fd;
+};
+
+#define UIO_PCI_GENERIC_BASE		0x86
+
+#define UIO_PCI_GENERIC_IRQ_SET		_IOW('I', UIO_PCI_GENERIC_BASE + 1, \
+		struct uio_pci_generic_irq_set)
+#define UIO_PCI_GENERIC_IRQ_NUM_GET	_IOW('I', UIO_PCI_GENERIC_BASE + 2, \
+		uint32_t)
+#define UIO_PCI_GENERIC_INT_MODE_GET	_IOW('I', UIO_PCI_GENERIC_BASE + 3, \
+		uint32_t)
+
+#endif /* _UIO_PCI_GENERIC_H_ */
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ