[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <560DC45A.3050507@gmail.com>
Date: Thu, 1 Oct 2015 16:40:10 -0700
From: Alexander Duyck <alexander.duyck@...il.com>
To: Stephen Hemminger <stephen@...workplumber.org>, hjk@...sjkoch.de,
gregkh@...ux-foundation.org
Cc: dev@...k.org, linux-kernel@...r.kernel.org
Subject: Re: [dpdk-dev] [PATCH 2/2] uio: new driver to support PCI MSI-X
On 09/30/2015 03:28 PM, Stephen Hemminger wrote:
> This driver allows using PCI device with Message Signalled Interrupt
> from userspace. The API is similar to the igb_uio driver used by the DPDK.
> Via ioctl it provides a mechanism to map MSI-X interrupts into event
> file descriptors similar to VFIO.
>
> VFIO is a better choice if IOMMU is available, but often userspace drivers
> have to work in environments where IOMMU support (real or emulated) is
> not available. All UIO drivers that support DMA are not secure against
> rogue userspace applications programming DMA hardware to access
> private memory; this driver is no less secure than existing code.
>
> Signed-off-by: Stephen Hemminger <stephen@...workplumber.org>
> ---
> drivers/uio/Kconfig | 9 ++
> drivers/uio/Makefile | 1 +
> drivers/uio/uio_msi.c | 378 +++++++++++++++++++++++++++++++++++++++++++
> include/uapi/linux/Kbuild | 1 +
> include/uapi/linux/uio_msi.h | 22 +++
> 5 files changed, 411 insertions(+)
> create mode 100644 drivers/uio/uio_msi.c
> create mode 100644 include/uapi/linux/uio_msi.h
>
> diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig
> index 52c98ce..04adfa0 100644
> --- a/drivers/uio/Kconfig
> +++ b/drivers/uio/Kconfig
> @@ -93,6 +93,15 @@ config UIO_PCI_GENERIC
> primarily, for virtualization scenarios.
> If you compile this as a module, it will be called uio_pci_generic.
>
> +config UIO_PCI_MSI
> + tristate "Generic driver supporting MSI-x on PCI Express cards"
> + depends on PCI
> + help
> + Generic driver that provides Message Signalled IRQ events
> + similar to VFIO. If IOMMMU is available please use VFIO
> + instead since it provides more security.
> + If you compile this as a module, it will be called uio_msi.
> +
> config UIO_NETX
> tristate "Hilscher NetX Card driver"
> depends on PCI
Should you maybe instead depend on CONFIG_PCI_MSI. Without MSI this is
essentially just uio_pci_generic with a bit more greedy mapping setup.
> diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile
> index 8560dad..62fc44b 100644
> --- a/drivers/uio/Makefile
> +++ b/drivers/uio/Makefile
> @@ -9,3 +9,4 @@ obj-$(CONFIG_UIO_NETX) += uio_netx.o
> obj-$(CONFIG_UIO_PRUSS) += uio_pruss.o
> obj-$(CONFIG_UIO_MF624) += uio_mf624.o
> obj-$(CONFIG_UIO_FSL_ELBC_GPCM) += uio_fsl_elbc_gpcm.o
> +obj-$(CONFIG_UIO_PCI_MSI) += uio_msi.o
> diff --git a/drivers/uio/uio_msi.c b/drivers/uio/uio_msi.c
> new file mode 100644
> index 0000000..802b5c4
> --- /dev/null
> +++ b/drivers/uio/uio_msi.c
> @@ -0,0 +1,378 @@
> +/*-
> + *
> + * Copyright (c) 2015 by Brocade Communications Systems, Inc.
> + * Author: Stephen Hemminger <stephen@...workplumber.org>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 only.
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/device.h>
> +#include <linux/interrupt.h>
> +#include <linux/eventfd.h>
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <linux/uio_driver.h>
> +#include <linux/msi.h>
> +#include <linux/uio_msi.h>
> +
> +#define DRIVER_VERSION "0.1.1"
> +#define MAX_MSIX_VECTORS 64
> +
> +/* MSI-X vector information */
> +struct uio_msi_pci_dev {
> + struct uio_info info; /* UIO driver info */
> + struct pci_dev *pdev; /* PCI device */
> + struct mutex mutex; /* open/release/ioctl mutex */
> + int ref_cnt; /* references to device */
> + unsigned int max_vectors; /* MSI-X slots available */
> + struct msix_entry *msix; /* MSI-X vector table */
> + struct uio_msi_irq_ctx {
> + struct eventfd_ctx *trigger; /* vector to eventfd */
> + char *name; /* name in /proc/interrupts */
> + } *ctx;
> +};
> +
I would move the definition of uio_msi_irq_ctx out of uio_msi_pci_dev.
It would help to make this a bit more readable.
> +static irqreturn_t uio_intx_irqhandler(int irq, void *arg)
> +{
> + struct uio_msi_pci_dev *udev = arg;
> +
> + if (pci_check_and_mask_intx(udev->pdev)) {
> + eventfd_signal(udev->ctx->trigger, 1);
> + return IRQ_HANDLED;
> + }
> +
> + return IRQ_NONE;
> +}
> +
I would really prefer to see the intx handling dropped since there are
already 2 different UIO drivers setup for handling INTx style
interrupts. Lets focus on the parts from the last decade and drop
support for INTx now in favor of MSI-X and maybe MSI. If we _REALLY_
need it we can always come back later and add it.
> +static irqreturn_t uio_msi_irqhandler(int irq, void *arg)
> +{
> + struct eventfd_ctx *trigger = arg;
> +
> + eventfd_signal(trigger, 1);
> + return IRQ_HANDLED;
> +}
> +
> +/* set the mapping between vector # and existing eventfd. */
> +static int set_irq_eventfd(struct uio_msi_pci_dev *udev, u32 vec, int fd)
> +{
> + struct eventfd_ctx *trigger;
> + int irq, err;
> +
> + if (vec >= udev->max_vectors) {
> + dev_notice(&udev->pdev->dev, "vec %u >= num_vec %u\n",
> + vec, udev->max_vectors);
> + return -ERANGE;
> + }
> +
> + irq = udev->msix[vec].vector;
> + trigger = udev->ctx[vec].trigger;
> + if (trigger) {
> + /* Clearup existing irq mapping */
Minor spelling issue here, "Clear up" should be 2 words, "Cleanup" can
be one.
> + free_irq(irq, trigger);
> + eventfd_ctx_put(trigger);
> + udev->ctx[vec].trigger = NULL;
> + }
> +
> + /* Passing -1 is used to disable interrupt */
> + if (fd < 0)
> + return 0;
> +
> + trigger = eventfd_ctx_fdget(fd);
> + if (IS_ERR(trigger)) {
> + err = PTR_ERR(trigger);
> + dev_notice(&udev->pdev->dev,
> + "eventfd ctx get failed: %d\n", err);
> + return err;
> + }
> +
> + if (udev->msix)
> + err = request_irq(irq, uio_msi_irqhandler, 0,
> + udev->ctx[vec].name, trigger);
> + else
> + err = request_irq(irq, uio_intx_irqhandler, IRQF_SHARED,
> + udev->ctx[vec].name, udev);
> +
> + if (err) {
> + dev_notice(&udev->pdev->dev,
> + "request irq failed: %d\n", err);
> + eventfd_ctx_put(trigger);
> + return err;
> + }
> +
> + udev->ctx[vec].trigger = trigger;
> + return 0;
> +}
> +
> +static int
> +uio_msi_ioctl(struct uio_info *info, unsigned int cmd, unsigned long arg)
> +{
> + struct uio_msi_pci_dev *udev
> + = container_of(info, struct uio_msi_pci_dev, info);
> + struct uio_msi_irq_set hdr;
> + int err;
> +
> + switch (cmd) {
> + case UIO_MSI_IRQ_SET:
> + if (copy_from_user(&hdr, (void __user *)arg, sizeof(hdr)))
> + return -EFAULT;
> +
> + mutex_lock(&udev->mutex);
> + err = set_irq_eventfd(udev, hdr.vec, hdr.fd);
> + mutex_unlock(&udev->mutex);
> + break;
> + default:
> + err = -EOPNOTSUPP;
> + }
> + return err;
> +}
> +
> +/* Opening the UIO device for first time enables MSI-X */
> +static int
> +uio_msi_open(struct uio_info *info, struct inode *inode)
> +{
> + struct uio_msi_pci_dev *udev
> + = container_of(info, struct uio_msi_pci_dev, info);
> + int err = 0;
> +
> + mutex_lock(&udev->mutex);
> + if (udev->ref_cnt++ == 0) {
> + if (udev->msix)
> + err = pci_enable_msix(udev->pdev, udev->msix,
> + udev->max_vectors);
> + }
> + mutex_unlock(&udev->mutex);
> +
> + return err;
> +}
> +
I agree with some other reviewers. Why call pci_enable_msix in open?
It seems like it would make much more sense to do this on probe, and
then disable MSI-X on free. I can only assume you are trying to do it
to save on resources but the fact is this is a driver you have to
explicitly force onto a device so you would probably be safe to assume
that they plan to use it in the near future.
> +/* Last close of the UIO device releases/disables all IRQ's */
> +static int
> +uio_msi_release(struct uio_info *info, struct inode *inode)
> +{
> + struct uio_msi_pci_dev *udev
> + = container_of(info, struct uio_msi_pci_dev, info);
> + int i;
> +
> + mutex_lock(&udev->mutex);
> + if (--udev->ref_cnt == 0) {
> + for (i = 0; i < udev->max_vectors; i++) {
> + int irq = udev->msix[i].vector;
> + struct eventfd_ctx *trigger = udev->ctx[i].trigger;
> +
> + if (!trigger)
> + continue;
> +
> + free_irq(irq, trigger);
> + eventfd_ctx_put(trigger);
> + udev->ctx[i].trigger = NULL;
> + }
> +
> + if (udev->msix)
> + pci_disable_msix(udev->pdev);
> + }
> + mutex_unlock(&udev->mutex);
> +
> + return 0;
> +}
> +
> +/* Unmap previously ioremap'd resources */
> +static void
> +release_iomaps(struct uio_mem *mem)
> +{
> + int i;
> +
> + for (i = 0; i < MAX_UIO_MAPS; i++, mem++) {
> + if (mem->internal_addr)
> + iounmap(mem->internal_addr);
> + }
> +}
> +
> +static int
> +setup_maps(struct pci_dev *pdev, struct uio_info *info)
> +{
> + int i, m = 0, p = 0, err;
> + static const char * const bar_names[] = {
> + "BAR0", "BAR1", "BAR2", "BAR3", "BAR4", "BAR5",
> + };
> +
> + for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
> + unsigned long start = pci_resource_start(pdev, i);
> + unsigned long flags = pci_resource_flags(pdev, i);
> + unsigned long len = pci_resource_len(pdev, i);
> +
> + if (start == 0 || len == 0)
> + continue;
> +
> + if (flags & IORESOURCE_MEM) {
> + void __iomem *addr;
> +
> + if (m >= MAX_UIO_MAPS)
> + continue;
> +
> + addr = ioremap(start, len);
> + if (addr == NULL) {
> + err = -EINVAL;
> + goto fail;
> + }
> +
> + info->mem[m].name = bar_names[i];
> + info->mem[m].addr = start;
> + info->mem[m].internal_addr = addr;
> + info->mem[m].size = len;
> + info->mem[m].memtype = UIO_MEM_PHYS;
> + ++m;
> + } else if (flags & IORESOURCE_IO) {
> + if (p >= MAX_UIO_PORT_REGIONS)
> + continue;
> +
> + info->port[p].name = bar_names[i];
> + info->port[p].start = start;
> + info->port[p].size = len;
> + info->port[p].porttype = UIO_PORT_X86;
> + ++p;
> + }
> + }
> +
> + return 0;
> + fail:
> + for (i = 0; i < m; i++)
> + iounmap(info->mem[i].internal_addr);
> + return err;
> +}
> +
Do you really need to map IORESOURCE bars? Most drivers I can think of
don't use IO BARs anymore. Maybe we could look at just dropping the
code and adding it back later if we have a use case that absolutely
needs it.
Also how many devices actually need resources beyond BAR 0? I'm just
curious as I know BAR 2 on many of the Intel devices is the register
space related to MSI-X so now we have both the PCIe subsystem and user
space with access to this region.
> +static int uio_msi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> +{
> + struct uio_msi_pci_dev *udev;
> + int i, err, vectors;
> +
> + udev = kzalloc(sizeof(struct uio_msi_pci_dev), GFP_KERNEL);
> + if (!udev)
> + return -ENOMEM;
> +
> + err = pci_enable_device(pdev);
> + if (err != 0) {
> + dev_err(&pdev->dev, "cannot enable PCI device\n");
> + goto fail_free;
> + }
> +
> + err = pci_request_regions(pdev, "uio_msi");
> + if (err != 0) {
> + dev_err(&pdev->dev, "Cannot request regions\n");
> + goto fail_disable;
> + }
> +
> + pci_set_master(pdev);
> +
> + /* remap resources */
> + err = setup_maps(pdev, &udev->info);
> + if (err)
> + goto fail_release_iomem;
> +
> + /* fill uio infos */
> + udev->info.name = "uio_msi";
> + udev->info.version = DRIVER_VERSION;
> + udev->info.priv = udev;
> + udev->pdev = pdev;
> + udev->info.ioctl = uio_msi_ioctl;
> + udev->info.open = uio_msi_open;
> + udev->info.release = uio_msi_release;
> + udev->info.irq = UIO_IRQ_CUSTOM;
> + mutex_init(&udev->mutex);
> +
> + vectors = pci_msix_vec_count(pdev);
> + if (vectors > 0) {
> + udev->max_vectors = min_t(u16, vectors, MAX_MSIX_VECTORS);
> + dev_info(&pdev->dev, "using up to %u MSI-X vectors\n",
> + udev->max_vectors);
> +
> + err = -ENOMEM;
> + udev->msix = kcalloc(udev->max_vectors,
> + sizeof(struct msix_entry), GFP_KERNEL);
> + if (!udev->msix)
> + goto fail_release_iomem;
> + } else if (!pci_intx_mask_supported(pdev)) {
> + dev_err(&pdev->dev,
> + "device does not support MSI-X or INTX\n");
> + err = -EINVAL;
> + goto fail_release_iomem;
> + } else {
> + dev_notice(&pdev->dev, "using INTX\n");
> + udev->info.irq_flags = IRQF_SHARED;
> + udev->max_vectors = 1;
> + }
> +
> + udev->ctx = kcalloc(udev->max_vectors,
> + sizeof(struct uio_msi_irq_ctx), GFP_KERNEL);
> + if (!udev->ctx)
> + goto fail_free_msix;
> +
> + for (i = 0; i < udev->max_vectors; i++) {
> + udev->msix[i].entry = i;
> +
> + udev->ctx[i].name = kasprintf(GFP_KERNEL,
> + KBUILD_MODNAME "[%d](%s)",
> + i, pci_name(pdev));
> + if (!udev->ctx[i].name)
> + goto fail_free_ctx;
> + }
> +
> + /* register uio driver */
> + err = uio_register_device(&pdev->dev, &udev->info);
> + if (err != 0)
> + goto fail_free_ctx;
> +
> + pci_set_drvdata(pdev, udev);
> + return 0;
> +
> +fail_free_ctx:
> + for (i = 0; i < udev->max_vectors; i++)
> + kfree(udev->ctx[i].name);
> + kfree(udev->ctx);
> +fail_free_msix:
> + kfree(udev->msix);
> +fail_release_iomem:
> + release_iomaps(udev->info.mem);
> + pci_release_regions(pdev);
> +fail_disable:
> + pci_disable_device(pdev);
> +fail_free:
> + kfree(udev);
> +
> + pr_notice("%s ret %d\n", __func__, err);
> + return err;
> +}
> +
> +static void uio_msi_remove(struct pci_dev *pdev)
> +{
> + struct uio_info *info = pci_get_drvdata(pdev);
> + struct uio_msi_pci_dev *udev
> + = container_of(info, struct uio_msi_pci_dev, info);
> + int i;
> +
> + uio_unregister_device(info);
> + release_iomaps(info->mem);
> +
> + pci_release_regions(pdev);
> + for (i = 0; i < udev->max_vectors; i++)
> + kfree(udev->ctx[i].name);
> + kfree(udev->ctx);
> + kfree(udev->msix);
> + pci_disable_device(pdev);
> +
> + pci_set_drvdata(pdev, NULL);
> + kfree(udev);
> +}
> +
> +static struct pci_driver uio_msi_pci_driver = {
> + .name = "uio_msi",
> + .probe = uio_msi_probe,
> + .remove = uio_msi_remove,
> +};
> +
> +module_pci_driver(uio_msi_pci_driver);
> +MODULE_VERSION(DRIVER_VERSION);
> +MODULE_LICENSE("GPL v2");
> +MODULE_AUTHOR("Stephen Hemminger <stephen@...workplumber.org>");
> +MODULE_DESCRIPTION("UIO driver for MSI PCI devices");
> diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
> index f7b2db4..d9497691 100644
> --- a/include/uapi/linux/Kbuild
> +++ b/include/uapi/linux/Kbuild
> @@ -411,6 +411,7 @@ header-y += udp.h
> header-y += uhid.h
> header-y += uinput.h
> header-y += uio.h
> +header-y += uio_msi.h
> header-y += ultrasound.h
> header-y += un.h
> header-y += unistd.h
> diff --git a/include/uapi/linux/uio_msi.h b/include/uapi/linux/uio_msi.h
> new file mode 100644
> index 0000000..297de00
> --- /dev/null
> +++ b/include/uapi/linux/uio_msi.h
> @@ -0,0 +1,22 @@
> +/*
> + * UIO_MSI API definition
> + *
> + * Copyright (c) 2015 by Brocade Communications Systems, Inc.
> + * All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +#ifndef _UIO_PCI_MSI_H
> +#define _UIO_PCI_MSI_H
> +
> +struct uio_msi_irq_set {
> + u32 vec;
> + int fd;
> +};
> +
> +#define UIO_MSI_BASE 0x86
> +#define UIO_MSI_IRQ_SET _IOW('I', UIO_MSI_BASE+1, struct uio_msi_irq_set)
> +
> +#endif
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists