[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <7b8967d8-8d03-5422-9222-f80778b59f6c@linux.alibaba.com>
Date: Sat, 31 Aug 2019 12:38:49 +0800
From: Ben Luo <luoben@...ux.alibaba.com>
To: Alex Williamson <alex.williamson@...hat.com>
Cc: tglx@...utronix.de, linux-kernel@...r.kernel.org,
tao.ma@...ux.alibaba.com, gerry@...ux.alibaba.com,
nanhai.zou@...ux.alibaba.com
Subject: Re: [PATCH v5 3/3] vfio/pci: make use of irq_update_devid and
optimize irq ops
在 2019/8/31 上午4:06, Alex Williamson 写道:
> On Fri, 30 Aug 2019 16:42:06 +0800
> Ben Luo <luoben@...ux.alibaba.com> wrote:
>
>> When userspace (e.g. qemu) triggers a switch between KVM
>> irqfd and userspace eventfd, only dev_id of irqaction
>> (i.e. the "trigger" in this patch's context) will be
>> changed, but a free-then-request-irq action is taken in
>> current code. And, irq affinity setting in VM will also
>> trigger a free-then-request-irq action, which actually
>> changes nothing, but only need to bounce the irqbypass
>> registraion in case that posted-interrupt is in use.
>>
>> This patch makes use of irq_update_devid() and optimize
>> both cases above, which reduces the risk of losing interrupt
>> and also cuts some overhead.
>>
>> Signed-off-by: Ben Luo <luoben@...ux.alibaba.com>
>> ---
>> drivers/vfio/pci/vfio_pci_intrs.c | 124 ++++++++++++++++++++++++++------------
>> 1 file changed, 87 insertions(+), 37 deletions(-)
>>
>> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
>> index 3fa3f72..d3a93d7 100644
>> --- a/drivers/vfio/pci/vfio_pci_intrs.c
>> +++ b/drivers/vfio/pci/vfio_pci_intrs.c
>> @@ -284,70 +284,120 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
>> static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
>> int vector, int fd, bool msix)
>> {
>> + struct eventfd_ctx *trigger = NULL;
>> struct pci_dev *pdev = vdev->pdev;
>> - struct eventfd_ctx *trigger;
>> int irq, ret;
>>
>> if (vector < 0 || vector >= vdev->num_ctx)
>> return -EINVAL;
>>
>> + if (fd >= 0) {
>> + trigger = eventfd_ctx_fdget(fd);
>> + if (IS_ERR(trigger)) {
>> + /* oops, going to disable this interrupt */
>> + dev_info(&pdev->dev,
>> + "get ctx error on bad fd: %d for vector:%d\n",
>> + fd, vector);
> I think a user could trigger this maliciously as a denial of service by
> simply providing a bogus file descriptor. The user is informed of the
> error by the return value, why do we need to spam the logs?
Ah, you are right, I will remove this log in next version
>> + }
>> + }
>> +
>> irq = pci_irq_vector(pdev, vector);
>>
>> + /*
>> + * 'trigger' is NULL or invalid, disable the interrupt
>> + * 'trigger' is same as before, only bounce the bypass registration
>> + * 'trigger' is a new invalid one, update it to irqaction and other
> s/invalid/valid/
sorry for typo :)
>> + * data structures referencing to the old one; fallback to disable
>> + * the interrupt on error
>> + */
>> if (vdev->ctx[vector].trigger) {
>> - free_irq(irq, vdev->ctx[vector].trigger);
>> + /*
>> + * even if the trigger is unchanged we need to bounce the
>> + * interrupt bypass connection to allow affinity changes in
>> + * the guest to be realized.
>> + */
>> irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
>> - kfree(vdev->ctx[vector].name);
>> - eventfd_ctx_put(vdev->ctx[vector].trigger);
>> - vdev->ctx[vector].trigger = NULL;
>> +
>> + if (vdev->ctx[vector].trigger == trigger) {
>> + /* avoid duplicated referencing to the same trigger */
>> + eventfd_ctx_put(trigger);
>> +
>> + } else if (trigger && !IS_ERR(trigger)) {
>> + ret = irq_update_devid(irq,
>> + vdev->ctx[vector].trigger, trigger);
>> + if (unlikely(ret)) {
>> + dev_info(&pdev->dev,
>> + "update devid of %d (token %p) failed: %d\n",
>> + irq, vdev->ctx[vector].trigger, ret);
>> + eventfd_ctx_put(trigger);
>> + free_irq(irq, vdev->ctx[vector].trigger);
>> + kfree(vdev->ctx[vector].name);
>> + eventfd_ctx_put(vdev->ctx[vector].trigger);
>> + vdev->ctx[vector].trigger = NULL;
>> + return ret;
>> + }
>> + eventfd_ctx_put(vdev->ctx[vector].trigger);
>> + vdev->ctx[vector].producer.token = trigger;
>> + vdev->ctx[vector].trigger = trigger;
>> +
>> + } else {
>> + free_irq(irq, vdev->ctx[vector].trigger);
>> + kfree(vdev->ctx[vector].name);
>> + eventfd_ctx_put(vdev->ctx[vector].trigger);
>> + vdev->ctx[vector].trigger = NULL;
>> + }
>> }
>>
>> if (fd < 0)
>> return 0;
>> + else if (IS_ERR(trigger))
>> + return PTR_ERR(trigger);
>>
>> - vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
>> - msix ? "x" : "", vector,
>> - pci_name(pdev));
>> - if (!vdev->ctx[vector].name)
>> - return -ENOMEM;
>> + if (!vdev->ctx[vector].trigger) {
>> + vdev->ctx[vector].name = kasprintf(GFP_KERNEL,
>> + "vfio-msi%s[%d](%s)",
>> + msix ? "x" : "", vector,
>> + pci_name(pdev));
>> + if (!vdev->ctx[vector].name) {
>> + eventfd_ctx_put(trigger);
>> + return -ENOMEM;
>> + }
>>
>> - trigger = eventfd_ctx_fdget(fd);
>> - if (IS_ERR(trigger)) {
>> - kfree(vdev->ctx[vector].name);
>> - return PTR_ERR(trigger);
>> - }
>> + /*
>> + * The MSIx vector table resides in device memory which may be
>> + * cleared via backdoor resets. We don't allow direct access to
>> + * the vector table so even if a userspace driver attempts to
>> + * save/restore around such a reset it would be unsuccessful.
>> + * To avoid this, restore the cached value of the message prior
>> + * to enabling.
>> + */
>> + if (msix) {
>> + struct msi_msg msg;
>>
>> - /*
>> - * The MSIx vector table resides in device memory which may be cleared
>> - * via backdoor resets. We don't allow direct access to the vector
>> - * table so even if a userspace driver attempts to save/restore around
>> - * such a reset it would be unsuccessful. To avoid this, restore the
>> - * cached value of the message prior to enabling.
>> - */
>> - if (msix) {
>> - struct msi_msg msg;
>> + get_cached_msi_msg(irq, &msg);
>> + pci_write_msi_msg(irq, &msg);
>> + }
>>
>> - get_cached_msi_msg(irq, &msg);
>> - pci_write_msi_msg(irq, &msg);
>> - }
>> + ret = request_irq(irq, vfio_msihandler, 0,
>> + vdev->ctx[vector].name, trigger);
>> + if (ret) {
>> + kfree(vdev->ctx[vector].name);
>> + eventfd_ctx_put(trigger);
>> + return ret;
>> + }
>>
>> - ret = request_irq(irq, vfio_msihandler, 0,
>> - vdev->ctx[vector].name, trigger);
>> - if (ret) {
>> - kfree(vdev->ctx[vector].name);
>> - eventfd_ctx_put(trigger);
>> - return ret;
>> + vdev->ctx[vector].producer.token = trigger;
>> + vdev->ctx[vector].producer.irq = irq;
>> + vdev->ctx[vector].trigger = trigger;
>> }
>>
>> - vdev->ctx[vector].producer.token = trigger;
>> - vdev->ctx[vector].producer.irq = irq;
>> + /* setup bypass connection and make irte updated */
>> ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
>> if (unlikely(ret))
>> dev_info(&pdev->dev,
>> "irq bypass producer (token %p) registration fails: %d\n",
>> vdev->ctx[vector].producer.token, ret);
>>
>> - vdev->ctx[vector].trigger = trigger;
>> -
>> return 0;
>> }
>>
Powered by blists - more mailing lists