[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <82768612-0d23-55a9-dedf-58ade57b37af@amazon.de>
Date: Thu, 2 Jul 2020 17:24:11 +0200
From: Alexander Graf <graf@...zon.de>
To: Andra Paraschiv <andraprs@...zon.com>,
<linux-kernel@...r.kernel.org>
CC: Anthony Liguori <aliguori@...zon.com>,
Benjamin Herrenschmidt <benh@...nel.crashing.org>,
Colm MacCarthaigh <colmmacc@...zon.com>,
"Bjoern Doebel" <doebel@...zon.de>,
David Woodhouse <dwmw@...zon.co.uk>,
"Frank van der Linden" <fllinden@...zon.com>,
Greg KH <gregkh@...uxfoundation.org>,
Martin Pohlack <mpohlack@...zon.de>,
Matt Wilson <msw@...zon.com>,
"Paolo Bonzini" <pbonzini@...hat.com>,
Balbir Singh <sblbir@...zon.com>,
"Stefano Garzarella" <sgarzare@...hat.com>,
Stefan Hajnoczi <stefanha@...hat.com>,
Stewart Smith <trawets@...zon.com>,
Uwe Dannowski <uwed@...zon.de>, <kvm@...r.kernel.org>,
<ne-devel-upstream@...zon.com>
Subject: Re: [PATCH v4 06/18] nitro_enclaves: Handle out-of-band PCI device
events
On 22.06.20 22:03, Andra Paraschiv wrote:
> In addition to the replies sent by the Nitro Enclaves PCI device in
> response to command requests, out-of-band enclave events can happen, e.g.
> an enclave crashes. In this case, the Nitro Enclaves driver needs to be
> aware of the event and notify the corresponding user space process that
> abstracts the enclave.
>
> Register an MSI-X interrupt vector to be used for this kind of
> out-of-band events. The interrupt signals that the state of an enclave
> changed, and the driver logic scans the state of each running enclave to
> identify which one the notification is intended for.
>
> Create a workqueue to handle the out-of-band events. Notify the user space
> enclave process that is using a polling mechanism on the enclave fd.
>
> Signed-off-by: Alexandru-Catalin Vasile <lexnv@...zon.com>
> Signed-off-by: Andra Paraschiv <andraprs@...zon.com>
> ---
> Changelog
>
> v3 -> v4
>
> * Use dev_err instead of custom NE log pattern.
> * Return IRQ_NONE when interrupts are not handled.
>
> v2 -> v3
>
> * Remove the WARN_ON calls.
> * Update static calls sanity checks.
> * Remove "ratelimited" from the logs that are not in the ioctl call
> paths.
>
> v1 -> v2
>
> * Add log pattern for NE.
> * Update goto labels to match their purpose.
> ---
> drivers/virt/nitro_enclaves/ne_pci_dev.c | 122 +++++++++++++++++++++++
> 1 file changed, 122 insertions(+)
>
> diff --git a/drivers/virt/nitro_enclaves/ne_pci_dev.c b/drivers/virt/nitro_enclaves/ne_pci_dev.c
> index c24230cfe7c0..9a137862cade 100644
> --- a/drivers/virt/nitro_enclaves/ne_pci_dev.c
> +++ b/drivers/virt/nitro_enclaves/ne_pci_dev.c
> @@ -239,6 +239,93 @@ static irqreturn_t ne_reply_handler(int irq, void *args)
> return IRQ_HANDLED;
> }
>
> +/**
> + * ne_event_work_handler - Work queue handler for notifying enclaves on
> + * a state change received by the event interrupt handler.
> + *
> + * An out-of-band event is being issued by the Nitro Hypervisor when at least
> + * one enclave is changing state without client interaction.
> + *
> + * @work: item containing the Nitro Enclaves PCI device for which an
> + * out-of-band event was issued.
> + */
> +static void ne_event_work_handler(struct work_struct *work)
> +{
> + struct ne_pci_dev_cmd_reply cmd_reply = {};
> + struct ne_enclave *ne_enclave = NULL;
> + struct ne_pci_dev *ne_pci_dev =
> + container_of(work, struct ne_pci_dev, notify_work);
> + int rc = -EINVAL;
> + struct slot_info_req slot_info_req = {};
> +
> + if (!ne_pci_dev)
> + return;
How? ne_pci_dev comes from container_of() on the work item, so can it ever be NULL here?
> +
> + mutex_lock(&ne_pci_dev->enclaves_list_mutex);
> +
> + /*
> + * Iterate over all enclaves registered for the Nitro Enclaves
> + * PCI device and determine which enclave(s) the out-of-band event
> + * corresponds to.
> + */
> + list_for_each_entry(ne_enclave, &ne_pci_dev->enclaves_list,
> + enclave_list_entry) {
> + mutex_lock(&ne_enclave->enclave_info_mutex);
> +
> + /*
> + * Enclaves that were never started cannot receive out-of-band
> + * events.
> + */
> + if (ne_enclave->state != NE_STATE_RUNNING)
> + goto unlock;
> +
> + slot_info_req.slot_uid = ne_enclave->slot_uid;
> +
> + rc = ne_do_request(ne_enclave->pdev, SLOT_INFO, &slot_info_req,
> + sizeof(slot_info_req), &cmd_reply,
> + sizeof(cmd_reply));
> + if (rc < 0)
> + dev_err(&ne_enclave->pdev->dev,
> + "Error in slot info [rc=%d]\n", rc);
> +
> + /* Notify enclave process that the enclave state changed. */
> + if (ne_enclave->state != cmd_reply.state) {
> + ne_enclave->state = cmd_reply.state;
> +
> + ne_enclave->has_event = true;
> +
> + wake_up_interruptible(&ne_enclave->eventq);
> + }
> +
> +unlock:
> + mutex_unlock(&ne_enclave->enclave_info_mutex);
> + }
> +
> + mutex_unlock(&ne_pci_dev->enclaves_list_mutex);
> +}
> +
> +/**
> + * ne_event_handler - Interrupt handler for PCI device out-of-band
> + * events. This interrupt does not supply any data in the MMIO region.
> + * It notifies a change in the state of any of the launched enclaves.
> + *
> + * @irq: received interrupt for an out-of-band event.
> + * @args: PCI device private data structure.
> + *
> + * @returns: IRQ_HANDLED on handled interrupt, IRQ_NONE otherwise.
> + */
> +static irqreturn_t ne_event_handler(int irq, void *args)
> +{
> + struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
> +
> + if (!ne_pci_dev)
> + return IRQ_NONE;
How can this happen? args is the ne_pci_dev pointer handed to request_irq() as dev_id.
Alex
> +
> + queue_work(ne_pci_dev->event_wq, &ne_pci_dev->notify_work);
> +
> + return IRQ_HANDLED;
> +}
> +
> /**
> * ne_setup_msix - Setup MSI-X vectors for the PCI device.
> *
> @@ -284,8 +371,37 @@ static int ne_setup_msix(struct pci_dev *pdev)
> goto free_irq_vectors;
> }
>
> + ne_pci_dev->event_wq = create_singlethread_workqueue("ne_pci_dev_wq");
> + if (!ne_pci_dev->event_wq) {
> + rc = -ENOMEM;
> +
> + dev_err(&pdev->dev, "Cannot get wq for dev events [rc=%d]\n",
> + rc);
> +
> + goto free_reply_irq_vec;
> + }
> +
> + INIT_WORK(&ne_pci_dev->notify_work, ne_event_work_handler);
> +
> + /*
> + * This IRQ gets triggered every time any enclave's state changes. Its
> + * handler then scans for the changes and propagates them to the user
> + * space.
> + */
> + rc = request_irq(pci_irq_vector(pdev, NE_VEC_EVENT),
> + ne_event_handler, 0, "enclave_evt", ne_pci_dev);
> + if (rc < 0) {
> + dev_err(&pdev->dev, "Error in request irq event [rc=%d]\n", rc);
> +
> + goto destroy_wq;
> + }
> +
> return 0;
>
> +destroy_wq:
> + destroy_workqueue(ne_pci_dev->event_wq);
> +free_reply_irq_vec:
> + free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
> free_irq_vectors:
> pci_free_irq_vectors(pdev);
>
> @@ -304,6 +420,12 @@ static void ne_teardown_msix(struct pci_dev *pdev)
> if (!ne_pci_dev)
> return;
>
> + free_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_pci_dev);
> +
> + flush_work(&ne_pci_dev->notify_work);
> + flush_workqueue(ne_pci_dev->event_wq);
> + destroy_workqueue(ne_pci_dev->event_wq);
> +
> free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
>
> pci_free_irq_vectors(pdev);
>
Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
Powered by blists - more mailing lists