Message-ID: <e3f3e82e-8884-48da-b5ed-e0016fc27b04@linux.intel.com>
Date: Sun, 1 Sep 2024 13:16:43 +0800
From: Baolu Lu <baolu.lu@...ux.intel.com>
To: Klaus Jensen <its@...elevant.dk>, David Woodhouse <dwmw2@...radead.org>,
Joerg Roedel <joro@...tes.org>, Will Deacon <will@...nel.org>,
Robin Murphy <robin.murphy@....com>, Jason Gunthorpe <jgg@...pe.ca>,
Kevin Tian <kevin.tian@...el.com>
Cc: baolu.lu@...ux.intel.com, Minwoo Im <minwoo.im@...sung.com>,
linux-kernel@...r.kernel.org, iommu@...ts.linux.dev,
Joel Granados <j.granados@...sung.com>, Klaus Jensen <k.jensen@...sung.com>
Subject: Re: [PATCH RFC PREVIEW 1/6] iommu/vt-d: Separate page request queue
from SVM
On 2024/8/26 19:40, Klaus Jensen wrote:
> From: Joel Granados <j.granados@...sung.com>
>
> IO page faults are no longer dependent on CONFIG_INTEL_IOMMU_SVM. Move
> all Page Request Queue (PRQ) functions that handle prq events to a new
> file in drivers/iommu/intel/prq.c. The page_req_des struct is made
> available in drivers/iommu/intel/iommu.h.
>
> No functional changes are intended. This is a preparation patch to
> enable the use of IO page faults outside the SVM and nested use cases.
>
> Signed-off-by: Joel Granados <j.granados@...sung.com>
> ---
> drivers/iommu/intel/Makefile | 2 +-
> drivers/iommu/intel/iommu.c | 18 +--
> drivers/iommu/intel/iommu.h | 40 +++++-
> drivers/iommu/intel/prq.c | 290 ++++++++++++++++++++++++++++++++++++++++
> drivers/iommu/intel/svm.c | 308 -------------------------------------------
> 5 files changed, 331 insertions(+), 327 deletions(-)
>
> diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
> index c8beb0281559..d3bb0798092d 100644
> --- a/drivers/iommu/intel/Makefile
> +++ b/drivers/iommu/intel/Makefile
> @@ -1,6 +1,6 @@
> # SPDX-License-Identifier: GPL-2.0
> obj-$(CONFIG_DMAR_TABLE) += dmar.o
> -obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o
> +obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o prq.o
> obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
> obj-$(CONFIG_DMAR_PERF) += perf.o
> obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index 9ff8b83c19a3..4ca284d53a6b 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -1487,12 +1487,10 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
> /* free context mapping */
> free_context_table(iommu);
>
> -#ifdef CONFIG_INTEL_IOMMU_SVM
> if (pasid_supported(iommu)) {
> if (ecap_prs(iommu->ecap))
> - intel_svm_finish_prq(iommu);
> + intel_finish_prq(iommu);
> }
> -#endif
> }
>
> /*
> @@ -2480,19 +2478,18 @@ static int __init init_dmars(void)
>
> iommu_flush_write_buffer(iommu);
>
> -#ifdef CONFIG_INTEL_IOMMU_SVM
> if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
> /*
> * Call dmar_alloc_hwirq() with dmar_global_lock held,
> * could cause possible lock race condition.
> */
> up_write(&dmar_global_lock);
> - ret = intel_svm_enable_prq(iommu);
> + ret = intel_enable_prq(iommu);
> down_write(&dmar_global_lock);
> if (ret)
> goto free_iommu;
> }
> -#endif
> +
> ret = dmar_set_interrupt(iommu);
> if (ret)
> goto free_iommu;
> @@ -2922,13 +2919,12 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
> intel_iommu_init_qi(iommu);
> iommu_flush_write_buffer(iommu);
>
> -#ifdef CONFIG_INTEL_IOMMU_SVM
> if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
> - ret = intel_svm_enable_prq(iommu);
> + ret = intel_enable_prq(iommu);
> if (ret)
> goto disable_iommu;
> }
> -#endif
> +
> ret = dmar_set_interrupt(iommu);
> if (ret)
> goto disable_iommu;
> @@ -4669,9 +4665,7 @@ const struct iommu_ops intel_iommu_ops = {
> .def_domain_type = device_def_domain_type,
> .remove_dev_pasid = intel_iommu_remove_dev_pasid,
> .pgsize_bitmap = SZ_4K,
> -#ifdef CONFIG_INTEL_IOMMU_SVM
> - .page_response = intel_svm_page_response,
> -#endif
> + .page_response = intel_page_response,
> .default_domain_ops = &(const struct iommu_domain_ops) {
> .attach_dev = intel_iommu_attach_device,
> .set_dev_pasid = intel_iommu_set_dev_pasid,
> diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
> index b67c14da1240..b3d98e706ed8 100644
> --- a/drivers/iommu/intel/iommu.h
> +++ b/drivers/iommu/intel/iommu.h
> @@ -694,6 +694,35 @@ struct iommu_pmu {
> #define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED)
> #define IOMMU_IRQ_ID_OFFSET_PERF (2 * DMAR_UNITS_SUPPORTED)
>
> +/* Page request queue descriptor */
> +struct page_req_dsc {
> + union {
> + struct {
> + u64 type:8;
> + u64 pasid_present:1;
> + u64 rsvd:7;
> + u64 rid:16;
> + u64 pasid:20;
> + u64 exe_req:1;
> + u64 pm_req:1;
> + u64 rsvd2:10;
> + };
> + u64 qw_0;
> + };
> + union {
> + struct {
> + u64 rd_req:1;
> + u64 wr_req:1;
> + u64 lpig:1;
> + u64 prg_index:9;
> + u64 addr:52;
> + };
> + u64 qw_1;
> + };
> + u64 qw_2;
> + u64 qw_3;
> +};
Why not move this structure into prq.c? It appears to be used only
there, or is there a reason to keep it in the header?
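If nothing outside prq.c needs the field layout, a forward declaration
in iommu.h might be enough, since struct intel_iommu only holds a
pointer to it. A rough, untested sketch of what I mean:

/* iommu.h: keep the descriptor opaque outside prq.c */
struct page_req_dsc;		/* full definition lives in prq.c */

/*
 * The prq member in struct intel_iommu is only a pointer, so the
 * incomplete type is sufficient for the header.
 */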
> +
> struct intel_iommu {
> void __iomem *reg; /* Pointer to hardware regs, virtual addr */
> u64 reg_phys; /* physical address of hw register set */
> @@ -719,12 +748,10 @@ struct intel_iommu {
>
> struct iommu_flush flush;
> #endif
> -#ifdef CONFIG_INTEL_IOMMU_SVM
> struct page_req_dsc *prq;
> unsigned char prq_name[16]; /* Name for PRQ interrupt */
> unsigned long prq_seq_number;
> struct completion prq_complete;
> -#endif
> struct iopf_queue *iopf_queue;
> unsigned char iopfq_name[16];
> /* Synchronization between fault report and iommu device release. */
> @@ -1156,12 +1183,13 @@ void intel_context_flush_present(struct device_domain_info *info,
> struct context_entry *context,
> bool affect_domains);
>
> +int intel_enable_prq(struct intel_iommu *iommu);
> +int intel_finish_prq(struct intel_iommu *iommu);
> +void intel_page_response(struct device *dev, struct iopf_fault *evt,
> + struct iommu_page_response *msg);
> +
> #ifdef CONFIG_INTEL_IOMMU_SVM
> void intel_svm_check(struct intel_iommu *iommu);
> -int intel_svm_enable_prq(struct intel_iommu *iommu);
> -int intel_svm_finish_prq(struct intel_iommu *iommu);
> -void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
> - struct iommu_page_response *msg);
> struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
> struct mm_struct *mm);
> void intel_drain_pasid_prq(struct device *dev, u32 pasid);
> diff --git a/drivers/iommu/intel/prq.c b/drivers/iommu/intel/prq.c
> new file mode 100644
> index 000000000000..2814373e95d8
> --- /dev/null
> +++ b/drivers/iommu/intel/prq.c
> @@ -0,0 +1,290 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright © 2015 Intel Corporation.
> + *
> + * Authors: David Woodhouse <dwmw2@...radead.org>
Many contributors have worked on the code being moved here, so the
original authorship line is no longer meaningful.
Consider adding a comment like 'Split from svm.c' instead to document
the origin.
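Something like this, for example (just a sketch):

// SPDX-License-Identifier: GPL-2.0-only
/*
 * prq.c - Page Request Queue (PRQ) and IO page fault handling
 *
 * Split from svm.c; originally Copyright © 2015 Intel Corporation.
 */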
> + */
> +
> +#include <linux/pci.h>
> +
> +#include "iommu.h"
> +#include "../iommu-pages.h"
> +#include "trace.h"
> +
> +static bool is_canonical_address(u64 addr)
> +{
> + int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
> + long saddr = (long) addr;
> +
> + return (((saddr << shift) >> shift) == saddr);
> +}
> +
> +static void handle_bad_prq_event(struct intel_iommu *iommu,
> + struct page_req_dsc *req, int result)
> +{
> + struct qi_desc desc = { };
> +
> + pr_err("%s: Invalid page request: %08llx %08llx\n",
> + iommu->name, ((unsigned long long *)req)[0],
> + ((unsigned long long *)req)[1]);
> +
> + if (!req->lpig)
> + return;
> +
> + desc.qw0 = QI_PGRP_PASID(req->pasid) |
> + QI_PGRP_DID(req->rid) |
> + QI_PGRP_PASID_P(req->pasid_present) |
> + QI_PGRP_RESP_CODE(result) |
> + QI_PGRP_RESP_TYPE;
> + desc.qw1 = QI_PGRP_IDX(req->prg_index) |
> + QI_PGRP_LPIG(req->lpig);
> +
> + qi_submit_sync(iommu, &desc, 1, 0);
> +}
> +
> +static int prq_to_iommu_prot(struct page_req_dsc *req)
> +{
> + int prot = 0;
> +
> + if (req->rd_req)
> + prot |= IOMMU_FAULT_PERM_READ;
> + if (req->wr_req)
> + prot |= IOMMU_FAULT_PERM_WRITE;
> + if (req->exe_req)
> + prot |= IOMMU_FAULT_PERM_EXEC;
> + if (req->pm_req)
> + prot |= IOMMU_FAULT_PERM_PRIV;
> +
> + return prot;
> +}
> +
> +static void intel_prq_report(struct intel_iommu *iommu, struct device *dev,
> + struct page_req_dsc *desc)
> +{
> + struct iopf_fault event = { };
> +
> + /* Fill in event data for device specific processing */
> + event.fault.type = IOMMU_FAULT_PAGE_REQ;
> + event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
> + event.fault.prm.pasid = desc->pasid;
> + event.fault.prm.grpid = desc->prg_index;
> + event.fault.prm.perm = prq_to_iommu_prot(desc);
> +
> + if (desc->lpig)
> + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> + if (desc->pasid_present) {
> + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> + event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
> + }
> +
> + iommu_report_device_fault(dev, &event);
> +}
> +
> +static irqreturn_t prq_event_thread(int irq, void *d)
> +{
> + struct intel_iommu *iommu = d;
> + struct page_req_dsc *req;
> + int head, tail, handled;
> + struct device *dev;
> + u64 address;
> +
> + /*
> + * Clear PPR bit before reading head/tail registers, to ensure that
> + * we get a new interrupt if needed.
> + */
> + writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
> +
> + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
> + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
> + handled = (head != tail);
> + while (head != tail) {
> + req = &iommu->prq[head / sizeof(*req)];
> + address = (u64)req->addr << VTD_PAGE_SHIFT;
> +
> + if (unlikely(!req->pasid_present)) {
> + pr_err("IOMMU: %s: Page request without PASID\n",
> + iommu->name);
> +bad_req:
> + handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
> + goto prq_advance;
> + }
> +
> + if (unlikely(!is_canonical_address(address))) {
> + pr_err("IOMMU: %s: Address is not canonical\n",
> + iommu->name);
> + goto bad_req;
> + }
> +
> + if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
> + pr_err("IOMMU: %s: Page request in Privilege Mode\n",
> + iommu->name);
> + goto bad_req;
> + }
> +
> + if (unlikely(req->exe_req && req->rd_req)) {
> + pr_err("IOMMU: %s: Execution request not supported\n",
> + iommu->name);
> + goto bad_req;
> + }
> +
> + /* Drop Stop Marker message. No need for a response. */
> + if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
> + goto prq_advance;
> +
> + /*
> + * If prq is to be handled outside iommu driver via receiver of
> + * the fault notifiers, we skip the page response here.
> + */
> + mutex_lock(&iommu->iopf_lock);
> + dev = device_rbtree_find(iommu, req->rid);
> + if (!dev) {
> + mutex_unlock(&iommu->iopf_lock);
> + goto bad_req;
> + }
> +
> + intel_prq_report(iommu, dev, req);
> + trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
> + req->qw_2, req->qw_3,
> + iommu->prq_seq_number++);
> + mutex_unlock(&iommu->iopf_lock);
> +prq_advance:
> + head = (head + sizeof(*req)) & PRQ_RING_MASK;
> + }
> +
> + dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
> +
> + /*
> + * Clear the page request overflow bit and wake up all threads that
> + * are waiting for the completion of this handling.
> + */
> + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
> + pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
> + iommu->name);
> + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
> + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
> + if (head == tail) {
> + iopf_queue_discard_partial(iommu->iopf_queue);
> + writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
> + pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
> + iommu->name);
> + }
> + }
> +
> + if (!completion_done(&iommu->prq_complete))
> + complete(&iommu->prq_complete);
> +
> + return IRQ_RETVAL(handled);
> +}
> +
> +int intel_enable_prq(struct intel_iommu *iommu)
> +{
> + struct iopf_queue *iopfq;
> + int irq, ret;
> +
> + iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
> + if (!iommu->prq) {
> + pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
> + iommu->name);
> + return -ENOMEM;
> + }
> +
> + irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
> + if (irq <= 0) {
> + pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
> + iommu->name);
> + ret = -EINVAL;
> + goto free_prq;
> + }
> + iommu->pr_irq = irq;
> +
> + snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
> + "dmar%d-iopfq", iommu->seq_id);
> + iopfq = iopf_queue_alloc(iommu->iopfq_name);
> + if (!iopfq) {
> + pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
> + ret = -ENOMEM;
> + goto free_hwirq;
> + }
> + iommu->iopf_queue = iopfq;
> +
> + snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
> +
> + ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
> + iommu->prq_name, iommu);
> + if (ret) {
> + pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
> + iommu->name);
> + goto free_iopfq;
> + }
> + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
> + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
> + dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
> +
> + init_completion(&iommu->prq_complete);
> +
> + return 0;
> +
> +free_iopfq:
> + iopf_queue_free(iommu->iopf_queue);
> + iommu->iopf_queue = NULL;
> +free_hwirq:
> + dmar_free_hwirq(irq);
> + iommu->pr_irq = 0;
> +free_prq:
> + iommu_free_pages(iommu->prq, PRQ_ORDER);
> + iommu->prq = NULL;
> +
> + return ret;
> +}
> +
> +int intel_finish_prq(struct intel_iommu *iommu)
> +{
> + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
> + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
> + dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
> +
> + if (iommu->pr_irq) {
> + free_irq(iommu->pr_irq, iommu);
> + dmar_free_hwirq(iommu->pr_irq);
> + iommu->pr_irq = 0;
> + }
> +
> + if (iommu->iopf_queue) {
> + iopf_queue_free(iommu->iopf_queue);
> + iommu->iopf_queue = NULL;
> + }
> +
> + iommu_free_pages(iommu->prq, PRQ_ORDER);
> + iommu->prq = NULL;
> +
> + return 0;
> +}
> +
> +void intel_page_response(struct device *dev, struct iopf_fault *evt,
> + struct iommu_page_response *msg)
> +{
> + struct device_domain_info *info = dev_iommu_priv_get(dev);
> + struct intel_iommu *iommu = info->iommu;
> + u8 bus = info->bus, devfn = info->devfn;
> + struct iommu_fault_page_request *prm;
> + struct qi_desc desc;
> + bool pasid_present;
> + bool last_page;
> + u16 sid;
> +
> + prm = &evt->fault.prm;
> + sid = PCI_DEVID(bus, devfn);
> + pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> + last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> +
> + desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
> + QI_PGRP_PASID_P(pasid_present) |
> + QI_PGRP_RESP_CODE(msg->code) |
> + QI_PGRP_RESP_TYPE;
> + desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
> + desc.qw2 = 0;
> + desc.qw3 = 0;
> +
> + qi_submit_sync(iommu, &desc, 1, 0);
> +}
The intel_drain_pasid_prq() helper should be moved to prq.c. It's no
longer specific to SVM.
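Then its prototype could sit together with the other PRQ entry points
in iommu.h, outside the CONFIG_INTEL_IOMMU_SVM guard. Roughly
(untested):

int intel_enable_prq(struct intel_iommu *iommu);
int intel_finish_prq(struct intel_iommu *iommu);
void intel_page_response(struct device *dev, struct iopf_fault *evt,
			 struct iommu_page_response *msg);
void intel_drain_pasid_prq(struct device *dev, u32 pasid);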
Thanks,
baolu