[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200428203644.3dcf13f2@jacob-builder>
Date:   Tue, 28 Apr 2020 20:36:44 -0700
From:   Jacob Pan <jacob.jun.pan@...ux.intel.com>
To:     Lu Baolu <baolu.lu@...ux.intel.com>
Cc:     Joerg Roedel <joro@...tes.org>, ashok.raj@...el.com,
        Liu Yi L <yi.l.liu@...el.com>, kevin.tian@...el.com,
        iommu@...ts.linux-foundation.org, linux-kernel@...r.kernel.org,
        jacob.jun.pan@...ux.intel.com
Subject: Re: [PATCH v3 3/4] iommu/vt-d: Add page request draining support
On Wed, 22 Apr 2020 16:06:10 +0800
Lu Baolu <baolu.lu@...ux.intel.com> wrote:
> When a PASID is stopped or terminated, there can be pending PRQs
> (requests that haven't received responses) in remapping hardware.
> This adds the interface to drain page requests and call it when a
> PASID is terminated.
> 
> Signed-off-by: Jacob Pan <jacob.jun.pan@...ux.intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@...el.com>
> Signed-off-by: Lu Baolu <baolu.lu@...ux.intel.com>
> ---
>  drivers/iommu/intel-svm.c   | 103
> ++++++++++++++++++++++++++++++++++-- include/linux/intel-iommu.h |
> 4 ++ 2 files changed, 102 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> index 83dc4319f661..2534641ef707 100644
> --- a/drivers/iommu/intel-svm.c
> +++ b/drivers/iommu/intel-svm.c
> @@ -23,6 +23,7 @@
>  #include "intel-pasid.h"
>  
>  static irqreturn_t prq_event_thread(int irq, void *d);
> +static void intel_svm_drain_prq(struct device *dev, int pasid);
>  
>  #define PRQ_ORDER 0
>  
> @@ -66,6 +67,8 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
>  	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
>  	dmar_writeq(iommu->reg + DMAR_PQA_REG,
> virt_to_phys(iommu->prq) | PRQ_ORDER); 
> +	init_completion(&iommu->prq_complete);
> +
>  	return 0;
>  }
>  
> @@ -208,6 +211,7 @@ static void intel_mm_release(struct mmu_notifier
> *mn, struct mm_struct *mm) rcu_read_lock();
>  	list_for_each_entry_rcu(sdev, &svm->devs, list) {
>  		intel_pasid_tear_down_entry(svm->iommu, sdev->dev,
> svm->pasid);
> +		intel_svm_drain_prq(sdev->dev, svm->pasid);
mmu_notifier release is called in atomic context, drain_prq needs to
wait for completion. I tested exit path when a process crashes. I got
[  +0.696214] BUG: sleeping function called from invalid context at kernel/sched/completion.c:101
[  +0.000068] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 3235, name: dsatest
[  +0.000046] INFO: lockdep is turned off.
[  +0.000002] CPU: 1 PID: 3235 Comm: dsatest Not tainted 5.7.0-rc1-z-svmtest+ #1637
[  +0.000000] Hardware name: Intel Corporation Kabylake Client platform/Skylake Halo DDR4 RVP11, BIOS 
04.1709050855 09/05/2017
[  +0.000001] Call Trace:
[  +0.000004]  dump_stack+0x68/0x9b
[  +0.000003]  ___might_sleep+0x229/0x250
[  +0.000003]  wait_for_completion_timeout+0x3c/0x110
[  +0.000003]  intel_svm_drain_prq+0x12f/0x210
[  +0.000003]  intel_mm_release+0x73/0x110
[  +0.000003]  __mmu_notifier_release+0x94/0x220
[  +0.000002]  ? do_munmap+0x10/0x10
[  +0.000002]  ? prepare_ftrace_return+0x5c/0x80
[  +0.000003]  exit_mmap+0x156/0x1a0
[  +0.000002]  ? mmput+0x44/0x120
[  +0.000003]  ? exit_mmap+0x5/0x1a0
[  +0.000002]  ? ftrace_graph_caller+0xa0/0xa0
[  +0.000001]  mmput+0x5e/0x120
>  		intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
>  	}
>  	rcu_read_unlock();
> @@ -401,12 +405,8 @@ int intel_svm_unbind_gpasid(struct device *dev,
> int pasid) if (!sdev->users) {
>  			list_del_rcu(&sdev->list);
>  			intel_pasid_tear_down_entry(iommu, dev,
> svm->pasid);
> +			intel_svm_drain_prq(dev, svm->pasid);
>  			intel_flush_svm_range_dev(svm, sdev, 0, -1,
> 0);
> -			/* TODO: Drain in flight PRQ for the PASID
> since it
> -			 * may get reused soon, we don't want to
> -			 * confuse with its previous life.
> -			 * intel_svm_drain_prq(dev, pasid);
> -			 */
>  			kfree_rcu(sdev, rcu);
>  
>  			if (list_empty(&svm->devs)) {
> @@ -644,6 +644,7 @@ int intel_svm_unbind_mm(struct device *dev, int
> pasid)
>  			 * large and has to be physically
> contiguous. So it's
>  			 * hard to be as defensive as we might like.
> */ intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
> +			intel_svm_drain_prq(dev, svm->pasid);
>  			intel_flush_svm_range_dev(svm, sdev, 0, -1,
> 0); kfree_rcu(sdev, rcu);
>  
> @@ -722,6 +723,92 @@ static bool is_canonical_address(u64 addr)
>  	return (((saddr << shift) >> shift) == saddr);
>  }
>  
> +/**
> + * intel_svm_drain_prq:
> + *
> + * Drain all pending page requests and responses related to a
> specific
> + * pasid in both software and hardware.
> + */
> +static void intel_svm_drain_prq(struct device *dev, int pasid)
> +{
> +	struct device_domain_info *info;
> +	struct dmar_domain *domain;
> +	struct intel_iommu *iommu;
> +	struct qi_desc desc[3];
> +	struct pci_dev *pdev;
> +	int head, tail;
> +	u16 sid, did;
> +	int qdep;
> +
> +	info = get_domain_info(dev);
> +	if (WARN_ON(!info || !dev_is_pci(dev)))
> +		return;
> +
> +	if (!info->ats_enabled)
> +		return;
> +
> +	iommu = info->iommu;
> +	domain = info->domain;
> +	pdev = to_pci_dev(dev);
> +	sid = PCI_DEVID(info->bus, info->devfn);
> +	did = domain->iommu_did[iommu->seq_id];
> +	qdep = pci_ats_queue_depth(pdev);
> +
> +	memset(desc, 0, sizeof(desc));
> +	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
> +			QI_IWD_FENCE |
> +			QI_IWD_TYPE;
> +	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
> +			QI_EIOTLB_DID(did) |
> +			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
> +			QI_EIOTLB_TYPE;
> +	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
> +			QI_DEV_EIOTLB_SID(sid) |
> +			QI_DEV_EIOTLB_QDEP(qdep) |
> +			QI_DEIOTLB_TYPE |
> +			QI_DEV_IOTLB_PFSID(info->pfsid);
> +
> +	/*
> +	 * Submit an invalidation wait descriptor with fence and
> page request
> +	 * drain flags set to invalidation queue. This ensures that
> all requests
> +	 * submitted to the invalidation queue ahead of this wait
> descriptor are
> +	 * processed and completed, and all already issued page
> requests from
> +	 * the device are put in the page request queue.
> +	 */
> +	qi_submit_sync(iommu, desc, 1, QI_OPT_WAIT_DRAIN);
> +
> +	/*
> +	 * Check and wait until all pending page requests in the
> queue are
> +	 * handled by the intr thread.
> +	 */
> +prq_retry:
> +	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
> +	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
> +	while (head != tail) {
> +		struct page_req_dsc *req;
> +
> +		req = &iommu->prq[head / sizeof(*req)];
> +		if (!req->pasid_present || req->pasid != pasid) {
> +			head = (head + sizeof(*req)) & PRQ_RING_MASK;
> +			continue;
> +		}
> +
> +		wait_for_completion_timeout(&iommu->prq_complete,
> HZ);
> +		goto prq_retry;
> +	}
> +
> +	/*
> +	 * Perform steps described in VT-d spec CH7.10 to drain page
> +	 * requests and responses in hardware.
> +	 */
> +qi_retry:
> +	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
> +	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
> +		wait_for_completion_timeout(&iommu->prq_complete,
> HZ);
> +		goto qi_retry;
> +	}
> +}
> +
>  static irqreturn_t prq_event_thread(int irq, void *d)
>  {
>  	struct intel_iommu *iommu = d;
> @@ -856,6 +943,12 @@ static irqreturn_t prq_event_thread(int irq,
> void *d) }
>  
>  	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
> +	/*
> +	 * Clear the page request overflow bit and wake up all
> threads that
> +	 * are waiting for the completion of this handling.
> +	 */
> +	writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
> +	complete(&iommu->prq_complete);
>  
>  	return IRQ_RETVAL(handled);
>  }
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index cca1e5f9aeaa..a0512b401a59 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -292,6 +292,8 @@
>  
>  /* PRS_REG */
>  #define DMA_PRS_PPR	((u32)1)
> +#define DMA_PRS_PRO	((u32)2)
> +
>  #define DMA_VCS_PAS	((u64)1)
>  
>  #define IOMMU_WAIT_OP(iommu, offset, op, cond,
> sts)			\ @@ -333,6 +335,7 @@ enum {
>  
>  #define QI_IWD_STATUS_DATA(d)	(((u64)d) << 32)
>  #define QI_IWD_STATUS_WRITE	(((u64)1) << 5)
> +#define QI_IWD_FENCE		(((u64)1) << 6)
>  #define QI_IWD_PRQ_DRAIN	(((u64)1) << 7)
>  
>  #define QI_IOTLB_DID(did) 	(((u64)did) << 16)
> @@ -590,6 +593,7 @@ struct intel_iommu {
>  #ifdef CONFIG_INTEL_IOMMU_SVM
>  	struct page_req_dsc *prq;
>  	unsigned char prq_name[16];    /* Name for PRQ interrupt */
> +	struct completion prq_complete;
>  	struct ioasid_allocator_ops pasid_allocator; /* Custom
> allocator for PASIDs */ #endif
>  	struct q_inval  *qi;            /* Queued invalidation info
> */
[Jacob Pan]
Powered by blists - more mailing lists
 
