[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e1bdbf24-173b-a702-3c03-8e2c46bcf633@linux.intel.com>
Date: Sun, 31 Jul 2022 21:39:27 +0800
From: Baolu Lu <baolu.lu@...ux.intel.com>
To: Yi Liu <yi.l.liu@...el.com>, Joerg Roedel <joro@...tes.org>,
Jason Gunthorpe <jgg@...dia.com>,
Christoph Hellwig <hch@...radead.org>,
Kevin Tian <kevin.tian@...el.com>,
Ashok Raj <ashok.raj@...el.com>, Will Deacon <will@...nel.org>,
Robin Murphy <robin.murphy@....com>,
Jean-Philippe Brucker <jean-philippe@...aro.com>,
Dave Jiang <dave.jiang@...el.com>,
Vinod Koul <vkoul@...nel.org>
Cc: baolu.lu@...ux.intel.com, Eric Auger <eric.auger@...hat.com>,
Jacob jun Pan <jacob.jun.pan@...el.com>,
Zhangfei Gao <zhangfei.gao@...aro.org>,
Zhu Tony <tony.zhu@...el.com>, iommu@...ts.linux.dev,
linux-kernel@...r.kernel.org,
Jacob Pan <jacob.jun.pan@...ux.intel.com>,
Jean-Philippe Brucker <jean-philippe@...aro.org>
Subject: Re: [PATCH v10 03/12] iommu: Remove SVM_FLAG_SUPERVISOR_MODE support
On 2022/7/31 20:01, Yi Liu wrote:
> On 2022/7/5 13:07, Lu Baolu wrote:
>> The current kernel DMA with PASID support is based on the SVA with a flag
>> SVM_FLAG_SUPERVISOR_MODE. The IOMMU driver binds the kernel memory
>> address
>> space to a PASID of the device. The device driver programs the device
>> with
>> kernel virtual address (KVA) for DMA access. There have been security and
>> functional issues with this approach:
>>
>> - The lack of IOTLB synchronization upon kernel page table updates.
>> (vmalloc, module/BPF loading, CONFIG_DEBUG_PAGEALLOC etc.)
>> - Other than slight more protection, using kernel virtual address (KVA)
>> has little advantage over physical address. There are also no use
>> cases yet where DMA engines need kernel virtual addresses for
>> in-kernel
>> DMA.
>>
>> This removes SVM_FLAG_SUPERVISOR_MODE support from the IOMMU interface.
>> The device drivers are suggested to handle kernel DMA with PASID through
>> the kernel DMA APIs.
>>
>> The drvdata parameter in iommu_sva_bind_device() and all callbacks is not
>> needed anymore. Cleanup them as well.
>>
>> Link:
>> https://lore.kernel.org/linux-iommu/20210511194726.GP1002214@nvidia.com/
>> Signed-off-by: Jacob Pan <jacob.jun.pan@...ux.intel.com>
>> Signed-off-by: Lu Baolu <baolu.lu@...ux.intel.com>
>> Reviewed-by: Jason Gunthorpe <jgg@...dia.com>
>> Reviewed-by: Jean-Philippe Brucker <jean-philippe@...aro.org>
>> Reviewed-by: Kevin Tian <kevin.tian@...el.com>
>> Tested-by: Zhangfei Gao <zhangfei.gao@...aro.org>
>> Tested-by: Tony Zhu <tony.zhu@...el.com>
>> ---
>> include/linux/intel-iommu.h | 3 +-
>> include/linux/intel-svm.h | 13 -----
>> include/linux/iommu.h | 8 +--
>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 +-
>> drivers/dma/idxd/cdev.c | 3 +-
>> drivers/dma/idxd/init.c | 25 +-------
>> .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 3 +-
>> drivers/iommu/intel/svm.c | 57 +++++--------------
>> drivers/iommu/iommu.c | 5 +-
>> drivers/misc/uacce/uacce.c | 2 +-
>> 10 files changed, 26 insertions(+), 98 deletions(-)
>>
>> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
>> index e065cbe3c857..31e3edc0fc7e 100644
>> --- a/include/linux/intel-iommu.h
>> +++ b/include/linux/intel-iommu.h
>> @@ -738,8 +738,7 @@ struct intel_iommu *device_to_iommu(struct device
>> *dev, u8 *bus, u8 *devfn);
>> extern void intel_svm_check(struct intel_iommu *iommu);
>> extern int intel_svm_enable_prq(struct intel_iommu *iommu);
>> extern int intel_svm_finish_prq(struct intel_iommu *iommu);
>> -struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct
>> *mm,
>> - void *drvdata);
>> +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct
>> *mm);
>> void intel_svm_unbind(struct iommu_sva *handle);
>> u32 intel_svm_get_pasid(struct iommu_sva *handle);
>> int intel_svm_page_response(struct device *dev, struct
>> iommu_fault_event *evt,
>> diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
>> index 207ef06ba3e1..f9a0d44f6fdb 100644
>> --- a/include/linux/intel-svm.h
>> +++ b/include/linux/intel-svm.h
>> @@ -13,17 +13,4 @@
>> #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20)
>> #define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5)
>> -/*
>> - * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be
>> used only
>> - * for access to kernel addresses. No IOTLB flushes are automatically
>> done
>> - * for kernel mappings; it is valid only for access to the kernel's
>> static
>> - * 1:1 mapping of physical memory — not to vmalloc or even module
>> mappings.
>> - * A future API addition may permit the use of such ranges, by means
>> of an
>> - * explicit IOTLB flush call (akin to the DMA API's unmap method).
>> - *
>> - * It is unlikely that we will ever hook into
>> flush_tlb_kernel_range() to
>> - * do such IOTLB flushes automatically.
>> - */
>> -#define SVM_FLAG_SUPERVISOR_MODE BIT(0)
>> -
>> #endif /* __INTEL_SVM_H__ */
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index 418a1914a041..f41eb2b3c7da 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -243,8 +243,7 @@ struct iommu_ops {
>> int (*dev_enable_feat)(struct device *dev, enum
>> iommu_dev_features f);
>> int (*dev_disable_feat)(struct device *dev, enum
>> iommu_dev_features f);
>> - struct iommu_sva *(*sva_bind)(struct device *dev, struct
>> mm_struct *mm,
>> - void *drvdata);
>> + struct iommu_sva *(*sva_bind)(struct device *dev, struct
>> mm_struct *mm);
>> void (*sva_unbind)(struct iommu_sva *handle);
>> u32 (*sva_get_pasid)(struct iommu_sva *handle);
>> @@ -669,8 +668,7 @@ int iommu_dev_disable_feature(struct device *dev,
>> enum iommu_dev_features f);
>> bool iommu_dev_feature_enabled(struct device *dev, enum
>> iommu_dev_features f);
>> struct iommu_sva *iommu_sva_bind_device(struct device *dev,
>> - struct mm_struct *mm,
>> - void *drvdata);
>> + struct mm_struct *mm);
>> void iommu_sva_unbind_device(struct iommu_sva *handle);
>> u32 iommu_sva_get_pasid(struct iommu_sva *handle);
>> @@ -1012,7 +1010,7 @@ iommu_dev_disable_feature(struct device *dev,
>> enum iommu_dev_features feat)
>> }
>> static inline struct iommu_sva *
>> -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void
>> *drvdata)
>> +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
>> {
>> return NULL;
>> }
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> index cd48590ada30..d2ba86470c42 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> @@ -754,8 +754,7 @@ bool arm_smmu_master_sva_enabled(struct
>> arm_smmu_master *master);
>> int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
>> int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
>> bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master);
>> -struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct
>> mm_struct *mm,
>> - void *drvdata);
>> +struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct
>> mm_struct *mm);
>> void arm_smmu_sva_unbind(struct iommu_sva *handle);
>> u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
>> void arm_smmu_sva_notifier_synchronize(void);
>> @@ -791,7 +790,7 @@ static inline bool
>> arm_smmu_master_iopf_supported(struct arm_smmu_master *master
>> }
>> static inline struct iommu_sva *
>> -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void
>> *drvdata)
>> +arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
>> {
>> return ERR_PTR(-ENODEV);
>> }
>> diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
>> index c2808fd081d6..66720001ba1c 100644
>> --- a/drivers/dma/idxd/cdev.c
>> +++ b/drivers/dma/idxd/cdev.c
>> @@ -6,7 +6,6 @@
>> #include <linux/pci.h>
>> #include <linux/device.h>
>> #include <linux/sched/task.h>
>> -#include <linux/intel-svm.h>
>> #include <linux/io-64-nonatomic-lo-hi.h>
>> #include <linux/cdev.h>
>> #include <linux/fs.h>
>> @@ -100,7 +99,7 @@ static int idxd_cdev_open(struct inode *inode,
>> struct file *filp)
>> filp->private_data = ctx;
>> if (device_user_pasid_enabled(idxd)) {
>> - sva = iommu_sva_bind_device(dev, current->mm, NULL);
>> + sva = iommu_sva_bind_device(dev, current->mm);
>> if (IS_ERR(sva)) {
>> rc = PTR_ERR(sva);
>> dev_err(dev, "pasid allocation failed: %d\n", rc);
>> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
>> index 355fb3ef4cbf..00b437f4f573 100644
>> --- a/drivers/dma/idxd/init.c
>> +++ b/drivers/dma/idxd/init.c
>> @@ -14,7 +14,6 @@
>> #include <linux/io-64-nonatomic-lo-hi.h>
>> #include <linux/device.h>
>> #include <linux/idr.h>
>> -#include <linux/intel-svm.h>
>> #include <linux/iommu.h>
>> #include <uapi/linux/idxd.h>
>> #include <linux/dmaengine.h>
>> @@ -466,29 +465,7 @@ static struct idxd_device *idxd_alloc(struct
>> pci_dev *pdev, struct idxd_driver_d
>> static int idxd_enable_system_pasid(struct idxd_device *idxd)
>> {
>> - int flags;
>> - unsigned int pasid;
>> - struct iommu_sva *sva;
>> -
>> - flags = SVM_FLAG_SUPERVISOR_MODE;
>> -
>> - sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
>> - if (IS_ERR(sva)) {
>> - dev_warn(&idxd->pdev->dev,
>> - "iommu sva bind failed: %ld\n", PTR_ERR(sva));
>> - return PTR_ERR(sva);
>> - }
>> -
>> - pasid = iommu_sva_get_pasid(sva);
>> - if (pasid == IOMMU_PASID_INVALID) {
>> - iommu_sva_unbind_device(sva);
>> - return -ENODEV;
>> - }
>> -
>> - idxd->sva = sva;
>> - idxd->pasid = pasid;
>> - dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid);
>> - return 0;
>> + return -EOPNOTSUPP;
>
> this makes it to be a always fail call. right? will it break any
> existing idxd usage?
The existing implementation is problematic. The right solution should be
to attach the default domain to a PASID of a device and handle the
kernel DMA through the kernel DMA APIs.
Jacob has already posted his v2 in the mailing list.
>
>> }
>> static void idxd_disable_system_pasid(struct idxd_device *idxd)
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
>> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
>> index 1ef7bbb4acf3..f155d406c5d5 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
>> @@ -367,8 +367,7 @@ __arm_smmu_sva_bind(struct device *dev, struct
>> mm_struct *mm)
>> return ERR_PTR(ret);
>> }
>> -struct iommu_sva *
>> -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void
>> *drvdata)
>> +struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct
>> mm_struct *mm)
>> {
>> struct iommu_sva *handle;
>> struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
>> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
>> index 7ee37d996e15..d04880a291c3 100644
>> --- a/drivers/iommu/intel/svm.c
>> +++ b/drivers/iommu/intel/svm.c
>> @@ -313,8 +313,7 @@ static int pasid_to_svm_sdev(struct device *dev,
>> unsigned int pasid,
>> return 0;
>> }
>> -static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct
>> *mm,
>> - unsigned int flags)
>> +static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct
>> *mm)
>> {
>> ioasid_t max_pasid = dev_is_pci(dev) ?
>> pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;
>> @@ -324,8 +323,7 @@ static int intel_svm_alloc_pasid(struct device
>> *dev, struct mm_struct *mm,
>> static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
>
> would be great to see a cleanup to rename the svm terms in intel iommu
> driver to be sva. :-)
SVM is the term used in the Intel VT-d spec, which predates the term SVA.
It makes sense to make the naming consistent anyway. :-)
>> struct device *dev,
>> - struct mm_struct *mm,
>> - unsigned int flags)
>> + struct mm_struct *mm)
>> {
>> struct device_domain_info *info = dev_iommu_priv_get(dev);
>> unsigned long iflags, sflags;
>> @@ -341,22 +339,18 @@ static struct iommu_sva
>> *intel_svm_bind_mm(struct intel_iommu *iommu,
>> svm->pasid = mm->pasid;
>> svm->mm = mm;
>> - svm->flags = flags;
>> INIT_LIST_HEAD_RCU(&svm->devs);
>> - if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
>> - svm->notifier.ops = &intel_mmuops;
>> - ret = mmu_notifier_register(&svm->notifier, mm);
>> - if (ret) {
>> - kfree(svm);
>> - return ERR_PTR(ret);
>> - }
>> + svm->notifier.ops = &intel_mmuops;
>> + ret = mmu_notifier_register(&svm->notifier, mm);
>> + if (ret) {
>> + kfree(svm);
>> + return ERR_PTR(ret);
>> }
>> ret = pasid_private_add(svm->pasid, svm);
>> if (ret) {
>> - if (svm->notifier.ops)
>> - mmu_notifier_unregister(&svm->notifier, mm);
>> + mmu_notifier_unregister(&svm->notifier, mm);
>> kfree(svm);
>> return ERR_PTR(ret);
>> }
>> @@ -391,9 +385,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct
>> intel_iommu *iommu,
>> }
>> /* Setup the pasid table: */
>> - sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
>> - PASID_FLAG_SUPERVISOR_MODE : 0;
>> - sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ?
>> PASID_FLAG_FL5LP : 0;
>> + sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP
>> : 0;
>> spin_lock_irqsave(&iommu->lock, iflags);
>> ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
>> FLPT_DEFAULT_DID, sflags);
>> @@ -410,8 +402,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct
>> intel_iommu *iommu,
>> kfree(sdev);
>> free_svm:
>> if (list_empty(&svm->devs)) {
>> - if (svm->notifier.ops)
>> - mmu_notifier_unregister(&svm->notifier, mm);
>> + mmu_notifier_unregister(&svm->notifier, mm);
>> pasid_private_remove(mm->pasid);
>> kfree(svm);
>> }
>> @@ -767,7 +758,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> * to unbind the mm while any page faults are outstanding.
>> */
>> svm = pasid_private_find(req->pasid);
>> - if (IS_ERR_OR_NULL(svm) || (svm->flags &
>> SVM_FLAG_SUPERVISOR_MODE))
>> + if (IS_ERR_OR_NULL(svm))
>> goto bad_req;
>> }
>> @@ -818,40 +809,20 @@ static irqreturn_t prq_event_thread(int irq,
>> void *d)
>> return IRQ_RETVAL(handled);
>> }
>> -struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct
>> *mm, void *drvdata)
>> +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct
>> *mm)
>> {
>> struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
>> - unsigned int flags = 0;
>> struct iommu_sva *sva;
>> int ret;
>> - if (drvdata)
>> - flags = *(unsigned int *)drvdata;
>> -
>> - if (flags & SVM_FLAG_SUPERVISOR_MODE) {
>> - if (!ecap_srs(iommu->ecap)) {
>> - dev_err(dev, "%s: Supervisor PASID not supported\n",
>> - iommu->name);
>> - return ERR_PTR(-EOPNOTSUPP);
>> - }
>> -
>> - if (mm) {
>> - dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
>> - iommu->name);
>> - return ERR_PTR(-EINVAL);
>> - }
>> -
>> - mm = &init_mm;
>> - }
>> -
>> mutex_lock(&pasid_mutex);
>> - ret = intel_svm_alloc_pasid(dev, mm, flags);
>> + ret = intel_svm_alloc_pasid(dev, mm);
>> if (ret) {
>> mutex_unlock(&pasid_mutex);
>> return ERR_PTR(ret);
>> }
>> - sva = intel_svm_bind_mm(iommu, dev, mm, flags);
>> + sva = intel_svm_bind_mm(iommu, dev, mm);
>> mutex_unlock(&pasid_mutex);
>> return sva;
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index 0cb0750f61e8..74a0a3ec0907 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -2788,7 +2788,6 @@ EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled);
>> * iommu_sva_bind_device() - Bind a process address space to a device
>> * @dev: the device
>> * @mm: the mm to bind, caller must hold a reference to it
>> - * @drvdata: opaque data pointer to pass to bind callback
>> *
>> * Create a bond between device and address space, allowing the
>> device to access
>> * the mm using the returned PASID. If a bond already exists between
>> @device and
>> @@ -2801,7 +2800,7 @@ EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled);
>> * On error, returns an ERR_PTR value.
>> */
>> struct iommu_sva *
>> -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void
>> *drvdata)
>> +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
>> {
>> struct iommu_group *group;
>> struct iommu_sva *handle = ERR_PTR(-EINVAL);
>> @@ -2826,7 +2825,7 @@ iommu_sva_bind_device(struct device *dev, struct
>> mm_struct *mm, void *drvdata)
>> if (iommu_group_device_count(group) != 1)
>> goto out_unlock;
>> - handle = ops->sva_bind(dev, mm, drvdata);
>> + handle = ops->sva_bind(dev, mm);
>> out_unlock:
>> mutex_unlock(&group->mutex);
>> diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
>> index 281c54003edc..3238a867ea51 100644
>> --- a/drivers/misc/uacce/uacce.c
>> +++ b/drivers/misc/uacce/uacce.c
>> @@ -99,7 +99,7 @@ static int uacce_bind_queue(struct uacce_device
>> *uacce, struct uacce_queue *q)
>> if (!(uacce->flags & UACCE_DEV_SVA))
>> return 0;
>> - handle = iommu_sva_bind_device(uacce->parent, current->mm, NULL);
>> + handle = iommu_sva_bind_device(uacce->parent, current->mm);
>> if (IS_ERR(handle))
>> return PTR_ERR(handle);
>
Best regards,
baolu
Powered by blists - more mailing lists