[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3724716.kQq0lBPeGt@jernej-laptop>
Date: Tue, 14 Mar 2023 21:06:12 +0100
From: Jernej Škrabec <jernej.skrabec@...il.com>
To: Joerg Roedel <joro@...tes.org>,
Matthew Rosato <mjrosato@...ux.ibm.com>,
Will Deacon <will@...nel.org>,
Wenjia Zhang <wenjia@...ux.ibm.com>,
Robin Murphy <robin.murphy@....com>,
Jason Gunthorpe <jgg@...pe.ca>,
Niklas Schnelle <schnelle@...ux.ibm.com>
Cc: Gerd Bayer <gbayer@...ux.ibm.com>,
Julian Ruess <julianr@...ux.ibm.com>,
Pierre Morel <pmorel@...ux.ibm.com>,
Alexandra Winter <wintera@...ux.ibm.com>,
Heiko Carstens <hca@...ux.ibm.com>,
Vasily Gorbik <gor@...ux.ibm.com>,
Alexander Gordeev <agordeev@...ux.ibm.com>,
Christian Borntraeger <borntraeger@...ux.ibm.com>,
Sven Schnelle <svens@...ux.ibm.com>,
Suravee Suthikulpanit <suravee.suthikulpanit@....com>,
Hector Martin <marcan@...can.st>,
Sven Peter <sven@...npeter.dev>,
Alyssa Rosenzweig <alyssa@...enzweig.io>,
David Woodhouse <dwmw2@...radead.org>,
Lu Baolu <baolu.lu@...ux.intel.com>,
Andy Gross <agross@...nel.org>,
Bjorn Andersson <andersson@...nel.org>,
Konrad Dybcio <konrad.dybcio@...aro.org>,
Yong Wu <yong.wu@...iatek.com>,
Matthias Brugger <matthias.bgg@...il.com>,
AngeloGioacchino Del Regno
<angelogioacchino.delregno@...labora.com>,
Gerald Schaefer <gerald.schaefer@...ux.ibm.com>,
Orson Zhai <orsonzhai@...il.com>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
Chunyan Zhang <zhang.lyra@...il.com>,
Chen-Yu Tsai <wens@...e.org>,
Samuel Holland <samuel@...lland.org>,
Thierry Reding <thierry.reding@...il.com>,
Krishna Reddy <vdumpa@...dia.com>,
Jonathan Hunter <jonathanh@...dia.com>,
Niklas Schnelle <schnelle@...ux.ibm.com>,
Jonathan Corbet <corbet@....net>, linux-s390@...r.kernel.org,
netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
iommu@...ts.linux.dev, asahi@...ts.linux.dev,
linux-arm-kernel@...ts.infradead.org,
linux-arm-msm@...r.kernel.org, linux-mediatek@...ts.infradead.org,
linux-sunxi@...ts.linux.dev, linux-tegra@...r.kernel.org,
linux-doc@...r.kernel.org
Subject: Re: [PATCH v8 2/6] iommu: Allow .iotlb_sync_map to fail and handle s390's
-ENOMEM return
Dne petek, 10. marec 2023 ob 17:07:47 CET je Niklas Schnelle napisal(a):
> On s390 when using a paging hypervisor, .iotlb_sync_map is used to sync
> mappings by letting the hypervisor inspect the synced IOVA range and
> updating a shadow table. This however means that .iotlb_sync_map can
> fail as the hypervisor may run out of resources while doing the sync.
> This can be due to the hypervisor being unable to pin guest pages, due
> to a limit on mapped addresses such as vfio_iommu_type1.dma_entry_limit
> or lack of other resources. Either way such a failure to sync a mapping
> should result in a DMA_MAPPING_ERROR.
>
> Now especially when running with batched IOTLB flushes for unmap it may
> be that some IOVAs have already been invalidated but not yet synced via
> .iotlb_sync_map. Thus if the hypervisor indicates running out of
> resources, first do a global flush allowing the hypervisor to free
> resources associated with these mappings as well as retry creating the
> new mappings and only if that also fails report this error to callers.
>
> Reviewed-by: Lu Baolu <baolu.lu@...ux.intel.com>
> Reviewed-by: Matthew Rosato <mjrosato@...ux.ibm.com>
> Signed-off-by: Niklas Schnelle <schnelle@...ux.ibm.com>
> ---
> drivers/iommu/amd/iommu.c | 5 +++--
> drivers/iommu/apple-dart.c | 5 +++--
> drivers/iommu/intel/iommu.c | 5 +++--
> drivers/iommu/iommu.c | 20 ++++++++++++++++----
> drivers/iommu/msm_iommu.c | 5 +++--
> drivers/iommu/mtk_iommu.c | 5 +++--
> drivers/iommu/s390-iommu.c | 29 ++++++++++++++++++++++++-----
> drivers/iommu/sprd-iommu.c | 5 +++--
> drivers/iommu/sun50i-iommu.c | 4 +++-
For sun50i:
Acked-by: Jernej Skrabec <jernej.skrabec@...il.com>
Best regards,
Jernej
> drivers/iommu/tegra-gart.c | 5 +++--
> include/linux/iommu.h | 4 ++--
> 11 files changed, 66 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index 5a505ba5467e..ff309bd1bb8f 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -2187,14 +2187,15 @@ static int amd_iommu_attach_device(struct
> iommu_domain *dom, return ret;
> }
>
> -static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
> - unsigned long iova, size_t
size)
> +static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
> + unsigned long iova, size_t
size)
> {
> struct protection_domain *domain = to_pdomain(dom);
> struct io_pgtable_ops *ops = &domain->iop.iop.ops;
>
> if (ops->map_pages)
> domain_flush_np_cache(domain, iova, size);
> + return 0;
> }
>
> static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long
> iova, diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
> index 06169d36eab8..cbed1f87eae9 100644
> --- a/drivers/iommu/apple-dart.c
> +++ b/drivers/iommu/apple-dart.c
> @@ -506,10 +506,11 @@ static void apple_dart_iotlb_sync(struct iommu_domain
> *domain, apple_dart_domain_flush_tlb(to_dart_domain(domain));
> }
>
> -static void apple_dart_iotlb_sync_map(struct iommu_domain *domain,
> - unsigned long iova, size_t
size)
> +static int apple_dart_iotlb_sync_map(struct iommu_domain *domain,
> + unsigned long iova, size_t
size)
> {
> apple_dart_domain_flush_tlb(to_dart_domain(domain));
> + return 0;
> }
>
> static phys_addr_t apple_dart_iova_to_phys(struct iommu_domain *domain,
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index 7c2f4bd33582..b795b2f323e3 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -4745,8 +4745,8 @@ static bool risky_device(struct pci_dev *pdev)
> return false;
> }
>
> -static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
> - unsigned long iova, size_t
size)
> +static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
> + unsigned long iova, size_t
size)
> {
> struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> unsigned long pages = aligned_nrpages(iova, size);
> @@ -4756,6 +4756,7 @@ static void intel_iommu_iotlb_sync_map(struct
> iommu_domain *domain,
>
> xa_for_each(&dmar_domain->iommu_array, i, info)
> __mapping_notify_one(info->iommu, dmar_domain, pfn,
pages);
> + return 0;
> }
>
> static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t
> pasid) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 10db680acaed..ae549b032a16 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -2410,8 +2410,17 @@ int iommu_map(struct iommu_domain *domain, unsigned
> long iova, return -EINVAL;
>
> ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
> - if (ret == 0 && ops->iotlb_sync_map)
> - ops->iotlb_sync_map(domain, iova, size);
> + if (ret == 0 && ops->iotlb_sync_map) {
> + ret = ops->iotlb_sync_map(domain, iova, size);
> + if (ret)
> + goto out_err;
> + }
> +
> + return ret;
> +
> +out_err:
> + /* undo mappings already done */
> + iommu_unmap(domain, iova, size);
>
> return ret;
> }
> @@ -2552,8 +2561,11 @@ ssize_t iommu_map_sg(struct iommu_domain *domain,
> unsigned long iova, sg = sg_next(sg);
> }
>
> - if (ops->iotlb_sync_map)
> - ops->iotlb_sync_map(domain, iova, mapped);
> + if (ops->iotlb_sync_map) {
> + ret = ops->iotlb_sync_map(domain, iova, mapped);
> + if (ret)
> + goto out_err;
> + }
> return mapped;
>
> out_err:
> diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
> index 454f6331c889..2033716eac78 100644
> --- a/drivers/iommu/msm_iommu.c
> +++ b/drivers/iommu/msm_iommu.c
> @@ -486,12 +486,13 @@ static int msm_iommu_map(struct iommu_domain *domain,
> unsigned long iova, return ret;
> }
>
> -static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long
> iova, - size_t size)
> +static int msm_iommu_sync_map(struct iommu_domain *domain, unsigned long
> iova, + size_t size)
> {
> struct msm_priv *priv = to_msm_priv(domain);
>
> __flush_iotlb_range(iova, size, SZ_4K, false, priv);
> + return 0;
> }
>
> static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long
> iova, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> index d5a4955910ff..29769fb5c51e 100644
> --- a/drivers/iommu/mtk_iommu.c
> +++ b/drivers/iommu/mtk_iommu.c
> @@ -750,12 +750,13 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain
> *domain, mtk_iommu_tlb_flush_range_sync(gather->start, length, dom->bank);
> }
>
> -static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long
> iova, - size_t size)
> +static int mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long
> iova, + size_t size)
> {
> struct mtk_iommu_domain *dom = to_mtk_domain(domain);
>
> mtk_iommu_tlb_flush_range_sync(iova, size, dom->bank);
> + return 0;
> }
>
> static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index fbf59a8db29b..17174b35db11 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -205,6 +205,14 @@ static void s390_iommu_release_device(struct device
> *dev) __s390_iommu_detach_device(zdev);
> }
>
> +
> +static int zpci_refresh_all(struct zpci_dev *zdev)
> +{
> + return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
> + zdev->end_dma - zdev->start_dma + 1);
> +
> +}
> +
> static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
> {
> struct s390_domain *s390_domain = to_s390_domain(domain);
> @@ -212,8 +220,7 @@ static void s390_iommu_flush_iotlb_all(struct
> iommu_domain *domain)
>
> rcu_read_lock();
> list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
> - zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
> - zdev->end_dma - zdev->start_dma + 1);
> + zpci_refresh_all(zdev);
> }
> rcu_read_unlock();
> }
> @@ -237,20 +244,32 @@ static void s390_iommu_iotlb_sync(struct iommu_domain
> *domain, rcu_read_unlock();
> }
>
> -static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
> +static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
> unsigned long iova, size_t
size)
> {
> struct s390_domain *s390_domain = to_s390_domain(domain);
> struct zpci_dev *zdev;
> + int ret = 0;
>
> rcu_read_lock();
> list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
> if (!zdev->tlb_refresh)
> continue;
> - zpci_refresh_trans((u64)zdev->fh << 32,
> - iova, size);
> + ret = zpci_refresh_trans((u64)zdev->fh << 32,
> + iova, size);
> + /*
> + * let the hypervisor discover invalidated entries
> + * allowing it to free IOVAs and unpin pages
> + */
> + if (ret == -ENOMEM) {
> + ret = zpci_refresh_all(zdev);
> + if (ret)
> + break;
> + }
> }
> rcu_read_unlock();
> +
> + return ret;
> }
>
> static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
> diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
> index ae94d74b73f4..74bcae69653c 100644
> --- a/drivers/iommu/sprd-iommu.c
> +++ b/drivers/iommu/sprd-iommu.c
> @@ -315,8 +315,8 @@ static size_t sprd_iommu_unmap(struct iommu_domain
> *domain, unsigned long iova, return size;
> }
>
> -static void sprd_iommu_sync_map(struct iommu_domain *domain,
> - unsigned long iova, size_t size)
> +static int sprd_iommu_sync_map(struct iommu_domain *domain,
> + unsigned long iova, size_t size)
> {
> struct sprd_iommu_domain *dom = to_sprd_domain(domain);
> unsigned int reg;
> @@ -328,6 +328,7 @@ static void sprd_iommu_sync_map(struct iommu_domain
> *domain,
>
> /* clear IOMMU TLB buffer after page table updated */
> sprd_iommu_write(dom->sdev, reg, 0xffffffff);
> + return 0;
> }
>
> static void sprd_iommu_sync(struct iommu_domain *domain,
> diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
> index 2d993d0cea7d..60a983f4a494 100644
> --- a/drivers/iommu/sun50i-iommu.c
> +++ b/drivers/iommu/sun50i-iommu.c
> @@ -402,7 +402,7 @@ static void sun50i_iommu_flush_iotlb_all(struct
> iommu_domain *domain) spin_unlock_irqrestore(&iommu->iommu_lock, flags);
> }
>
> -static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain,
> +static int sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain,
> unsigned long iova,
size_t size)
> {
> struct sun50i_iommu_domain *sun50i_domain =
to_sun50i_domain(domain);
> @@ -412,6 +412,8 @@ static void sun50i_iommu_iotlb_sync_map(struct
> iommu_domain *domain, spin_lock_irqsave(&iommu->iommu_lock, flags);
> sun50i_iommu_zap_range(iommu, iova, size);
> spin_unlock_irqrestore(&iommu->iommu_lock, flags);
> +
> + return 0;
> }
>
> static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain,
> diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
> index a482ff838b53..44966d7b07ba 100644
> --- a/drivers/iommu/tegra-gart.c
> +++ b/drivers/iommu/tegra-gart.c
> @@ -252,10 +252,11 @@ static int gart_iommu_of_xlate(struct device *dev,
> return 0;
> }
>
> -static void gart_iommu_sync_map(struct iommu_domain *domain, unsigned long
> iova, - size_t size)
> +static int gart_iommu_sync_map(struct iommu_domain *domain, unsigned long
> iova, + size_t size)
> {
> FLUSH_GART_REGS(gart_handle);
> + return 0;
> }
>
> static void gart_iommu_sync(struct iommu_domain *domain,
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 6595454d4f48..932e5532ee33 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -333,8 +333,8 @@ struct iommu_domain_ops {
> struct iommu_iotlb_gather
*iotlb_gather);
>
> void (*flush_iotlb_all)(struct iommu_domain *domain);
> - void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long
iova,
> - size_t size);
> + int (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long
iova,
> + size_t size);
> void (*iotlb_sync)(struct iommu_domain *domain,
> struct iommu_iotlb_gather *iotlb_gather);
Powered by blists - more mailing lists