[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20160712133042.GG12639@8bytes.org>
Date: Tue, 12 Jul 2016 15:30:42 +0200
From: Joerg Roedel <joro@...tes.org>
To: Robin Murphy <robin.murphy@....com>
Cc: iommu@...ts.linux-foundation.org, Vincent.Wan@....com,
Joerg Roedel <jroedel@...e.de>, linux-kernel@...r.kernel.org
Subject: [PATCH 16/20 v2] iommu/amd: Optimize map_sg and unmap_sg
On Tue, Jul 12, 2016 at 12:33:43PM +0100, Robin Murphy wrote:
> > + for_each_sg(sglist, s, nelems, i)
> > + npages += iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
>
> This fails to account for the segment boundary mask[1]. Given a typical
> sglist from the block layer where the boundary mask is 64K, the first
> segment is 8k long, and subsequent segments are 64K long, those
> subsequent segments will end up with misaligned addresses which certain
> hardware may object to.
Yeah, right. It doesn't matter much on x86, as the smallest
boundary-mask I have seen is 4G, but to be correct it should be
accounted in. How does the attached patch look?
>
> > + address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask);
>
> Since a typical dma_map_sg() call is likely to involve >128K worth of
> data, I wonder if it's worth going directly to a slow-path IOVA
> allocation...
Well, the allocator is the bottle-neck, so I try not to call it for
every sg-element. The global locks have been removed, but more
allocations/deallocations also mean that the per-cpu free-lists fill up
quicker and that we have to flush the IOTLBs more often, which costs
performance.
> [1]:http://article.gmane.org/gmane.linux.kernel.iommu/10553 - almost the
> 1-year anniversary of you making much the same comment to me :D
Touché ;-)
Here is the updated patch:
>From 88e1cc6c8e854a2bf55f972ddc5082a44760abe2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@...e.de>
Date: Wed, 6 Jul 2016 17:20:54 +0200
Subject: [PATCH 1/5] iommu/amd: Optimize map_sg and unmap_sg
Optimize these functions so that they need only one call
into the address alloctor. This also saves a couple of
io-tlb flushes in the unmap_sg path.
Signed-off-by: Joerg Roedel <jroedel@...e.de>
---
drivers/iommu/amd_iommu.c | 112 +++++++++++++++++++++++++++++++++++-----------
1 file changed, 86 insertions(+), 26 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2cd382e..203c50c 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2395,50 +2395,110 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
__unmap_single(domain->priv, dma_addr, size, dir);
}
+static int sg_num_pages(struct device *dev,
+ struct scatterlist *sglist,
+ int nelems)
+{
+ unsigned long mask, boundary_size;
+ struct scatterlist *s;
+ int i, npages = 0;
+
+ mask = dma_get_seg_boundary(dev);
+ boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT :
+ 1UL << (BITS_PER_LONG - PAGE_SHIFT);
+
+ for_each_sg(sglist, s, nelems, i) {
+ int p, n;
+
+ s->dma_address = npages << PAGE_SHIFT;
+ p = npages % boundary_size;
+ n = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
+ if (p + n > boundary_size)
+ npages += boundary_size - p;
+ npages += n;
+ }
+
+ return npages;
+}
+
/*
* The exported map_sg function for dma_ops (handles scatter-gather
* lists).
*/
static int map_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction dir,
+ int nelems, enum dma_data_direction direction,
struct dma_attrs *attrs)
{
+ int mapped_pages = 0, npages = 0, prot = 0, i;
struct protection_domain *domain;
- int i;
+ struct dma_ops_domain *dma_dom;
struct scatterlist *s;
- phys_addr_t paddr;
- int mapped_elems = 0;
+ unsigned long address;
u64 dma_mask;
domain = get_domain(dev);
if (IS_ERR(domain))
return 0;
+ dma_dom = domain->priv;
dma_mask = *dev->dma_mask;
+ npages = sg_num_pages(dev, sglist, nelems);
+
+ address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask);
+ if (address == DMA_ERROR_CODE)
+ goto out_err;
+
+ prot = dir2prot(direction);
+
+ /* Map all sg entries */
for_each_sg(sglist, s, nelems, i) {
- paddr = sg_phys(s);
+ int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
+
+ for (j = 0; j < pages; ++j) {
+ unsigned long bus_addr, phys_addr;
+ int ret;
- s->dma_address = __map_single(dev, domain->priv,
- paddr, s->length, dir, dma_mask);
+ bus_addr = address + s->dma_address + (j << PAGE_SHIFT);
+ phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT);
+ ret = iommu_map_page(domain, bus_addr, phys_addr, PAGE_SIZE, prot, GFP_ATOMIC);
+ if (ret)
+ goto out_unmap;
- if (s->dma_address) {
- s->dma_length = s->length;
- mapped_elems++;
- } else
- goto unmap;
+ mapped_pages += 1;
+ }
}
- return mapped_elems;
+ /* Everything is mapped - write the right values into s->dma_address */
+ for_each_sg(sglist, s, nelems, i) {
+ s->dma_address += address + s->offset;
+ s->dma_length = s->length;
+ }
+
+ return nelems;
+
+out_unmap:
+ pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n",
+ dev_name(dev), npages);
+
+ for_each_sg(sglist, s, nelems, i) {
+ int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
+
+ for (j = 0; j < pages; ++j) {
+ unsigned long bus_addr;
-unmap:
- for_each_sg(sglist, s, mapped_elems, i) {
- if (s->dma_address)
- __unmap_single(domain->priv, s->dma_address,
- s->dma_length, dir);
- s->dma_address = s->dma_length = 0;
+ bus_addr = address + s->dma_address + (j << PAGE_SHIFT);
+ iommu_unmap_page(domain, bus_addr, PAGE_SIZE);
+
+ if (--mapped_pages)
+ goto out_free_iova;
+ }
}
+out_free_iova:
+ free_iova_fast(&dma_dom->iovad, address, npages);
+
+out_err:
return 0;
}
@@ -2451,18 +2511,18 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
struct dma_attrs *attrs)
{
struct protection_domain *domain;
- struct scatterlist *s;
- int i;
+ unsigned long startaddr;
+ int npages = 2;
domain = get_domain(dev);
if (IS_ERR(domain))
return;
- for_each_sg(sglist, s, nelems, i) {
- __unmap_single(domain->priv, s->dma_address,
- s->dma_length, dir);
- s->dma_address = s->dma_length = 0;
- }
+ startaddr = sg_dma_address(sglist) & PAGE_MASK;
+ dma_dom = domain->priv;
+ npages = sg_num_pages(dev, sglist, nelems);
+
+ __unmap_single(domain->priv, startaddr, npages << PAGE_SHIFT, dir);
}
/*
--
2.6.6
Powered by blists - more mailing lists