Date:	Tue, 12 Jul 2016 12:33:43 +0100
From:	Robin Murphy <robin.murphy@....com>
To:	Joerg Roedel <joro@...tes.org>, iommu@...ts.linux-foundation.org
Cc:	Vincent.Wan@....com, Joerg Roedel <jroedel@...e.de>,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH 16/20] iommu/amd: Optimize map_sg and unmap_sg

On 08/07/16 12:45, Joerg Roedel wrote:
> From: Joerg Roedel <jroedel@...e.de>
> 
> Optimize these functions so that they need only one call
> into the address allocator. This also saves a couple of
> io-tlb flushes in the unmap_sg path.
> 
> Signed-off-by: Joerg Roedel <jroedel@...e.de>
> ---
>  drivers/iommu/amd_iommu.c | 77 ++++++++++++++++++++++++++++++++---------------
>  1 file changed, 52 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index 78b278b..e5f8e7f 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -2409,45 +2409,70 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
>   * lists).
>   */
>  static int map_sg(struct device *dev, struct scatterlist *sglist,
> -		  int nelems, enum dma_data_direction dir,
> +		  int nelems, enum dma_data_direction direction,
>  		  struct dma_attrs *attrs)
>  {
> +	int mapped_pages = 0, npages = 0, prot = 0, i;
> +	unsigned long start_addr, address;
>  	struct protection_domain *domain;
> -	int i;
> +	struct dma_ops_domain *dma_dom;
>  	struct scatterlist *s;
> -	phys_addr_t paddr;
> -	int mapped_elems = 0;
>  	u64 dma_mask;
>  
>  	domain = get_domain(dev);
>  	if (IS_ERR(domain))
>  		return 0;
>  
> +	dma_dom  = domain->priv;
>  	dma_mask = *dev->dma_mask;
>  
> +	for_each_sg(sglist, s, nelems, i)
> +		npages += iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);

This fails to account for the segment boundary mask[1]. Given a typical
sglist from the block layer where the boundary mask is 64K, the first
segment is 8K long, and subsequent segments are 64K long, those
subsequent segments will end up with misaligned addresses which certain
hardware may object to.
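
To make that concrete, here's a hypothetical sketch (my code, not the
patch's, and not iommu-dma verbatim) of boundary-aware sizing: pad the
running page count so that a segment which would otherwise straddle a
boundary window starts on the next one instead. It assumes the eventual
IOVA allocation comes back aligned to the boundary, and the mapping loop
below would need to replay the same padding when placing each segment:

	unsigned long boundary = dma_get_seg_boundary(dev) >> PAGE_SHIFT;

	for_each_sg(sglist, s, nelems, i) {
		int pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
		/* pages left in the current boundary window */
		unsigned long space = boundary + 1 - (npages & boundary);

		if (pages > space)	/* would cross: skip to next window */
			npages += space;
		npages += pages;
	}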

> +	address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask);

Since a typical dma_map_sg() call is likely to involve >128K worth of
data, I wonder if it's worth going directly to a slow-path IOVA
allocation...
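
For the record, a rough sketch of what I mean, assuming dma_ops_alloc_iova()
wraps alloc_iova_fast() as elsewhere in this series and returns a DMA
address as the code above suggests: the per-CPU rcache only serves ranges
up to IOVA_RANGE_CACHE_MAX_SIZE (2^5 pages, i.e. 128K with 4K pages), so
anything bigger misses it anyway. That constant is private to iova.c, so a
real patch would have to expose it or hard-code the threshold:

	if (npages > (1UL << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) {
		/* too big for the rcache: take the rbtree path directly */
		struct iova *iova = alloc_iova(&dma_dom->iovad, npages,
					       dma_mask >> PAGE_SHIFT, true);

		address = iova ? iova->pfn_lo << PAGE_SHIFT : DMA_ERROR_CODE;
	} else {
		address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask);
	}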

> +	if (address == DMA_ERROR_CODE)
> +		goto out_err;
> +
> +	start_addr = address;
> +	prot       = dir2prot(direction);
> +
>  	for_each_sg(sglist, s, nelems, i) {
> -		paddr = sg_phys(s);
> +		int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
> +
> +		for (j = 0; j < pages; ++j) {
> +			unsigned long bus_addr, phys_addr;
> +			int ret;
>  
> -		s->dma_address = __map_single(dev, domain->priv,
> -					      paddr, s->length, dir, dma_mask);
> +			bus_addr  = address + (j << PAGE_SHIFT);
> +			phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT);
> +			ret = iommu_map_page(domain, bus_addr, phys_addr, PAGE_SIZE, prot, GFP_ATOMIC);
> +			if (ret)
> +				goto out_unmap;
> +
> +			mapped_pages += 1;
> +		}
>  
> -		if (s->dma_address) {
> -			s->dma_length = s->length;
> -			mapped_elems++;
> -		} else
> -			goto unmap;
> +		s->dma_address = address + s->offset;
> +		s->dma_length  = s->length;
> +		address += pages << PAGE_SHIFT;
>  	}
>  
> -	return mapped_elems;
> +	return nelems;
>  
> -unmap:
> -	for_each_sg(sglist, s, mapped_elems, i) {
> -		if (s->dma_address)
> -			__unmap_single(domain->priv, s->dma_address,
> -				       s->dma_length, dir);
> -		s->dma_address = s->dma_length = 0;
> +
> +out_unmap:
> +	pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n",
> +	       dev_name(dev), npages);
> +
> +	for (i = 0; i < mapped_pages; ++i) {
> +		iommu_unmap_page(domain,
> +				 start_addr + (i << PAGE_SHIFT),
> +				 PAGE_SIZE);
>  	}
>  
> +	free_iova_fast(&dma_dom->iovad, start_addr, npages);
> +
> +out_err:
>  	return 0;
>  }
>  
> @@ -2460,18 +2485,20 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
>  		     struct dma_attrs *attrs)
>  {
>  	struct protection_domain *domain;
> +	unsigned long startaddr;
>  	struct scatterlist *s;
> -	int i;
> 	int i, npages = 0;
>  
>  	domain = get_domain(dev);
>  	if (IS_ERR(domain))
>  		return;
>  
> -	for_each_sg(sglist, s, nelems, i) {
> -		__unmap_single(domain->priv, s->dma_address,
> -			       s->dma_length, dir);
> -		s->dma_address = s->dma_length = 0;
> -	}
> +	for_each_sg(sglist, s, nelems, i)
> +		npages += iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);

...which would also then allow this to be further simplified down to the
find_iova() trick we use in iommu-dma.
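
For reference, the shape of that trick, paraphrased from iommu-dma's
__iommu_dma_unmap(), given the allocation's dma_addr and the domain's
iova_domain: the rbtree already knows how big the allocation was, so
there's no need to re-walk the scatterlist at all:

	unsigned long shift = iova_shift(iovad);
	unsigned long pfn = dma_addr >> shift;
	struct iova *iova = find_iova(iovad, pfn);

	if (WARN_ON(!iova))
		return;

	/* unmap the whole allocation, then release the IOVA itself */
	iommu_unmap(domain, pfn << shift, iova_size(iova) << shift);
	__free_iova(iovad, iova);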

Robin.

[1]: http://article.gmane.org/gmane.linux.kernel.iommu/10553 - almost the
1-year anniversary of you making much the same comment to me :D

> +
> +	startaddr = sg_dma_address(sglist) & PAGE_MASK;
> +
> +	__unmap_single(domain->priv, startaddr, npages << PAGE_SHIFT, dir);
>  }
>  
>  /*
> 
