lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Mon, 18 Mar 2024 15:37:16 +0000
From: Michael Kelley <mhklinux@...look.com>
To: Petr Tesarik <petrtesarik@...weicloud.com>, Christoph Hellwig
	<hch@....de>, Marek Szyprowski <m.szyprowski@...sung.com>, Robin Murphy
	<robin.murphy@....com>, Petr Tesarik <petr.tesarik1@...wei-partners.com>,
	Will Deacon <will@...nel.org>, open list <linux-kernel@...r.kernel.org>,
	"open list:DMA MAPPING HELPERS" <iommu@...ts.linux.dev>
CC: Roberto Sassu <roberto.sassu@...weicloud.com>, Petr Tesarik
	<petr@...arici.cz>
Subject: RE: [PATCH v2 1/2] swiotlb: extend buffer pre-padding to
 alloc_align_mask if necessary

From: Petr Tesarik <petrtesarik@...weicloud.com> Sent: Monday, March 18, 2024 6:05 AM
> 
> Allow a buffer pre-padding of up to alloc_align_mask. If the allocation
> alignment is bigger than IO_TLB_SIZE and min_align_mask covers any non-zero
> bits in the original address between IO_TLB_SIZE and alloc_align_mask,
> these bits are not preserved in the swiotlb buffer address.
> 
> To fix this case, increase the allocation size and use a larger offset
> within the allocated buffer. As a result, extra padding slots may be
> allocated before the mapping start address.
> 
> Set the orig_addr in these padding slots to INVALID_PHYS_ADDR, because they
> do not correspond to any CPU buffer and the data must never be synced.

This paragraph is now obsolete.

> 
> The padding slots should be automatically released when the buffer is
> unmapped. However, swiotlb_tbl_unmap_single() takes only the address of the
> DMA buffer slot, not the first padding slot. Save the number of padding
> slots in struct io_tlb_slot and use it to adjust the slot index in
> swiotlb_release_slots(), so all allocated slots are properly freed.
> 
> Fixes: 2fd4fa5d3fb5 ("swiotlb: Fix alignment checks when both allocation and DMA masks are present")
> Link: https://lore.kernel.org/linux-iommu/20240311210507.217daf8b@meshulam.tesarici.cz/
> Signed-off-by: Petr Tesarik <petr.tesarik1@...wei-partners.com>
> ---
>  kernel/dma/swiotlb.c | 35 +++++++++++++++++++++++++++++------
>  1 file changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 86fe172b5958..aefb05ff55e7 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -69,11 +69,14 @@
>   * @alloc_size:	Size of the allocated buffer.
>   * @list:	The free list describing the number of free entries available
>   *		from each index.
> + * @pad_slots:	Number of preceding padding slots. Valid only in the first
> + *		allocated non-padding slot.
>   */
>  struct io_tlb_slot {
>  	phys_addr_t orig_addr;
>  	size_t alloc_size;
> -	unsigned int list;
> +	unsigned short list;
> +	unsigned short pad_slots;
>  };
> 
>  static bool swiotlb_force_bounce;
> @@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
>  					 mem->nslabs - i);
>  		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
>  		mem->slots[i].alloc_size = 0;
> +		mem->slots[i].pad_slots = 0;
>  	}
> 
>  	memset(vaddr, 0, bytes);
> @@ -1328,11 +1332,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
>  		unsigned long attrs)
>  {
>  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> -	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
> +	unsigned int offset;
>  	struct io_tlb_pool *pool;
>  	unsigned int i;
>  	int index;
>  	phys_addr_t tlb_addr;
> +	unsigned short pad_slots;
> 
>  	if (!mem || !mem->nslabs) {
>  		dev_warn_ratelimited(dev,
> @@ -1349,6 +1354,15 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
>  		return (phys_addr_t)DMA_MAPPING_ERROR;
>  	}
> 
> +	/*
> +	 * Calculate buffer pre-padding within the allocated space. Use it to
> +	 * preserve the low bits of the original address according to device's
> +	 * min_align_mask. Limit the padding to alloc_align_mask or slot size
> +	 * (whichever is bigger); higher bits of the original address are
> +	 * preserved by selecting a suitable IO TLB slot.
> +	 */
> +	offset = orig_addr & dma_get_min_align_mask(dev) &
> +		(alloc_align_mask | (IO_TLB_SIZE - 1));
>  	index = swiotlb_find_slots(dev, orig_addr,
>  				   alloc_size + offset, alloc_align_mask, &pool);
>  	if (index == -1) {
> @@ -1364,6 +1378,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
>  	 * This is needed when we sync the memory.  Then we sync the buffer if
>  	 * needed.
>  	 */
> +	pad_slots = offset / IO_TLB_SIZE;
> +	offset %= IO_TLB_SIZE;
> +	index += pad_slots;
> +	pool->slots[index].pad_slots = i;

The above line should be:
	pool->slots[index].pad_slots = pad_slots;

>  	for (i = 0; i < nr_slots(alloc_size + offset); i++)
>  		pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
>  	tlb_addr = slot_addr(pool->start, index) + offset;
> @@ -1385,12 +1403,16 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
>  	struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
>  	unsigned long flags;
>  	unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
> -	int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
> -	int nslots = nr_slots(mem->slots[index].alloc_size + offset);
> -	int aindex = index / mem->area_nslabs;
> -	struct io_tlb_area *area = &mem->areas[aindex];
> +	int index, nslots, aindex;
> +	struct io_tlb_area *area;
>  	int count, i;
> 
> +	index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
> +	index -= mem->slots[index].pad_slots;
> +	nslots = nr_slots(mem->slots[index].alloc_size + offset);
> +	aindex = index / mem->area_nslabs;
> +	area = &mem->areas[aindex];
> +
>  	/*
>  	 * Return the buffer to the free list by setting the corresponding
>  	 * entries to indicate the number of contiguous entries available.
> @@ -1413,6 +1435,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
>  		mem->slots[i].list = ++count;
>  		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
>  		mem->slots[i].alloc_size = 0;
> +		mem->slots[i].pad_slots = 0;
>  	}
> 
>  	/*
> --
> 2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ