Message-Id: <892fabb55ebe7bb35aa3f0be03ee3f93132b7acc.1527745258.git.baolin.wang@linaro.org>
Date:   Thu, 31 May 2018 13:55:33 +0800
From:   Baolin Wang <baolin.wang@...aro.org>
To:     hch@....de, m.szyprowski@...sung.com
Cc:     robin.murphy@....com, gregkh@...uxfoundation.org,
        iommu@...ts.linux-foundation.org, linux-kernel@...r.kernel.org,
        arnd@...db.de, broonie@...nel.org, baolin.wang@...aro.org
Subject: [PATCH 2/2] dma-coherent: Change the bitmap APIs

The device coherent memory code uses bitmap helper functions that take an
allocation order, which means the size of each allocated region is always
rounded up to a power-of-2 number of pages. For example, allocating 33 MB
from a 33 MB dma_mem region requires 64 MB of free memory in that region,
so the allocation fails.
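To make the rounding concrete, here is a minimal sketch (assuming a 4 KB
PAGE_SIZE; the exact order depends on the architecture's page size) of
what the current order-based path computes for that request:

	size_t size = 33 << 20;			/* 33 MB request */
	int order = get_order(size);		/* 8448 pages round up to order 14 */
	size_t reserved = PAGE_SIZE << order;	/* 64 MB actually consumed */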

Thus we can switch to bitmap_find_next_zero_area()/bitmap_set()/
bitmap_clear() to track the allocated coherent memory, reducing the
allocation granularity to a single PAGE_SIZE.
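For reference, these are the bitmap helpers this patch switches to, as
declared in include/linux/bitmap.h; an align_mask of 0 means the free
area needs no particular alignment:

	unsigned long bitmap_find_next_zero_area(unsigned long *map,
						 unsigned long size,
						 unsigned long start,
						 unsigned int nr,
						 unsigned long align_mask);
	void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits);
	void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits);

bitmap_find_next_zero_area() returns a value >= size when no run of nr
clear bits exists, which is the failure check used in the patch below.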

Moreover, as Arnd described:
"I believe that bitmap_allocate_region() was chosen here because it is
more efficient than bitmap_find_next_zero_area(), at least that is the
explanation given in https://en.wikipedia.org/wiki/Buddy_memory_allocation.

It's quite possible that we don't actually care about efficiency of
dma_alloc_*() since a) it's supposed to be called very rarely, and
b) the overhead of accessing uncached memory is likely higher than the
search through a relatively small bitmap".

Thus I think we can change the allocation granularity to one PAGE_SIZE by
switching to the new bitmap APIs, without introducing an efficiency
problem.
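With per-page granularity, the number of bits needed is exactly the
number of pages, rounded up (a sketch, again assuming 4 KB pages):

	int nbits = (size + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
	/* for size = 33 << 20 this is 8448 bits, not 16384, so the
	 * 33 MB request now fits in the 33 MB region */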

Signed-off-by: Baolin Wang <baolin.wang@...aro.org>
---
 drivers/base/dma-coherent.c |   54 +++++++++++++++++++++++--------------------
 include/linux/dma-mapping.h |    6 ++---
 2 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index ce19832..4131540 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c
@@ -143,34 +143,31 @@ void *dma_mark_declared_memory_occupied(struct device *dev,
 					dma_addr_t device_addr, size_t size)
 {
 	struct dma_coherent_mem *mem = dev->dma_mem;
-	int order = get_order(size);
 	unsigned long flags;
-	int pos, err;
-
-	size += device_addr & ~PAGE_MASK;
+	int start_bit, nbits;
 
 	if (!mem)
 		return ERR_PTR(-EINVAL);
 
+	size += device_addr & ~PAGE_MASK;
+	nbits = (size + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
+
 	spin_lock_irqsave(&mem->spinlock, flags);
-	pos = PFN_DOWN(device_addr - dma_get_device_base(dev, mem));
-	err = bitmap_allocate_region(mem->bitmap, pos, order);
-	if (!err)
-		mem->avail -= 1 << order;
+	start_bit = PFN_DOWN(device_addr - dma_get_device_base(dev, mem));
+	bitmap_set(mem->bitmap, start_bit, nbits);
+	mem->avail -= nbits;
 	spin_unlock_irqrestore(&mem->spinlock, flags);
 
-	if (err != 0)
-		return ERR_PTR(err);
-	return mem->virt_base + (pos << PAGE_SHIFT);
+	return mem->virt_base + (start_bit << PAGE_SHIFT);
 }
 EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
 
 static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
 		ssize_t size, dma_addr_t *dma_handle)
 {
-	int order = get_order(size);
+	int nbits = (size + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
 	unsigned long flags;
-	int pageno;
+	int start_bit, end_bit;
 	void *ret;
 
 	spin_lock_irqsave(&mem->spinlock, flags);
@@ -178,16 +175,22 @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
 	if (unlikely(size > (mem->avail << PAGE_SHIFT)))
 		goto err;
 
-	pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
-	if (unlikely(pageno < 0))
+	start_bit = 0;
+	end_bit = mem->size;
+
+	start_bit = bitmap_find_next_zero_area(mem->bitmap, end_bit, start_bit,
+					       nbits, 0);
+	if (start_bit >= end_bit)
 		goto err;
 
+	bitmap_set(mem->bitmap, start_bit, nbits);
+
 	/*
 	 * Memory was found in the coherent area.
 	 */
-	*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
-	ret = mem->virt_base + (pageno << PAGE_SHIFT);
-	mem->avail -= 1 << order;
+	*dma_handle = mem->device_base + (start_bit << PAGE_SHIFT);
+	ret = mem->virt_base + (start_bit << PAGE_SHIFT);
+	mem->avail -= nbits;
 	spin_unlock_irqrestore(&mem->spinlock, flags);
 	memset(ret, 0, size);
 	return ret;
@@ -241,16 +244,17 @@ void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
 }
 
 static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
-				       int order, void *vaddr)
+				       int size, void *vaddr)
 {
 	if (mem && vaddr >= mem->virt_base && vaddr <
 		   (mem->virt_base + (mem->size << PAGE_SHIFT))) {
-		int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
+		int nbits = (size + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
+		int start_bit = (vaddr - mem->virt_base) >> PAGE_SHIFT;
 		unsigned long flags;
 
 		spin_lock_irqsave(&mem->spinlock, flags);
-		bitmap_release_region(mem->bitmap, page, order);
-		mem->avail += 1 << order;
+		bitmap_clear(mem->bitmap, start_bit, nbits);
+		mem->avail += nbits;
 		spin_unlock_irqrestore(&mem->spinlock, flags);
 		return 1;
 	}
@@ -260,7 +264,7 @@ static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
 /**
  * dma_release_from_dev_coherent() - free memory to device coherent memory pool
  * @dev:	device from which the memory was allocated
- * @order:	the order of pages allocated
+ * @size:	size of the memory area to release
  * @vaddr:	virtual address of allocated pages
  *
  * This checks whether the memory was allocated from the per-device
@@ -269,11 +273,11 @@ static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
  * Returns 1 if we correctly released the memory, or 0 if the caller should
  * proceed with releasing memory from generic pools.
  */
-int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr)
+int dma_release_from_dev_coherent(struct device *dev, int size, void *vaddr)
 {
 	struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
 
-	return __dma_release_from_coherent(mem, order, vaddr);
+	return __dma_release_from_coherent(mem, size, vaddr);
 }
 EXPORT_SYMBOL(dma_release_from_dev_coherent);
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f8ab1c0..29ae92e 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -162,7 +162,7 @@ static inline int is_device_dma_capable(struct device *dev)
  */
 int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
 				       dma_addr_t *dma_handle, void **ret);
-int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr);
+int dma_release_from_dev_coherent(struct device *dev, int size, void *vaddr);
 
 int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
 			    void *cpu_addr, size_t size, int *ret);
@@ -174,7 +174,7 @@ int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr,
 
 #else
 #define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0)
-#define dma_release_from_dev_coherent(dev, order, vaddr) (0)
+#define dma_release_from_dev_coherent(dev, size, vaddr) (0)
 #define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0)
 
 static inline void *dma_alloc_from_global_coherent(ssize_t size,
@@ -540,7 +540,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size,
 	BUG_ON(!ops);
 	WARN_ON(irqs_disabled());
 
-	if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr))
+	if (dma_release_from_dev_coherent(dev, size, cpu_addr))
 		return;
 
 	if (!ops->free || !cpu_addr)
-- 
1.7.9.5
