lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1264473346-32721-1-git-send-email-adharmap@codeaurora.org>
Date:	Mon, 25 Jan 2010 18:35:46 -0800
From:	adharmap@...eaurora.org
To:	linux-kernel@...r.kernel.org
Cc:	linux-arm-msm@...r.kernel.org, dwalker@...eaurora.org,
	Arnd Bergmann <arnd@...db.de>,
	FUJITA Tomonori <fujita.tomonori@....ntt.co.jp>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Ingo Molnar <mingo@...e.hu>,
	Joerg Roedel <joerg.roedel@....com>,
	Maciej Sosnowski <maciej.sosnowski@...el.com>,
	Dan Williams <dan.j.williams@...el.com>,
	Becky Bruce <beckyb@...nel.crashing.org>,
	Benjamin Herrenschmidt <benh@...nel.crashing.org>,
	Yang Hongyang <yanghy@...fujitsu.com>,
	linux-arch@...r.kernel.org,
	Abhijeet Dharmapurikar <adharmap@...cinc.com>,
	Abhijeet Dharmapurikar <adharmap@...eaurora.org>
Subject: [RFC PATCH] dma: Add barrierless dma mapping/unmapping api

From: Abhijeet Dharmapurikar <adharmap@...cinc.com>

Tests have shown that there is a significant performance gain
when a driver uses mapping/unmapping functions without a barrier
multiple times and calls dsb() only after the last buffer. This
api adds support for barrierless dma operations on ARMv6
and ARMv7.

Signed-off-by: Abhijeet Dharmapurikar <adharmap@...eaurora.org>
---

Please refer to the earlier post here http://lkml.org/lkml/2010/1/4/347. 
These changes only are only for the arm v6 and v7
architecture and introduce dma_map/unmap_single_nobarrier api.
The idea is to extend the cpu_cache_fns.dma_.*_range functions to accept 
a third boolean argument that tells it whether to execute the barrier or
skip it. Note that this applies over Linux 2.6.33-rc5 and needs a few changes
to apply on the linux-next tree.


 arch/arm/include/asm/cacheflush.h  |   12 ++++----
 arch/arm/include/asm/dma-mapping.h |   57 ++++++++++++++++++++++++++++++++++--
 arch/arm/mm/cache-v6.S             |   15 ++++++----
 arch/arm/mm/cache-v7.S             |   19 +++++++++++-
 arch/arm/mm/dma-mapping.c          |   17 ++++++-----
 drivers/staging/dream/pmem.c       |    4 +-
 6 files changed, 97 insertions(+), 27 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index c77d2fa..59ce7fb 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -227,9 +227,9 @@ struct cpu_cache_fns {
 	void (*coherent_user_range)(unsigned long, unsigned long);
 	void (*flush_kern_dcache_area)(void *, size_t);
 
-	void (*dma_inv_range)(const void *, const void *);
-	void (*dma_clean_range)(const void *, const void *);
-	void (*dma_flush_range)(const void *, const void *);
+	void (*dma_inv_range)(const void *, const void *, unsigned long);
+	void (*dma_clean_range)(const void *, const void *, unsigned long);
+	void (*dma_flush_range)(const void *, const void *, unsigned long);
 };
 
 struct outer_cache_fns {
@@ -288,9 +288,9 @@ extern void __cpuc_flush_dcache_area(void *, size_t);
 #define dmac_clean_range		__glue(_CACHE,_dma_clean_range)
 #define dmac_flush_range		__glue(_CACHE,_dma_flush_range)
 
-extern void dmac_inv_range(const void *, const void *);
-extern void dmac_clean_range(const void *, const void *);
-extern void dmac_flush_range(const void *, const void *);
+extern void dmac_inv_range(const void *, const void *, unsigned long);
+extern void dmac_clean_range(const void *, const void *, unsigned long);
+extern void dmac_flush_range(const void *, const void *, unsigned long);
 
 #endif
 
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index a96300b..066923c 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -66,7 +66,8 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
  * platforms with CONFIG_DMABOUNCE.
  * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
  */
-extern void dma_cache_maint(const void *kaddr, size_t size, int rw);
+extern void dma_cache_maint(const void *kaddr, size_t size, int rw,
+						unsigned long barrier);
 extern void dma_cache_maint_page(struct page *page, unsigned long offset,
 				 size_t size, int rw);
 
@@ -297,6 +298,7 @@ static inline int dmabounce_sync_for_device(struct device *d, dma_addr_t addr,
  *
  * The device owns this memory once this call has completed.  The CPU
  * can regain ownership by calling dma_unmap_single() or
+ * or dma_unmap_single_nobarrier followed by dsb/dmb or
  * dma_sync_single_for_cpu().
  */
 static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
@@ -305,12 +307,40 @@ static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
 	BUG_ON(!valid_dma_direction(dir));
 
 	if (!arch_is_coherent())
-		dma_cache_maint(cpu_addr, size, dir);
+		dma_cache_maint(cpu_addr, size, dir, 1);
 
 	return virt_to_dma(dev, cpu_addr);
 }
 
 /**
+ * dma_map_single_nobarrier - map a single buffer for streaming DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @cpu_addr: CPU direct mapped address of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Ensure that any data held in the cache is appropriately discarded
+ * or written back.
+ *
+ * The device owns this memory once this call has completed and a dsb is
+ * executed.  The CPU
+ * can regain ownership by calling dma_unmap_single() or
+ * dma_map_single_nobarrier followed by dsb() or
+ * dma_sync_single_for_cpu().
+ */
+static inline dma_addr_t dma_map_single_nobarrier(struct device *dev,
+		void *cpu_addr, size_t size, enum dma_data_direction dir)
+{
+	BUG_ON(!valid_dma_direction(dir));
+
+	if (!arch_is_coherent())
+		dma_cache_maint(cpu_addr, size, dir, 0);
+
+	return virt_to_dma(dev, cpu_addr);
+}
+
+
+/**
  * dma_map_page - map a portion of a page for streaming DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
  * @page: page that buffer resides in
@@ -356,6 +386,26 @@ static inline void dma_unmap_single(struct device *dev, dma_addr_t handle,
 }
 
 /**
+ * dma_unmap_single_nobarrier - unmap a single buffer previously mapped
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @handle: DMA address of buffer
+ * @size: size of buffer (same as passed to dma_map_single)
+ * @dir: DMA transfer direction (same as passed to dma_map_single)
+ *
+ * Unmap a single streaming mode DMA translation.  The handle and size
+ * must match what was provided in the previous dma_map_single() call.
+ * All other usages are undefined.
+ *
+ * After this call, and a dsb(), reads by the CPU to the buffer are
+ * guaranteed to see  whatever the device wrote there.
+ */
+static inline void dma_unmap_single_nobarrier(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	/* nothing to do */
+}
+
+/**
  * dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
  * @handle: DMA address of buffer
@@ -413,7 +463,8 @@ static inline void dma_sync_single_range_for_device(struct device *dev,
 		return;
 
 	if (!arch_is_coherent())
-		dma_cache_maint(dma_to_virt(dev, handle) + offset, size, dir);
+		dma_cache_maint(dma_to_virt(dev, handle) + offset,
+							size, dir, 1);
 }
 
 static inline void dma_sync_single_for_cpu(struct device *dev,
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 4ba0a24..a88ab56 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -186,7 +186,7 @@ ENTRY(v6_flush_kern_dcache_area)
 
 
 /*
- *	v6_dma_inv_range(start,end)
+ *	v6_dma_inv_range(start,end,barrier)
  *
  *	Invalidate the data cache within the specified region; we will
  *	be performing a DMA operation in this region and we want to
@@ -220,11 +220,12 @@ ENTRY(v6_dma_inv_range)
 	cmp	r0, r1
 	blo	1b
 	mov	r0, #0
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	cmp	r2, #1
+	mcreq	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 /*
- *	v6_dma_clean_range(start,end)
+ *	v6_dma_clean_range(start,end,barrier)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
@@ -240,11 +241,12 @@ ENTRY(v6_dma_clean_range)
 	cmp	r0, r1
 	blo	1b
 	mov	r0, #0
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	cmp	r2, #1
+	mcreq	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 /*
- *	v6_dma_flush_range(start,end)
+ *	v6_dma_flush_range(start,end,barrier)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
@@ -260,7 +262,8 @@ ENTRY(v6_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 	mov	r0, #0
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	cmp	r2, #1
+	mcreq	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 	__INITDATA
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 9073db8..147a325 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -217,6 +217,7 @@ ENDPROC(v7_flush_kern_dcache_area)
  *	- end     - virtual end address of region
  */
 ENTRY(v7_dma_inv_range)
+	str r2,[sp, #-4]!
 	dcache_line_size r2, r3
 	sub	r3, r2, #1
 	tst	r0, r3
@@ -231,16 +232,21 @@ ENTRY(v7_dma_inv_range)
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
+	ldr r2, [sp], #4
+	cmp r2, #1
+	bne 2f
 	dsb
+2:
 	mov	pc, lr
 ENDPROC(v7_dma_inv_range)
 
 /*
- *	v7_dma_clean_range(start,end)
+ *	v7_dma_clean_range(start,end,barrier)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
 ENTRY(v7_dma_clean_range)
+	str r2,[sp, #-4]!
 	dcache_line_size r2, r3
 	sub	r3, r2, #1
 	bic	r0, r0, r3
@@ -249,16 +255,21 @@ ENTRY(v7_dma_clean_range)
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
+	ldr r2, [sp], #4
+	cmp r2, #1
+	bne 2f
 	dsb
+2:
 	mov	pc, lr
 ENDPROC(v7_dma_clean_range)
 
 /*
- *	v7_dma_flush_range(start,end)
+ *	v7_dma_flush_range(start,end,barrier)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
 ENTRY(v7_dma_flush_range)
+	str r2,[sp, #-4]!
 	dcache_line_size r2, r3
 	sub	r3, r2, #1
 	bic	r0, r0, r3
@@ -267,7 +278,11 @@ ENTRY(v7_dma_flush_range)
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
+	ldr r2, [sp], #4
+	cmp r2, #1
+	bne 2f
 	dsb
+2:
 	mov	pc, lr
 ENDPROC(v7_dma_flush_range)
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 26325cb..c9e3124 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -106,7 +106,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
 	 */
 	ptr = page_address(page);
 	memset(ptr, 0, size);
-	dmac_flush_range(ptr, ptr + size);
+	dmac_flush_range(ptr, ptr + size, 1);
 	outer_flush_range(__pa(ptr), __pa(ptr) + size);
 
 	return page;
@@ -404,10 +404,11 @@ EXPORT_SYMBOL(dma_free_coherent);
  * platforms with CONFIG_DMABOUNCE.
  * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
  */
-void dma_cache_maint(const void *start, size_t size, int direction)
+void dma_cache_maint(const void *start, size_t size, int direction,
+						unsigned long barrier)
 {
-	void (*inner_op)(const void *, const void *);
-	void (*outer_op)(unsigned long, unsigned long);
+	void (*inner_op)(const void *, const void *, unsigned long);
+	void (*outer_op)(unsigned long, unsigned long, unsigned long);
 
 	BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(start + size - 1));
 
@@ -428,7 +429,7 @@ void dma_cache_maint(const void *start, size_t size, int direction)
 		BUG();
 	}
 
-	inner_op(start, start + size);
+	inner_op(start, start + size, barrier);
 	outer_op(__pa(start), __pa(start) + size);
 }
 EXPORT_SYMBOL(dma_cache_maint);
@@ -438,7 +439,7 @@ static void dma_cache_maint_contiguous(struct page *page, unsigned long offset,
 {
 	void *vaddr;
 	unsigned long paddr;
-	void (*inner_op)(const void *, const void *);
+	void (*inner_op)(const void *, const void *, unsigned long);
 	void (*outer_op)(unsigned long, unsigned long);
 
 	switch (direction) {
@@ -460,12 +461,12 @@ static void dma_cache_maint_contiguous(struct page *page, unsigned long offset,
 
 	if (!PageHighMem(page)) {
 		vaddr = page_address(page) + offset;
-		inner_op(vaddr, vaddr + size);
+		inner_op(vaddr, vaddr + size, 1);
 	} else {
 		vaddr = kmap_high_get(page);
 		if (vaddr) {
 			vaddr += offset;
-			inner_op(vaddr, vaddr + size);
+			inner_op(vaddr, vaddr + size, 1);
 			kunmap_high(page);
 		}
 	}
diff --git a/drivers/staging/dream/pmem.c b/drivers/staging/dream/pmem.c
index def6468..1b24945 100644
--- a/drivers/staging/dream/pmem.c
+++ b/drivers/staging/dream/pmem.c
@@ -802,7 +802,7 @@ void flush_pmem_file(struct file *file, unsigned long offset, unsigned long len)
 	vaddr = pmem_start_vaddr(id, data);
 	/* if this isn't a submmapped file, flush the whole thing */
 	if (unlikely(!(data->flags & PMEM_FLAGS_CONNECTED))) {
-		dmac_flush_range(vaddr, vaddr + pmem_len(id, data));
+		dmac_flush_range(vaddr, vaddr + pmem_len(id, data), 1);
 		goto end;
 	}
 	/* otherwise, flush the region of the file we are drawing */
@@ -813,7 +813,7 @@ void flush_pmem_file(struct file *file, unsigned long offset, unsigned long len)
 			region_node->region.len))) {
 			flush_start = vaddr + region_node->region.offset;
 			flush_end = flush_start + region_node->region.len;
-			dmac_flush_range(flush_start, flush_end);
+			dmac_flush_range(flush_start, flush_end, 1);
 			break;
 		}
 	}
-- 
1.5.6.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ