Message-Id: <20251117211238.50126-1-21cnbao@gmail.com>
Date: Tue, 18 Nov 2025 05:12:38 +0800
From: Barry Song <21cnbao@...il.com>
To: catalin.marinas@....com
Cc: 21cnbao@...il.com,
	ada.coupriediaz@....com,
	anshuman.khandual@....com,
	ardb@...nel.org,
	iommu@...ts.linux.dev,
	linux-arm-kernel@...ts.infradead.org,
	linux-kernel@...r.kernel.org,
	m.szyprowski@...sung.com,
	maz@...nel.org,
	robin.murphy@....com,
	ryan.roberts@....com,
	surenb@...gle.com,
	v-songbaohua@...o.com,
	will@...nel.org,
	zhengtangquan@...o.com
Subject: Re: [RFC PATCH 5/5] dma-mapping: Allow batched DMA sync operations if supported by the arch

On Fri, Nov 14, 2025 at 2:19 AM Catalin Marinas <catalin.marinas@....com> wrote:
>
> On Wed, Oct 29, 2025 at 10:31:15AM +0800, Barry Song wrote:
> > diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> > index 1f9ee9759426..a0b45f84a91f 100644
> > --- a/kernel/dma/direct.c
> > +++ b/kernel/dma/direct.c
> > @@ -403,9 +403,16 @@ void dma_direct_sync_sg_for_device(struct device *dev,
> >               swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);
> >
> >               if (!dev_is_dma_coherent(dev))
> > -                     arch_sync_dma_for_device(paddr, sg->length,
> > -                                     dir);
> > +#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
> > +                     arch_sync_dma_for_device_batch_add(paddr, sg->length, dir);
> > +#else
> > +                     arch_sync_dma_for_device(paddr, sg->length, dir);
> > +#endif
> >       }
> > +#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
> > +     if (!dev_is_dma_coherent(dev))
> > +             arch_sync_dma_batch_flush();
> > +#endif
> >  }
> >  #endif
>
> Just a high-level comment for now. I'm not opposed to the idea of
> batching the DSB barriers, we do this for ptes. However, the way it's
> implemented in the generic files, with lots of #ifdefs, makes the code
> pretty unreadable.

Thanks, Catalin. I agree we need the batching: phones and embedded systems
can use many DMA buffers, and some of their chips are not DMA-coherent.

> Can we have something like arch_sync_dma_begin/end() and let the arch
> code handle the barriers as they see fit?

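If I understand correctly, that would look roughly like the sketch below in
the generic code, e.g. in dma_direct_sync_sg_for_device(). This is only a
hypothetical, untested illustration of your suggestion; arch_sync_dma_begin()
and arch_sync_dma_end() don't exist yet and would default to no-ops:

	arch_sync_dma_begin();		/* arch may start deferring barriers */
	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));

		swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);

		if (!dev_is_dma_coherent(dev))
			arch_sync_dma_for_device(paddr, sg->length, dir);
	}
	arch_sync_dma_end();		/* arch issues its barrier here, as it sees fit */

A similar effect can be had by keeping the *_batch_add()/*_batch_flush()
helpers and giving them inline fallbacks when the arch doesn't opt in, which
also gets the #ifdefs out of the generic code.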

I guess I can refactor it as below and then remove the #ifdef/#else/#endif blocks.

diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 8fcd0a9c1f39..73bca4d7149d 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -373,6 +373,20 @@ void arch_sync_dma_for_device_batch_add(phys_addr_t paddr, size_t size,
 void arch_sync_dma_for_cpu_batch_add(phys_addr_t paddr, size_t size,
 		enum dma_data_direction dir);
 void arch_sync_dma_batch_flush(void);
+#else
+static inline void arch_sync_dma_for_device_batch_add(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
+{
+	arch_sync_dma_for_device(paddr, size, dir);
+}
+static inline void arch_sync_dma_for_cpu_batch_add(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
+{
+	arch_sync_dma_for_cpu(paddr, size, dir);
+}
+static inline void arch_sync_dma_batch_flush(void)
+{
+}
 #endif
 
 #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index a0b45f84a91f..69b14b0c0501 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -403,16 +403,10 @@ void dma_direct_sync_sg_for_device(struct device *dev,
 		swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);
 
 		if (!dev_is_dma_coherent(dev))
-#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
 			arch_sync_dma_for_device_batch_add(paddr, sg->length, dir);
-#else
-			arch_sync_dma_for_device(paddr, sg->length, dir);
-#endif
 	}
-#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
 	if (!dev_is_dma_coherent(dev))
 		arch_sync_dma_batch_flush();
-#endif
 }
 #endif
 
@@ -429,11 +423,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
 		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
 
 		if (!dev_is_dma_coherent(dev))
-#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
 			arch_sync_dma_for_cpu_batch_add(paddr, sg->length, dir);
-#else
-			arch_sync_dma_for_cpu(paddr, sg->length, dir);
-#endif
 
 		swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
 
@@ -443,9 +433,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
 
 	if (!dev_is_dma_coherent(dev)) {
 		arch_sync_dma_for_cpu_all();
-#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
 		arch_sync_dma_batch_flush();
-#endif
 	}
 }
 
@@ -458,29 +446,19 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
 {
 	struct scatterlist *sg;
 	int i;
-#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
 	bool need_sync = false;
-#endif
 
 	for_each_sg(sgl,  sg, nents, i) {
 		if (sg_dma_is_bus_address(sg)) {
 			sg_dma_unmark_bus_address(sg);
 		} else {
-#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
 			need_sync = true;
 			dma_direct_unmap_phys_batch_add(dev, sg->dma_address,
 					      sg_dma_len(sg), dir, attrs);
-
-#else
-			dma_direct_unmap_phys(dev, sg->dma_address,
-					      sg_dma_len(sg), dir, attrs);
-#endif
 		}
 	}
-#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
 	if (need_sync && !dev_is_dma_coherent(dev))
 		arch_sync_dma_batch_flush();
-#endif
 }
 #endif
 
@@ -490,9 +468,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 	struct pci_p2pdma_map_state p2pdma_state = {};
 	struct scatterlist *sg;
 	int i, ret;
-#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
 	bool need_sync = false;
-#endif
 
 	for_each_sg(sgl, sg, nents, i) {
 		switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) {
@@ -504,14 +480,9 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 			 */
 			break;
 		case PCI_P2PDMA_MAP_NONE:
-#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
 			need_sync = true;
 			sg->dma_address = dma_direct_map_phys_batch_add(dev, sg_phys(sg),
 					sg->length, dir, attrs);
-#else
-			sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg),
-					sg->length, dir, attrs);
-#endif
 			if (sg->dma_address == DMA_MAPPING_ERROR) {
 				ret = -EIO;
 				goto out_unmap;
@@ -529,10 +500,8 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 		sg_dma_len(sg) = sg->length;
 	}
 
-#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
 	if (need_sync && !dev_is_dma_coherent(dev))
 		arch_sync_dma_batch_flush();
-#endif
 	return nents;
 
 out_unmap:

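On the arm64 side, the batched helpers could then defer the barrier so that
the flush is a single DSB. A rough, untested sketch (not the code from this
series; __dcache_clean_poc_nodsb() is a made-up name for a by-line DC CVAC
loop without the trailing DSB, and direction handling is omitted):

void arch_sync_dma_for_device_batch_add(phys_addr_t paddr, size_t size,
		enum dma_data_direction dir)
{
	unsigned long start = (unsigned long)phys_to_virt(paddr);

	/* Clean to PoC, but skip the per-call DSB. */
	__dcache_clean_poc_nodsb(start, start + size);
}

void arch_sync_dma_batch_flush(void)
{
	/* One barrier completes all the maintenance queued above. */
	dsb(sy);
}

That way a non-coherent device pays one DSB per scatterlist instead of one
per entry.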

Thanks
Barry
