[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20251223023648.31614-1-21cnbao@gmail.com>
Date: Tue, 23 Dec 2025 15:36:46 +1300
From: Barry Song <21cnbao@...il.com>
To: 21cnbao@...il.com,
leon@...nel.org
Cc: ada.coupriediaz@....com,
anshuman.khandual@....com,
ardb@...nel.org,
catalin.marinas@....com,
iommu@...ts.linux.dev,
linux-arm-kernel@...ts.infradead.org,
linux-kernel@...r.kernel.org,
m.szyprowski@...sung.com,
maz@...nel.org,
robin.murphy@....com,
ryan.roberts@....com,
surenb@...gle.com,
v-songbaohua@...o.com,
will@...nel.org,
zhengtangquan@...o.com
Subject: Re: [PATCH 5/6] dma-mapping: Allow batched DMA sync operations if supported by the arch
>
> >
> > I would also rename arch_sync_dma_batch_flush() to arch_sync_dma_flush().
>
> Sure.
>
> >
> > You can also minimize changes in dma_direct_map_phys() by extending
> > its signature to indicate whether a flush is needed or not.
>
> Yes. I have
>
> static inline dma_addr_t __dma_direct_map_phys(struct device *dev,
> phys_addr_t phys, size_t size, enum dma_data_direction dir,
> unsigned long attrs, bool flush)
>
> and two wrappers:
> static inline dma_addr_t dma_direct_map_phys(struct device *dev,
> phys_addr_t phys, size_t size, enum dma_data_direction dir,
> unsigned long attrs)
> {
> return __dma_direct_map_phys(dev, phys, size, dir, attrs, true);
> }
>
> static inline dma_addr_t dma_direct_map_phys_batch_add(struct device *dev,
> phys_addr_t phys, size_t size, enum dma_data_direction dir,
> unsigned long attrs)
> {
> return __dma_direct_map_phys(dev, phys, size, dir, attrs, false);
> }
>
> If you prefer exposing "flush" directly in dma_direct_map_phys()
> and updating its callers with flush=true, I think that’s fine.
>
> It could also be true for dma_direct_sync_single_for_device().
Sorry for the typo; I meant dma_direct_sync_single_for_cpu().
With flush passed as an argument, the patch becomes the following.
Please feel free to comment before I send v2.
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 50c3fe2a1d55..5c65d213eb37 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -403,9 +403,11 @@ void dma_direct_sync_sg_for_device(struct device *dev,
swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);
if (!dev_is_dma_coherent(dev))
- arch_sync_dma_for_device(paddr, sg->length,
+ arch_sync_dma_for_device_batch_add(paddr, sg->length,
dir);
}
+ if (!dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
}
#endif
@@ -422,7 +424,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
if (!dev_is_dma_coherent(dev))
- arch_sync_dma_for_cpu(paddr, sg->length, dir);
+ arch_sync_dma_for_cpu_batch_add(paddr, sg->length, dir);
swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
@@ -430,8 +432,10 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
arch_dma_mark_clean(paddr, sg->length);
}
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_cpu_all();
+ arch_sync_dma_flush();
+ }
}
/*
@@ -443,14 +447,19 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
{
struct scatterlist *sg;
int i;
+ bool need_sync = false;
for_each_sg(sgl, sg, nents, i) {
- if (sg_dma_is_bus_address(sg))
+ if (sg_dma_is_bus_address(sg)) {
sg_dma_unmark_bus_address(sg);
- else
+ } else {
+ need_sync = true;
dma_direct_unmap_phys(dev, sg->dma_address,
- sg_dma_len(sg), dir, attrs);
+ sg_dma_len(sg), dir, attrs, false);
+ }
}
+ if (need_sync && !dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
}
#endif
@@ -460,6 +469,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
struct pci_p2pdma_map_state p2pdma_state = {};
struct scatterlist *sg;
int i, ret;
+ bool need_sync = false;
for_each_sg(sgl, sg, nents, i) {
switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) {
@@ -471,8 +481,9 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
*/
break;
case PCI_P2PDMA_MAP_NONE:
+ need_sync = true;
sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg),
- sg->length, dir, attrs);
+ sg->length, dir, attrs, false);
if (sg->dma_address == DMA_MAPPING_ERROR) {
ret = -EIO;
goto out_unmap;
@@ -491,6 +502,8 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
sg_dma_len(sg) = sg->length;
}
+ if (need_sync && !dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
return nents;
out_unmap:
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index da2fadf45bcd..b13eb5bfd051 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -65,12 +65,15 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
}
static inline void dma_direct_sync_single_for_cpu(struct device *dev,
- dma_addr_t addr, size_t size, enum dma_data_direction dir)
+ dma_addr_t addr, size_t size, enum dma_data_direction dir,
+ bool flush)
{
phys_addr_t paddr = dma_to_phys(dev, addr);
if (!dev_is_dma_coherent(dev)) {
- arch_sync_dma_for_cpu(paddr, size, dir);
+ arch_sync_dma_for_cpu_batch_add(paddr, size, dir);
+ if (flush)
+ arch_sync_dma_flush();
arch_sync_dma_for_cpu_all();
}
@@ -82,7 +85,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
static inline dma_addr_t dma_direct_map_phys(struct device *dev,
phys_addr_t phys, size_t size, enum dma_data_direction dir,
- unsigned long attrs)
+ unsigned long attrs, bool flush)
{
dma_addr_t dma_addr;
@@ -109,8 +112,11 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
}
if (!dev_is_dma_coherent(dev) &&
- !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
- arch_sync_dma_for_device(phys, size, dir);
+ !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
+ arch_sync_dma_for_device_batch_add(phys, size, dir);
+ if (flush)
+ arch_sync_dma_flush();
+ }
return dma_addr;
err_overflow:
@@ -122,7 +128,8 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
}
static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
- size_t size, enum dma_data_direction dir, unsigned long attrs)
+ size_t size, enum dma_data_direction dir, unsigned long attrs,
+ bool flush)
{
phys_addr_t phys;
@@ -132,9 +139,10 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
phys = dma_to_phys(dev, addr);
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+ dma_direct_sync_single_for_cpu(dev, addr, size, dir, flush);
swiotlb_tbl_unmap_single(dev, phys, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
}
+
#endif /* _KERNEL_DMA_DIRECT_H */
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 37163eb49f9f..d8cfa56a3cbb 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -166,7 +166,7 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
if (dma_map_direct(dev, ops) ||
(!is_mmio && arch_dma_map_phys_direct(dev, phys + size)))
- addr = dma_direct_map_phys(dev, phys, size, dir, attrs);
+ addr = dma_direct_map_phys(dev, phys, size, dir, attrs, true);
else if (use_dma_iommu(dev))
addr = iommu_dma_map_phys(dev, phys, size, dir, attrs);
else if (ops->map_phys)
@@ -207,7 +207,7 @@ void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size,
BUG_ON(!valid_dma_direction(dir));
if (dma_map_direct(dev, ops) ||
(!is_mmio && arch_dma_unmap_phys_direct(dev, addr + size)))
- dma_direct_unmap_phys(dev, addr, size, dir, attrs);
+ dma_direct_unmap_phys(dev, addr, size, dir, attrs, true);
else if (use_dma_iommu(dev))
iommu_dma_unmap_phys(dev, addr, size, dir, attrs);
else if (ops->unmap_phys)
@@ -373,7 +373,7 @@ void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
BUG_ON(!valid_dma_direction(dir));
if (dma_map_direct(dev, ops))
- dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+ dma_direct_sync_single_for_cpu(dev, addr, size, dir, true);
else if (use_dma_iommu(dev))
iommu_dma_sync_single_for_cpu(dev, addr, size, dir);
else if (ops->sync_single_for_cpu)
--
2.43.0
Powered by blists - more mailing lists