Message-ID: <253c870f-770b-4102-be47-f8c06e9e6c31@samsung.com>
Date: Fri, 21 Nov 2025 17:09:16 +0100
From: Marek Szyprowski <m.szyprowski@...sung.com>
To: Barry Song <21cnbao@...il.com>, catalin.marinas@....com
Cc: ada.coupriediaz@....com, anshuman.khandual@....com, ardb@...nel.org,
iommu@...ts.linux.dev, linux-arm-kernel@...ts.infradead.org,
linux-kernel@...r.kernel.org, maz@...nel.org, robin.murphy@....com,
ryan.roberts@....com, surenb@...gle.com, v-songbaohua@...o.com,
will@...nel.org, zhengtangquan@...o.com
Subject: Re: [RFC PATCH 5/5] dma-mapping: Allow batched DMA sync operations
if supported by the arch

Hi Barry,

On 17.11.2025 22:12, Barry Song wrote:
> On Fri, Nov 14, 2025 at 2:19 AM Catalin Marinas <catalin.marinas@....com> wrote:
>> On Wed, Oct 29, 2025 at 10:31:15AM +0800, Barry Song wrote:
>>> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
>>> index 1f9ee9759426..a0b45f84a91f 100644
>>> --- a/kernel/dma/direct.c
>>> +++ b/kernel/dma/direct.c
>>> @@ -403,9 +403,16 @@ void dma_direct_sync_sg_for_device(struct device *dev,
>>> swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);
>>>
>>> if (!dev_is_dma_coherent(dev))
>>> - arch_sync_dma_for_device(paddr, sg->length,
>>> - dir);
>>> +#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>>> + arch_sync_dma_for_device_batch_add(paddr, sg->length, dir);
>>> +#else
>>> + arch_sync_dma_for_device(paddr, sg->length, dir);
>>> +#endif
>>> }
>>> +#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>>> + if (!dev_is_dma_coherent(dev))
>>> + arch_sync_dma_batch_flush();
>>> +#endif
>>> }
>>> #endif
>> Just a high-level comment for now. I'm not opposed to the idea of
>> batching the DSB barriers; we do this for ptes. However, the way it's
>
> Thanks, Catalin. I agree we need batching, as phones and embedded systems
> can use many DMA buffers while some chips lack DMA coherency.
>
>
>> implemented in the generic files, with lots of #ifdefs, makes the code
>> pretty unreadable.
>>
>> Can we have something like arch_sync_dma_begin/end() and let the arch
>> code handle the barriers as they see fit?
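
Just to make the idea concrete, such hooks could presumably stay very
small on arm64; a rough sketch, not taken from this series (the
_nobarrier cache helpers below are hypothetical):

static inline void arch_sync_dma_begin(void)
{
        /* nothing to prepare before a batch of syncs */
}

static inline void arch_sync_dma_end(void)
{
        /* one barrier covering all cache maintenance queued in the batch */
        dsb(sy);
}

/*
 * The per-segment arch_sync_dma_for_device()/_for_cpu() would then call
 * hypothetical dcache_clean_poc_nobarrier()/dcache_inval_poc_nobarrier()
 * variants instead of the current helpers, which each issue their own DSB.
 */
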
>
> I guess I can refactor it as below and then remove the #ifdef/#else/#endif blocks.
>
> diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
> index 8fcd0a9c1f39..73bca4d7149d 100644
> --- a/include/linux/dma-map-ops.h
> +++ b/include/linux/dma-map-ops.h
> @@ -373,6 +373,20 @@ void arch_sync_dma_for_device_batch_add(phys_addr_t paddr, size_t size,
> void arch_sync_dma_for_cpu_batch_add(phys_addr_t paddr, size_t size,
> enum dma_data_direction dir);
> void arch_sync_dma_batch_flush(void);
> +#else
> +static inline void arch_sync_dma_for_device_batch_add(phys_addr_t paddr, size_t size,
> + enum dma_data_direction dir)
> +{
> + arch_sync_dma_for_device(paddr, size, dir);
> +}
> +static inline void arch_sync_dma_for_cpu_batch_add(phys_addr_t paddr, size_t size,
> + enum dma_data_direction dir)
> +{
> + arch_sync_dma_for_cpu(paddr, size, dir);
> +}
> +static inline void arch_sync_dma_batch_flush(void)
> +{
> +}
> #endif
>
> #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index a0b45f84a91f..69b14b0c0501 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -403,16 +403,10 @@ void dma_direct_sync_sg_for_device(struct device *dev,
> swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);
>
> if (!dev_is_dma_coherent(dev))
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
> arch_sync_dma_for_device_batch_add(paddr, sg->length, dir);
> -#else
> - arch_sync_dma_for_device(paddr, sg->length, dir);
> -#endif
> }
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
> if (!dev_is_dma_coherent(dev))
> arch_sync_dma_batch_flush();
> -#endif
> }
> #endif
>
> @@ -429,11 +423,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
> phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
>
> if (!dev_is_dma_coherent(dev))
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
> arch_sync_dma_for_cpu_batch_add(paddr, sg->length, dir);
> -#else
> - arch_sync_dma_for_cpu(paddr, sg->length, dir);
> -#endif
>
> swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
>
> @@ -443,9 +433,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
>
> if (!dev_is_dma_coherent(dev)) {
> arch_sync_dma_for_cpu_all();
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
> arch_sync_dma_batch_flush();
> -#endif
> }
> }
>
> @@ -458,29 +446,19 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
> {
> struct scatterlist *sg;
> int i;
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
> bool need_sync = false;
> -#endif
>
> for_each_sg(sgl, sg, nents, i) {
> if (sg_dma_is_bus_address(sg)) {
> sg_dma_unmark_bus_address(sg);
> } else {
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
> need_sync = true;
> dma_direct_unmap_phys_batch_add(dev, sg->dma_address,
> sg_dma_len(sg), dir, attrs);
> -
> -#else
> - dma_direct_unmap_phys(dev, sg->dma_address,
> - sg_dma_len(sg), dir, attrs);
> -#endif
> }
> }
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
> if (need_sync && !dev_is_dma_coherent(dev))
> arch_sync_dma_batch_flush();
> -#endif
> }
> #endif
>
> @@ -490,9 +468,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
> struct pci_p2pdma_map_state p2pdma_state = {};
> struct scatterlist *sg;
> int i, ret;
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
> bool need_sync = false;
> -#endif
>
> for_each_sg(sgl, sg, nents, i) {
> switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) {
> @@ -504,14 +480,9 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
> */
> break;
> case PCI_P2PDMA_MAP_NONE:
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
> need_sync = true;
> sg->dma_address = dma_direct_map_phys_batch_add(dev, sg_phys(sg),
> sg->length, dir, attrs);
> -#else
> - sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg),
> - sg->length, dir, attrs);
> -#endif
> if (sg->dma_address == DMA_MAPPING_ERROR) {
> ret = -EIO;
> goto out_unmap;
> @@ -529,10 +500,8 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
> sg_dma_len(sg) = sg->length;
> }
>
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
> if (need_sync && !dev_is_dma_coherent(dev))
> arch_sync_dma_batch_flush();
> -#endif
> return nents;
>
> out_unmap:
>
>
This version looks a bit better to me. Similar batching could also be
added to the dma_iova_link()/dma_iova_sync() paths.
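
That path already has a natural batch boundary, since callers typically
link each physical range separately and sync once at the end; a rough
caller-side sketch (simplified, illustrative variable names, no error
handling):

        /* Illustrative only: link N ranges, then a single sync. */
        for (i = 0; i < nr_ranges; i++)
                dma_iova_link(dev, &state, phys[i], i * range_len,
                              range_len, dir, 0);
        dma_iova_sync(dev, &state, 0, nr_ranges * range_len);

so dma_iova_sync() would be the obvious place to issue the single
arch_sync_dma_batch_flush() for the whole batch.
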
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland