Message-ID: <253c870f-770b-4102-be47-f8c06e9e6c31@samsung.com>
Date: Fri, 21 Nov 2025 17:09:16 +0100
From: Marek Szyprowski <m.szyprowski@...sung.com>
To: Barry Song <21cnbao@...il.com>, catalin.marinas@....com
Cc: ada.coupriediaz@....com, anshuman.khandual@....com, ardb@...nel.org,
	iommu@...ts.linux.dev, linux-arm-kernel@...ts.infradead.org,
	linux-kernel@...r.kernel.org, maz@...nel.org, robin.murphy@....com,
	ryan.roberts@....com, surenb@...gle.com, v-songbaohua@...o.com,
	will@...nel.org, zhengtangquan@...o.com
Subject: Re: [RFC PATCH 5/5] dma-mapping: Allow batched DMA sync operations
 if supported by the arch

Hi Barry,

On 17.11.2025 22:12, Barry Song wrote:
> On Fri, Nov 14, 2025 at 2:19 AM Catalin Marinas <catalin.marinas@....com> wrote:
>> On Wed, Oct 29, 2025 at 10:31:15AM +0800, Barry Song wrote:
>>> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
>>> index 1f9ee9759426..a0b45f84a91f 100644
>>> --- a/kernel/dma/direct.c
>>> +++ b/kernel/dma/direct.c
>>> @@ -403,9 +403,16 @@ void dma_direct_sync_sg_for_device(struct device *dev,
>>>                swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);
>>>
>>>                if (!dev_is_dma_coherent(dev))
>>> -                     arch_sync_dma_for_device(paddr, sg->length,
>>> -                                     dir);
>>> +#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>>> +                     arch_sync_dma_for_device_batch_add(paddr, sg->length, dir);
>>> +#else
>>> +                     arch_sync_dma_for_device(paddr, sg->length, dir);
>>> +#endif
>>>        }
>>> +#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>>> +     if (!dev_is_dma_coherent(dev))
>>> +             arch_sync_dma_batch_flush();
>>> +#endif
>>>   }
>>>   #endif
>> Just a high-level comment for now. I'm not opposed to the idea of
>> batching the DSB barriers; we do this for PTEs. However, the way it's
>
> Thanks, Catalin. I agree we need batching, as phones and embedded systems
> can use many DMA buffers while some of their chips lack DMA coherency.
>
>
>> implemented in the generic files, with lots of #ifdefs, makes the code
>> pretty unreadable.
>>
>> Can we have something like arch_sync_dma_begin/end() and let the arch
>> code handle the barriers as they see fit?
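
For illustration, one way arm64 could back such begin/end hooks is to defer
the data synchronization barrier: per-buffer maintenance skips its own DSB
inside a begin/end section, and end() issues a single one for the whole
batch. A very rough sketch only; dcache_clean_poc_nobarrier() and the
deferral flag are hypothetical and not part of this series, and real code
would also have to deal with preemption and nesting:

/* arch/arm64/mm/dma-mapping.c -- illustrative sketch, not from this series */
static DEFINE_PER_CPU(bool, dma_sync_deferred);

void arch_sync_dma_begin(void)
{
	__this_cpu_write(dma_sync_deferred, true);
}

void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
		enum dma_data_direction dir)
{
	unsigned long start = (unsigned long)phys_to_virt(paddr);

	if (__this_cpu_read(dma_sync_deferred))
		dcache_clean_poc_nobarrier(start, start + size); /* hypothetical helper */
	else
		dcache_clean_poc(start, start + size);
}

void arch_sync_dma_end(void)
{
	__this_cpu_write(dma_sync_deferred, false);
	dsb(sy);	/* one barrier covering every buffer in the batch */
}
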
>
> I guess I can refactor it as below and then remove the #ifdef/#else/#endif blocks.
>
> diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
> index 8fcd0a9c1f39..73bca4d7149d 100644
> --- a/include/linux/dma-map-ops.h
> +++ b/include/linux/dma-map-ops.h
> @@ -373,6 +373,20 @@ void arch_sync_dma_for_device_batch_add(phys_addr_t paddr, size_t size,
>   void arch_sync_dma_for_cpu_batch_add(phys_addr_t paddr, size_t size,
>   		enum dma_data_direction dir);
>   void arch_sync_dma_batch_flush(void);
> +#else
> +static inline void arch_sync_dma_for_device_batch_add(phys_addr_t paddr, size_t size,
> +		enum dma_data_direction dir)
> +{
> +	arch_sync_dma_for_device(paddr, size, dir);
> +}
> +static inline void arch_sync_dma_for_cpu_batch_add(phys_addr_t paddr, size_t size,
> +		enum dma_data_direction dir)
> +{
> +	arch_sync_dma_for_cpu(paddr, size, dir);
> +}
> +static inline void arch_sync_dma_batch_flush(void)
> +{
> +}
>   #endif
>   
>   #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index a0b45f84a91f..69b14b0c0501 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -403,16 +403,10 @@ void dma_direct_sync_sg_for_device(struct device *dev,
>   		swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);
>   
>   		if (!dev_is_dma_coherent(dev))
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>   			arch_sync_dma_for_device_batch_add(paddr, sg->length, dir);
> -#else
> -			arch_sync_dma_for_device(paddr, sg->length, dir);
> -#endif
>   	}
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>   	if (!dev_is_dma_coherent(dev))
>   		arch_sync_dma_batch_flush();
> -#endif
>   }
>   #endif
>   
> @@ -429,11 +423,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
>   		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
>   
>   		if (!dev_is_dma_coherent(dev))
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>   			arch_sync_dma_for_cpu_batch_add(paddr, sg->length, dir);
> -#else
> -			arch_sync_dma_for_cpu(paddr, sg->length, dir);
> -#endif
>   
>   		swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
>   
> @@ -443,9 +433,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
>   
>   	if (!dev_is_dma_coherent(dev)) {
>   		arch_sync_dma_for_cpu_all();
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>   		arch_sync_dma_batch_flush();
> -#endif
>   	}
>   }
>   
> @@ -458,29 +446,19 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
>   {
>   	struct scatterlist *sg;
>   	int i;
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>   	bool need_sync = false;
> -#endif
>   
>   	for_each_sg(sgl,  sg, nents, i) {
>   		if (sg_dma_is_bus_address(sg)) {
>   			sg_dma_unmark_bus_address(sg);
>   		} else {
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>   			need_sync = true;
>   			dma_direct_unmap_phys_batch_add(dev, sg->dma_address,
>   					      sg_dma_len(sg), dir, attrs);
> -
> -#else
> -			dma_direct_unmap_phys(dev, sg->dma_address,
> -					      sg_dma_len(sg), dir, attrs);
> -#endif
>   		}
>   	}
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>   	if (need_sync && !dev_is_dma_coherent(dev))
>   		arch_sync_dma_batch_flush();
> -#endif
>   }
>   #endif
>   
> @@ -490,9 +468,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
>   	struct pci_p2pdma_map_state p2pdma_state = {};
>   	struct scatterlist *sg;
>   	int i, ret;
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>   	bool need_sync = false;
> -#endif
>   
>   	for_each_sg(sgl, sg, nents, i) {
>   		switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) {
> @@ -504,14 +480,9 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
>   			 */
>   			break;
>   		case PCI_P2PDMA_MAP_NONE:
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>   			need_sync = true;
>   			sg->dma_address = dma_direct_map_phys_batch_add(dev, sg_phys(sg),
>   					sg->length, dir, attrs);
> -#else
> -			sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg),
> -					sg->length, dir, attrs);
> -#endif
>   			if (sg->dma_address == DMA_MAPPING_ERROR) {
>   				ret = -EIO;
>   				goto out_unmap;
> @@ -529,10 +500,8 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
>   		sg_dma_len(sg) = sg->length;
>   	}
>   
> -#ifdef CONFIG_ARCH_WANT_BATCHED_DMA_SYNC
>   	if (need_sync && !dev_is_dma_coherent(dev))
>   		arch_sync_dma_batch_flush();
> -#endif
>   	return nents;
>   
>   out_unmap:
>
>
This refactored version looks a bit better to me. Similar batching could also
be added to the dma_iova_link()/dma_iova_sync() paths.
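
As a rough illustration of that, the same add/flush pattern would wrap
whatever per-chunk maintenance those paths perform for non-coherent devices.
The loop below is purely hypothetical and only reuses the helpers introduced
in the patch above:

/* Hypothetical shape of a batched sync over linked IOVA chunks. */
static void iova_sync_batched_sketch(struct device *dev, phys_addr_t *chunk_phys,
		size_t *chunk_len, int nr_chunks, enum dma_data_direction dir)
{
	int i;

	if (dev_is_dma_coherent(dev))
		return;

	for (i = 0; i < nr_chunks; i++)
		arch_sync_dma_for_device_batch_add(chunk_phys[i], chunk_len[i], dir);

	/* One flush (e.g. a single DSB on arm64) instead of one barrier per chunk. */
	arch_sync_dma_batch_flush();
}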

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland

