[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <893ad3a4-ba24-43cf-8200-b8cd7742622d@arm.com>
Date: Wed, 14 Feb 2024 17:20:50 +0000
From: Robin Murphy <robin.murphy@....com>
To: Alexander Lobakin <aleksander.lobakin@...el.com>,
"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>
Cc: Christoph Hellwig <hch@....de>,
Marek Szyprowski <m.szyprowski@...sung.com>, Joerg Roedel <joro@...tes.org>,
Will Deacon <will@...nel.org>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
"Rafael J. Wysocki" <rafael@...nel.org>,
Magnus Karlsson <magnus.karlsson@...el.com>,
Maciej Fijalkowski <maciej.fijalkowski@...el.com>,
Alexander Duyck <alexanderduyck@...com>, bpf@...r.kernel.org,
netdev@...r.kernel.org, iommu@...ts.linux.dev, linux-kernel@...r.kernel.org
Subject: Re: [PATCH net-next v3 1/7] dma: compile-out DMA sync op calls when
not used
On 2024-02-14 4:21 pm, Alexander Lobakin wrote:
> Some platforms do have DMA, but DMA there is always direct and coherent.
> Currently, even on such platforms DMA sync operations are compiled and
> called.
> Add a new hidden Kconfig symbol, DMA_NEED_SYNC, and set it only when
> either sync operations are needed or there are DMA ops or swiotlb
> enabled. Set dma_need_sync() and dma_skip_sync() depending on this
> symbol state and don't call sync ops when dma_skip_sync() is true.
> The change allows for future optimizations of DMA sync calls depending
> on compile-time or runtime conditions.
>
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@...el.com>
> ---
> kernel/dma/Kconfig | 4 ++
> include/linux/dma-mapping.h | 80 +++++++++++++++++++++++++++++++------
> kernel/dma/mapping.c | 20 +++++-----
> 3 files changed, 81 insertions(+), 23 deletions(-)
>
> diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
> index d62f5957f36b..1c9ff05b1ecb 100644
> --- a/kernel/dma/Kconfig
> +++ b/kernel/dma/Kconfig
> @@ -107,6 +107,10 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC
> bool
> depends on SWIOTLB
>
> +config DMA_NEED_SYNC
> + def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
> + ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_OPS || SWIOTLB
I'm not sure DMA_OPS belongs here - several architectures have
non-trivial ops without syncs, e.g. Alpha.
> +
> config DMA_RESTRICTED_POOL
> bool "DMA Restricted Pool"
> depends on OF && OF_RESERVED_MEM && SWIOTLB
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 4a658de44ee9..6c7640441214 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -117,13 +117,13 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
> size_t size, enum dma_data_direction dir, unsigned long attrs);
> void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
> enum dma_data_direction dir, unsigned long attrs);
> -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
> +void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
> enum dma_data_direction dir);
> -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
> +void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
> size_t size, enum dma_data_direction dir);
> -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
> +void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
> int nelems, enum dma_data_direction dir);
> -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
> +void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
> int nelems, enum dma_data_direction dir);
> void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
> gfp_t flag, unsigned long attrs);
> @@ -147,7 +147,7 @@ u64 dma_get_required_mask(struct device *dev);
> bool dma_addressing_limited(struct device *dev);
> size_t dma_max_mapping_size(struct device *dev);
> size_t dma_opt_mapping_size(struct device *dev);
> -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
> +bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr);
> unsigned long dma_get_merge_boundary(struct device *dev);
> struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
> enum dma_data_direction dir, gfp_t gfp, unsigned long attrs);
> @@ -195,19 +195,19 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
> size_t size, enum dma_data_direction dir, unsigned long attrs)
> {
> }
> -static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
> - size_t size, enum dma_data_direction dir)
> +static inline void __dma_sync_single_for_cpu(struct device *dev,
> + dma_addr_t addr, size_t size, enum dma_data_direction dir)
To me it would feel more logical to put all the wrappers inside the
#ifdef CONFIG_HAS_DMA and not touch these stubs at all (what does it
mean to skip an inline no-op?). Or in fact, if dma_skip_sync() is
constant false for !HAS_DMA, then we could also just make the external
function declarations unconditional and remove the stubs. Not a critical
matter though, and I defer to whatever Christoph thinks is most
maintainable.
> {
> }
> -static inline void dma_sync_single_for_device(struct device *dev,
> +static inline void __dma_sync_single_for_device(struct device *dev,
> dma_addr_t addr, size_t size, enum dma_data_direction dir)
> {
> }
> -static inline void dma_sync_sg_for_cpu(struct device *dev,
> +static inline void __dma_sync_sg_for_cpu(struct device *dev,
> struct scatterlist *sg, int nelems, enum dma_data_direction dir)
> {
> }
> -static inline void dma_sync_sg_for_device(struct device *dev,
> +static inline void __dma_sync_sg_for_device(struct device *dev,
> struct scatterlist *sg, int nelems, enum dma_data_direction dir)
> {
> }
> @@ -277,7 +277,7 @@ static inline size_t dma_opt_mapping_size(struct device *dev)
> {
> return 0;
> }
> -static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
> +static inline bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
> {
> return false;
> }
> @@ -348,18 +348,72 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
> return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
> }
>
> +static inline void __dma_sync_single_range_for_cpu(struct device *dev,
> + dma_addr_t addr, unsigned long offset, size_t size,
> + enum dma_data_direction dir)
> +{
> + __dma_sync_single_for_cpu(dev, addr + offset, size, dir);
> +}
> +
> +static inline void __dma_sync_single_range_for_device(struct device *dev,
> + dma_addr_t addr, unsigned long offset, size_t size,
> + enum dma_data_direction dir)
> +{
> + __dma_sync_single_for_device(dev, addr + offset, size, dir);
> +}
There is no need to introduce these two.
> +
> +static inline bool dma_skip_sync(const struct device *dev)
> +{
> + return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
> +}
> +
> +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
> +{
> + return !dma_skip_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
> +}
That's a bit of a mind-bender... is it actually just
return !dma_skip_sync(dev) && __dma_need_sync(dev, dma_addr);
?
(I do still think the negative flag makes it all a little harder to
follow in general than a positive "device needs to consider syncs" flag
would.)
> +static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
> + size_t size, enum dma_data_direction dir)
> +{
> + if (!dma_skip_sync(dev))
> + __dma_sync_single_for_cpu(dev, addr, size, dir);
> +}
> +
> +static inline void dma_sync_single_for_device(struct device *dev,
> + dma_addr_t addr, size_t size, enum dma_data_direction dir)
> +{
> + if (!dma_skip_sync(dev))
> + __dma_sync_single_for_device(dev, addr, size, dir);
> +}
> +
> +static inline void dma_sync_sg_for_cpu(struct device *dev,
> + struct scatterlist *sg, int nelems, enum dma_data_direction dir)
> +{
> + if (!dma_skip_sync(dev))
> + __dma_sync_sg_for_cpu(dev, sg, nelems, dir);
> +}
> +
> +static inline void dma_sync_sg_for_device(struct device *dev,
> + struct scatterlist *sg, int nelems, enum dma_data_direction dir)
> +{
> + if (!dma_skip_sync(dev))
> + __dma_sync_sg_for_device(dev, sg, nelems, dir);
> +}
> +
> static inline void dma_sync_single_range_for_cpu(struct device *dev,
> dma_addr_t addr, unsigned long offset, size_t size,
> enum dma_data_direction dir)
> {
> - return dma_sync_single_for_cpu(dev, addr + offset, size, dir);
> + if (!dma_skip_sync(dev))
> + __dma_sync_single_for_cpu(dev, addr + offset, size, dir);
> }
>
> static inline void dma_sync_single_range_for_device(struct device *dev,
> dma_addr_t addr, unsigned long offset, size_t size,
> enum dma_data_direction dir)
> {
> - return dma_sync_single_for_device(dev, addr + offset, size, dir);
> + if (!dma_skip_sync(dev))
> + __dma_sync_single_for_device(dev, addr + offset, size, dir);
> }
These two don't need changing either, since the dma_sync_single_*
wrappers have already taken care of it.
Thanks,
Robin.
Powered by blists - more mailing lists