Date: Mon, 9 Oct 2023 11:29:12 +0100
From: Robin Murphy <robin.murphy@....com>
To: Christoph Hellwig <hch@....de>, iommu@...ts.linux.dev
Cc: Marek Szyprowski <m.szyprowski@...sung.com>,
 Geert Uytterhoeven <geert@...ux-m68k.org>, Wei Fang <wei.fang@....com>,
 Shenwei Wang <shenwei.wang@....com>, Clark Wang <xiaoning.wang@....com>,
 NXP Linux Team <linux-imx@....com>, linux-m68k@...ts.linux-m68k.org,
 netdev@...r.kernel.org, Jim Quinlan <james.quinlan@...adcom.com>,
 Greg Ungerer <gerg@...ux-m68k.org>
Subject: Re: [PATCH 5/6] net: fec: use dma_alloc_noncoherent for m532x

On 2023-10-09 08:41, Christoph Hellwig wrote:
> The coldfire platforms can't properly implement dma_alloc_coherent and
> currently just return noncoherent memory from dma_alloc_coherent.
> 
> The fec driver then works around this with a flush of all caches in the
> receive path. Make this hack a little less bad by using the explicit
> dma_alloc_noncoherent API and documenting the hacky cache flushes so
> that the DMA API level hack can be removed.
> 
> Signed-off-by: Christoph Hellwig <hch@....de>
> ---
>   drivers/net/ethernet/freescale/fec_main.c | 84 ++++++++++++++++++++---
>   1 file changed, 75 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index 77c8e9cfb44562..aa032ea0ebf0c2 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -406,6 +406,70 @@ static void fec_dump(struct net_device *ndev)
>   	} while (bdp != txq->bd.base);
>   }
>   
> +/*
> + * Coldfire does not support DMA coherent allocations, and has historically used
> + * a band-aid with a manual flush in fec_enet_rx_queue.
> + */
> +#ifdef CONFIG_COLDFIRE

It looks a bit odd that this ends up applying to all of Coldfire, while 
the associated cache flush only applies to the M532x platform, which 
implies that we'd now be relying on the non-coherent allocation actually 
being coherent on other Coldfire platforms.
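
(For context: memory from dma_alloc_noncoherent() comes with the usual
streaming-DMA ownership contract. A minimal sketch, with dev/handle/size
standing in for the driver's actual allocation:

	/* CPU writes are done; hand the buffer over to the device */
	dma_sync_single_for_device(dev, handle, size, DMA_BIDIRECTIONAL);
	/* ... device DMA happens here ... */
	/* device is done; hand the buffer back to the CPU */
	dma_sync_single_for_cpu(dev, handle, size, DMA_BIDIRECTIONAL);

Since the series adds no such syncs beyond the existing M532x flush, any
platform where the allocation isn't effectively coherent would be left
broken.)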

Would it work to do something like the below to make sure dma-direct does 
the right thing on such platforms (which presumably don't have caches?), 
and then reduce the scope of this FEC hack accordingly, cleaning things 
up even further?

diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index b826e9c677b2..1851fa3fe077 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -27,6 +27,7 @@ config COLDFIRE
  	select CPU_HAS_NO_BITFIELDS
  	select CPU_HAS_NO_CAS
  	select CPU_HAS_NO_MULDIV64
+	select DMA_DEFAULT_COHERENT if !MMU && !M532x
  	select GENERIC_CSUM
  	select GPIOLIB
  	select HAVE_LEGACY_CLK
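
(For the curious: DMA_DEFAULT_COHERENT flips the kernel-wide default that
dma-direct falls back on when nothing else declares a device's coherency,
roughly this one line in kernel/dma/mapping.c:

	/* default coherency assumed for devices with no explicit setting */
	bool dma_default_coherent = IS_ENABLED(CONFIG_DMA_DEFAULT_COHERENT);

so cache-less Coldfire parts would be treated as coherent without any
per-driver workarounds.)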


Thanks,
Robin.

> +static void *fec_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> +		gfp_t gfp)
> +{
> +	return dma_alloc_noncoherent(dev, size, handle, DMA_BIDIRECTIONAL, gfp);
> +}
> +
> +static void fec_dma_free(struct device *dev, size_t size, void *cpu_addr,
> +		dma_addr_t handle)
> +{
> +	dma_free_noncoherent(dev, size, cpu_addr, handle, DMA_BIDIRECTIONAL);
> +}
> +#else /* CONFIG_COLDFIRE */
> +static void *fec_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> +		gfp_t gfp)
> +{
> +	return dma_alloc_coherent(dev, size, handle, gfp);
> +}
> +
> +static void fec_dma_free(struct device *dev, size_t size, void *cpu_addr,
> +		dma_addr_t handle)
> +{
> +	dma_free_coherent(dev, size, cpu_addr, handle);
> +}
> +#endif /* !CONFIG_COLDFIRE */
> +
> +struct fec_dma_devres {
> +	size_t		size;
> +	void		*vaddr;
> +	dma_addr_t	dma_handle;
> +};
> +
> +static void fec_dmam_release(struct device *dev, void *res)
> +{
> +	struct fec_dma_devres *this = res;
> +
> +	fec_dma_free(dev, this->size, this->vaddr, this->dma_handle);
> +}
> +
> +static void *fec_dmam_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> +		gfp_t gfp)
> +{
> +	struct fec_dma_devres *dr;
> +	void *vaddr;
> +
> +	dr = devres_alloc(fec_dmam_release, sizeof(*dr), gfp);
> +	if (!dr)
> +		return NULL;
> +	vaddr = fec_dma_alloc(dev, size, handle, gfp);
> +	if (!vaddr) {
> +		devres_free(dr);
> +		return NULL;
> +	}
> +	dr->vaddr = vaddr;
> +	dr->dma_handle = *handle;
> +	dr->size = size;
> +	devres_add(dev, dr);
> +	return vaddr;
> +}
> +
>   static inline bool is_ipv4_pkt(struct sk_buff *skb)
>   {
>   	return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4;
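
Side note for anyone unfamiliar with devres: the helpers above open-code
what dmam_alloc_coherent() does internally - devres_alloc() wraps the
release callback, devres_add() queues it on the device, and
fec_dmam_release() then frees the buffer automatically when the device is
unbound, so callers need no matching free. A sketch of the intended
usage, mirroring the fec_enet_init() hunk further down:

	cbd_base = fec_dmam_alloc(&fep->pdev->dev, bd_size, &bd_dma,
				  GFP_KERNEL);
	/* no explicit free: fec_dmam_release() runs at device teardown */
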
> @@ -1661,6 +1725,10 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
>   #endif
>   
>   #ifdef CONFIG_M532x
> +	/*
> +	 * Hacky flush of all caches instead of using the DMA API for the TSO
> +	 * headers.
> +	 */
>   	flush_cache_all();
>   #endif
>   	rxq = fep->rx_queue[queue_id];
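
(To make "instead of using the DMA API" concrete: the targeted
alternative to flush_cache_all() would presumably be a sync of just the
TSO header buffer, something like

	dma_sync_single_for_device(&fep->pdev->dev, txq->tso_hdrs_dma,
				   txq->bd.ring_size * TSO_HEADER_SIZE,
				   DMA_BIDIRECTIONAL);

issued from the transmit path once the headers have been written, rather
than nuking every cache in the system from the receive path.)
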
> @@ -3288,10 +3356,9 @@ static void fec_enet_free_queue(struct net_device *ndev)
>   	for (i = 0; i < fep->num_tx_queues; i++)
>   		if (fep->tx_queue[i] && fep->tx_queue[i]->tso_hdrs) {
>   			txq = fep->tx_queue[i];
> -			dma_free_coherent(&fep->pdev->dev,
> -					  txq->bd.ring_size * TSO_HEADER_SIZE,
> -					  txq->tso_hdrs,
> -					  txq->tso_hdrs_dma);
> +			fec_dma_free(&fep->pdev->dev,
> +				     txq->bd.ring_size * TSO_HEADER_SIZE,
> +				     txq->tso_hdrs, txq->tso_hdrs_dma);
>   		}
>   
>   	for (i = 0; i < fep->num_rx_queues; i++)
> @@ -3321,10 +3388,9 @@ static int fec_enet_alloc_queue(struct net_device *ndev)
>   		txq->tx_stop_threshold = FEC_MAX_SKB_DESCS;
>   		txq->tx_wake_threshold = FEC_MAX_SKB_DESCS + 2 * MAX_SKB_FRAGS;
>   
> -		txq->tso_hdrs = dma_alloc_coherent(&fep->pdev->dev,
> +		txq->tso_hdrs = fec_dma_alloc(&fep->pdev->dev,
>   					txq->bd.ring_size * TSO_HEADER_SIZE,
> -					&txq->tso_hdrs_dma,
> -					GFP_KERNEL);
> +					&txq->tso_hdrs_dma, GFP_KERNEL);
>   		if (!txq->tso_hdrs) {
>   			ret = -ENOMEM;
>   			goto alloc_failed;
> @@ -4043,8 +4109,8 @@ static int fec_enet_init(struct net_device *ndev)
>   	bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * dsize;
>   
>   	/* Allocate memory for buffer descriptors. */
> -	cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma,
> -				       GFP_KERNEL);
> +	cbd_base = fec_dmam_alloc(&fep->pdev->dev, bd_size, &bd_dma,
> +				  GFP_KERNEL);
>   	if (!cbd_base) {
>   		ret = -ENOMEM;
>   		goto free_queue_mem;
