lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <dnl7ybsecsff3pv5g4kzhyvhj4yoj6nxhvc6ugmv2pw4btdg6t@dglr2hitc2nv>
Date: Thu, 27 Feb 2025 12:14:01 +0100
From: Thierry Reding <thierry.reding@...il.com>
To: Vishwaroop A <va@...dia.com>
Cc: jonathanh@...dia.com, skomatineni@...dia.com, ldewangan@...dia.com, 
	broonie@...nel.org, linux-spi@...r.kernel.org, linux-tegra@...r.kernel.org, 
	linux-kernel@...r.kernel.org, kyarlagadda@...dia.com, smangipudi@...dia.com
Subject: Re: [PATCH v2 6/6] spi: tegra210-quad: Introduce native DMA support

On Wed, Feb 12, 2025 at 02:46:51PM +0000, Vishwaroop A wrote:
> Previous generations of Tegra supported DMA operations by an external
> DMA controller, but the QSPI on Tegra234 devices now have an internal
> DMA controller.
> 
> Internal DMA: Uses the QSPI controller's built-in DMA engine, which is
> limited in capabilities and tied directly to the QSPI module.
> 
> External DMA: Utilizes a separate, GPCDMA DMA controller that can
> transfer data between QSPI and any memory location.
> 
> Native DMA Initialization: Introduce routines to initialize and
> configure native DMA channels for both transmit and receive paths.
> Set up DMA mapping functions to manage buffer addresses effectively.
> 
> Enhance Transfer Logic: Implement logic to choose between CPU-based
> and DMA-based transfers based on data size.
> 
> Signed-off-by: Vishwaroop A <va@...dia.com>
> ---
>  drivers/spi/spi-tegra210-quad.c | 218 ++++++++++++++++++--------------
>  1 file changed, 126 insertions(+), 92 deletions(-)
> 
> diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
> index 04f41e92c1e2..7463b00b1ffb 100644
> --- a/drivers/spi/spi-tegra210-quad.c
> +++ b/drivers/spi/spi-tegra210-quad.c
> @@ -111,6 +111,9 @@
>  #define QSPI_DMA_BLK				0x024
>  #define QSPI_DMA_BLK_SET(x)			(((x) & 0xffff) << 0)
>  
> +#define QSPI_DMA_MEM_ADDRESS_REG		0x028
> +#define QSPI_DMA_HI_ADDRESS_REG			0x02c

I'd drop the _REG suffix since we don't use it on any of the other
register definitions.

> +
>  #define QSPI_TX_FIFO				0x108
>  #define QSPI_RX_FIFO				0x188
>  
> @@ -167,9 +170,9 @@ enum tegra_qspi_transfer_type {
>  };
>  
>  struct tegra_qspi_soc_data {
> -	bool has_dma;
>  	bool cmb_xfer_capable;
>  	bool supports_tpm;
> +	bool has_ext_dma;
>  	unsigned int cs_count;
>  };
>  
> @@ -605,17 +608,21 @@ static void tegra_qspi_dma_unmap_xfer(struct tegra_qspi *tqspi, struct spi_trans
>  
>  	len = DIV_ROUND_UP(tqspi->curr_dma_words * tqspi->bytes_per_word, 4) * 4;
>  
> -	dma_unmap_single(tqspi->dev, t->tx_dma, len, DMA_TO_DEVICE);
> -	dma_unmap_single(tqspi->dev, t->rx_dma, len, DMA_FROM_DEVICE);
> +	if (t->tx_buf)
> +		dma_unmap_single(tqspi->dev, t->tx_dma, len, DMA_TO_DEVICE);
> +	if (t->rx_buf)
> +		dma_unmap_single(tqspi->dev, t->rx_dma, len, DMA_FROM_DEVICE);
>  }
>  
>  static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct spi_transfer *t)
>  {
>  	struct dma_slave_config dma_sconfig = { 0 };
> +	dma_addr_t rx_dma_phys, tx_dma_phys;
>  	unsigned int len;
>  	u8 dma_burst;
>  	int ret = 0;
>  	u32 val;
> +	bool has_ext_dma = tqspi->soc_data->has_ext_dma;
>  
>  	if (tqspi->is_packed) {
>  		ret = tegra_qspi_dma_map_xfer(tqspi, t);
> @@ -634,60 +641,85 @@ static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct
>  		len = tqspi->curr_dma_words * 4;
>  
>  	/* set attention level based on length of transfer */
> -	val = 0;
> -	if (len & 0xf) {
> -		val |= QSPI_TX_TRIG_1 | QSPI_RX_TRIG_1;
> -		dma_burst = 1;
> -	} else if (((len) >> 4) & 0x1) {
> -		val |= QSPI_TX_TRIG_4 | QSPI_RX_TRIG_4;
> -		dma_burst = 4;
> -	} else {
> -		val |= QSPI_TX_TRIG_8 | QSPI_RX_TRIG_8;
> -		dma_burst = 8;
> +	if (has_ext_dma) {
> +		val = 0;
> +		if (len & 0xf) {
> +			val |= QSPI_TX_TRIG_1 | QSPI_RX_TRIG_1;
> +			dma_burst = 1;
> +		} else if (((len) >> 4) & 0x1) {
> +			val |= QSPI_TX_TRIG_4 | QSPI_RX_TRIG_4;
> +			dma_burst = 4;
> +		} else {
> +			val |= QSPI_TX_TRIG_8 | QSPI_RX_TRIG_8;
> +			dma_burst = 8;
> +		}
> +
> +		tegra_qspi_writel(tqspi, val, QSPI_DMA_CTL);
>  	}
>  
> -	tegra_qspi_writel(tqspi, val, QSPI_DMA_CTL);
>  	tqspi->dma_control_reg = val;
>  
>  	dma_sconfig.device_fc = true;
> -	if (tqspi->cur_direction & DATA_DIR_TX) {
> -		dma_sconfig.dst_addr = tqspi->phys + QSPI_TX_FIFO;
> -		dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> -		dma_sconfig.dst_maxburst = dma_burst;
> -		ret = dmaengine_slave_config(tqspi->tx_dma_chan, &dma_sconfig);
> -		if (ret < 0) {
> -			dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
> -			return ret;
> -		}
>  
> -		tegra_qspi_copy_client_txbuf_to_qspi_txbuf(tqspi, t);
> -		ret = tegra_qspi_start_tx_dma(tqspi, t, len);
> -		if (ret < 0) {
> -			dev_err(tqspi->dev, "failed to starting TX DMA: %d\n", ret);
> -			return ret;
> +	if ((tqspi->cur_direction & DATA_DIR_TX)) {
> +		if (has_ext_dma) {
> +			dma_sconfig.dst_addr = tqspi->phys + QSPI_TX_FIFO;
> +			dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +			dma_sconfig.dst_maxburst = dma_burst;
> +			ret = dmaengine_slave_config(tqspi->tx_dma_chan, &dma_sconfig);
> +			if (ret < 0) {
> +				dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
> +				return ret;
> +			}
> +
> +			tegra_qspi_copy_client_txbuf_to_qspi_txbuf(tqspi, t);
> +			ret = tegra_qspi_start_tx_dma(tqspi, t, len);
> +			if (ret < 0) {
> +				dev_err(tqspi->dev, "failed to starting TX DMA: %d\n", ret);
> +				return ret;
> +			}
> +		} else {
> +			if (tqspi->is_packed)
> +				tx_dma_phys = t->tx_dma;
> +			else
> +				tx_dma_phys = tqspi->tx_dma_phys;
> +			tegra_qspi_copy_client_txbuf_to_qspi_txbuf(tqspi, t);
> +			tegra_qspi_writel(tqspi, lower_32_bits(tx_dma_phys),
> +					  QSPI_DMA_MEM_ADDRESS_REG);
> +			tegra_qspi_writel(tqspi, (upper_32_bits(tx_dma_phys) & 0xff),
> +					  QSPI_DMA_HI_ADDRESS_REG);
>  		}
>  	}
>  
>  	if (tqspi->cur_direction & DATA_DIR_RX) {
> -		dma_sconfig.src_addr = tqspi->phys + QSPI_RX_FIFO;
> -		dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> -		dma_sconfig.src_maxburst = dma_burst;
> -		ret = dmaengine_slave_config(tqspi->rx_dma_chan, &dma_sconfig);
> -		if (ret < 0) {
> -			dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
> -			return ret;
> -		}
> -
> -		dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys,
> -					   tqspi->dma_buf_size,
> -					   DMA_FROM_DEVICE);
> -
> -		ret = tegra_qspi_start_rx_dma(tqspi, t, len);
> -		if (ret < 0) {
> -			dev_err(tqspi->dev, "failed to start RX DMA: %d\n", ret);
> -			if (tqspi->cur_direction & DATA_DIR_TX)
> -				dmaengine_terminate_all(tqspi->tx_dma_chan);
> -			return ret;
> +		if (has_ext_dma) {
> +			dma_sconfig.src_addr = tqspi->phys + QSPI_RX_FIFO;
> +			dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +			dma_sconfig.src_maxburst = dma_burst;
> +			ret = dmaengine_slave_config(tqspi->rx_dma_chan, &dma_sconfig);
> +			if (ret < 0) {
> +				dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
> +				return ret;
> +			}
> +			dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys,
> +						   tqspi->dma_buf_size, DMA_FROM_DEVICE);
> +			ret = tegra_qspi_start_rx_dma(tqspi, t, len);
> +			if (ret < 0) {
> +				dev_err(tqspi->dev, "failed to start RX DMA: %d\n", ret);
> +				if (tqspi->cur_direction & DATA_DIR_TX)
> +					dmaengine_terminate_all(tqspi->tx_dma_chan);
> +				return ret;
> +			}

Please keep the whitespace that was there before (maybe even add a few
blank lines) to make this less cluttered.

> +		} else {
> +			if (tqspi->is_packed)
> +				rx_dma_phys = t->rx_dma;
> +			else
> +				rx_dma_phys = tqspi->rx_dma_phys;
> +
> +			tegra_qspi_writel(tqspi, lower_32_bits(rx_dma_phys),
> +					  QSPI_DMA_MEM_ADDRESS_REG);
> +			tegra_qspi_writel(tqspi, (upper_32_bits(rx_dma_phys) & 0xff),
> +					  QSPI_DMA_HI_ADDRESS_REG);
>  		}

This doesn't look right. You're passing a memory buffer to hardware
here, so this needs DMA sync operations, too.

[...]
> @@ -1388,30 +1420,32 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi)
>  
>  	if (tqspi->cur_direction & DATA_DIR_TX) {
>  		if (tqspi->tx_status) {
> -			dmaengine_terminate_all(tqspi->tx_dma_chan);
> -			err += 1;
> -		} else {
> +			if (tqspi->tx_dma_chan)
> +				dmaengine_terminate_all(tqspi->tx_dma_chan);
> +			err++;
> +		} else if (tqspi->tx_dma_chan) {
>  			wait_status = wait_for_completion_interruptible_timeout(
>  				&tqspi->tx_dma_complete, QSPI_DMA_TIMEOUT);
>  			if (wait_status <= 0) {
>  				dmaengine_terminate_all(tqspi->tx_dma_chan);
>  				dev_err(tqspi->dev, "failed TX DMA transfer\n");
> -				err += 1;
> +				err++;
>  			}
>  		}
>  	}
>  
>  	if (tqspi->cur_direction & DATA_DIR_RX) {
>  		if (tqspi->rx_status) {
> -			dmaengine_terminate_all(tqspi->rx_dma_chan);
> -			err += 2;
> -		} else {
> +			if (tqspi->rx_dma_chan)
> +				dmaengine_terminate_all(tqspi->rx_dma_chan);
> +			err++;
> +		} else if (tqspi->rx_dma_chan) {
>  			wait_status = wait_for_completion_interruptible_timeout(
>  				&tqspi->rx_dma_complete, QSPI_DMA_TIMEOUT);
>  			if (wait_status <= 0) {
>  				dmaengine_terminate_all(tqspi->rx_dma_chan);
>  				dev_err(tqspi->dev, "failed RX DMA transfer\n");
> -				err += 2;
> +				err++;

Maybe we should change the "err" variable to something like "errors" or
"num_errors" to make it clear what this does. As it is, it's easily
mistaken to be a negative error code, in which case ++ wouldn't make
sense.

Thierry

Download attachment "signature.asc" of type "application/pgp-signature" (834 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ