[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <0a77e5a4-b4c8-4e5c-84f7-b6b56d4c0437@nvidia.com>
Date: Thu, 27 Feb 2025 11:17:14 +0000
From: Jon Hunter <jonathanh@...dia.com>
To: Vishwaroop A <va@...dia.com>, thierry.reding@...il.com,
skomatineni@...dia.com, ldewangan@...dia.com, broonie@...nel.org,
linux-spi@...r.kernel.org, linux-tegra@...r.kernel.org,
linux-kernel@...r.kernel.org, kyarlagadda@...dia.com, smangipudi@...dia.com
Subject: Re: [PATCH v2 6/6] spi: tegra210-quad: Introduce native DMA support
On 12/02/2025 14:46, Vishwaroop A wrote:
> Previous generations of Tegra supported DMA operations by an external
> DMA controller, but the QSPI on Tegra234 devices now has an internal
> DMA controller.
>
> Internal DMA: Uses the QSPI controller's built-in DMA engine, which is
> limited in capabilities and tied directly to the QSPI module.
>
> External DMA: Utilizes a separate GPCDMA controller that can
> transfer data between QSPI and any memory location.
>
> Native DMA Initialization: Introduce routines to initialize and
> configure native DMA channels for both transmit and receive paths.
> Set up DMA mapping functions to manage buffer addresses effectively.
>
> Enhance Transfer Logic: Implement logic to choose between CPU-based
> and DMA-based transfers based on data size.
>
> Signed-off-by: Vishwaroop A <va@...dia.com>
> ---
> drivers/spi/spi-tegra210-quad.c | 218 ++++++++++++++++++--------------
> 1 file changed, 126 insertions(+), 92 deletions(-)
>
> diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
> index 04f41e92c1e2..7463b00b1ffb 100644
> --- a/drivers/spi/spi-tegra210-quad.c
> +++ b/drivers/spi/spi-tegra210-quad.c
> @@ -111,6 +111,9 @@
> #define QSPI_DMA_BLK 0x024
> #define QSPI_DMA_BLK_SET(x) (((x) & 0xffff) << 0)
>
> +#define QSPI_DMA_MEM_ADDRESS_REG 0x028
> +#define QSPI_DMA_HI_ADDRESS_REG 0x02c
> +
> #define QSPI_TX_FIFO 0x108
> #define QSPI_RX_FIFO 0x188
>
> @@ -167,9 +170,9 @@ enum tegra_qspi_transfer_type {
> };
>
> struct tegra_qspi_soc_data {
> - bool has_dma;
> bool cmb_xfer_capable;
> bool supports_tpm;
> + bool has_ext_dma;
> unsigned int cs_count;
> };
>
> @@ -605,17 +608,21 @@ static void tegra_qspi_dma_unmap_xfer(struct tegra_qspi *tqspi, struct spi_trans
>
> len = DIV_ROUND_UP(tqspi->curr_dma_words * tqspi->bytes_per_word, 4) * 4;
>
> - dma_unmap_single(tqspi->dev, t->tx_dma, len, DMA_TO_DEVICE);
> - dma_unmap_single(tqspi->dev, t->rx_dma, len, DMA_FROM_DEVICE);
> + if (t->tx_buf)
> + dma_unmap_single(tqspi->dev, t->tx_dma, len, DMA_TO_DEVICE);
> + if (t->rx_buf)
> + dma_unmap_single(tqspi->dev, t->rx_dma, len, DMA_FROM_DEVICE);
> }
>
> static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct spi_transfer *t)
> {
> struct dma_slave_config dma_sconfig = { 0 };
> + dma_addr_t rx_dma_phys, tx_dma_phys;
> unsigned int len;
> u8 dma_burst;
> int ret = 0;
> u32 val;
> + bool has_ext_dma = tqspi->soc_data->has_ext_dma;
>
> if (tqspi->is_packed) {
> ret = tegra_qspi_dma_map_xfer(tqspi, t);
> @@ -634,60 +641,85 @@ static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct
> len = tqspi->curr_dma_words * 4;
>
> /* set attention level based on length of transfer */
> - val = 0;
> - if (len & 0xf) {
> - val |= QSPI_TX_TRIG_1 | QSPI_RX_TRIG_1;
> - dma_burst = 1;
> - } else if (((len) >> 4) & 0x1) {
> - val |= QSPI_TX_TRIG_4 | QSPI_RX_TRIG_4;
> - dma_burst = 4;
> - } else {
> - val |= QSPI_TX_TRIG_8 | QSPI_RX_TRIG_8;
> - dma_burst = 8;
> + if (has_ext_dma) {
> + val = 0;
> + if (len & 0xf) {
> + val |= QSPI_TX_TRIG_1 | QSPI_RX_TRIG_1;
> + dma_burst = 1;
> + } else if (((len) >> 4) & 0x1) {
> + val |= QSPI_TX_TRIG_4 | QSPI_RX_TRIG_4;
> + dma_burst = 4;
> + } else {
> + val |= QSPI_TX_TRIG_8 | QSPI_RX_TRIG_8;
> + dma_burst = 8;
> + }
> +
> + tegra_qspi_writel(tqspi, val, QSPI_DMA_CTL);
> }
>
> - tegra_qspi_writel(tqspi, val, QSPI_DMA_CTL);
> tqspi->dma_control_reg = val;
>
> dma_sconfig.device_fc = true;
> - if (tqspi->cur_direction & DATA_DIR_TX) {
> - dma_sconfig.dst_addr = tqspi->phys + QSPI_TX_FIFO;
> - dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> - dma_sconfig.dst_maxburst = dma_burst;
> - ret = dmaengine_slave_config(tqspi->tx_dma_chan, &dma_sconfig);
> - if (ret < 0) {
> - dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
> - return ret;
> - }
>
> - tegra_qspi_copy_client_txbuf_to_qspi_txbuf(tqspi, t);
> - ret = tegra_qspi_start_tx_dma(tqspi, t, len);
> - if (ret < 0) {
> - dev_err(tqspi->dev, "failed to starting TX DMA: %d\n", ret);
> - return ret;
> + if ((tqspi->cur_direction & DATA_DIR_TX)) {
> + if (has_ext_dma) {
For consistency with 'handle_dma_based_xfer', I think it is better to
check 'tqspi->tx_dma_chan' here rather than 'has_ext_dma'.
> + dma_sconfig.dst_addr = tqspi->phys + QSPI_TX_FIFO;
> + dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> + dma_sconfig.dst_maxburst = dma_burst;
> + ret = dmaengine_slave_config(tqspi->tx_dma_chan, &dma_sconfig);
> + if (ret < 0) {
> + dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
> + return ret;
> + }
> +
> + tegra_qspi_copy_client_txbuf_to_qspi_txbuf(tqspi, t);
> + ret = tegra_qspi_start_tx_dma(tqspi, t, len);
> + if (ret < 0) {
> + dev_err(tqspi->dev, "failed to starting TX DMA: %d\n", ret);
> + return ret;
> + }
> + } else {
> + if (tqspi->is_packed)
> + tx_dma_phys = t->tx_dma;
> + else
> + tx_dma_phys = tqspi->tx_dma_phys;
> + tegra_qspi_copy_client_txbuf_to_qspi_txbuf(tqspi, t);
> + tegra_qspi_writel(tqspi, lower_32_bits(tx_dma_phys),
> + QSPI_DMA_MEM_ADDRESS_REG);
> + tegra_qspi_writel(tqspi, (upper_32_bits(tx_dma_phys) & 0xff),
> + QSPI_DMA_HI_ADDRESS_REG);
> }
> }
>
> if (tqspi->cur_direction & DATA_DIR_RX) {
> - dma_sconfig.src_addr = tqspi->phys + QSPI_RX_FIFO;
> - dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> - dma_sconfig.src_maxburst = dma_burst;
> - ret = dmaengine_slave_config(tqspi->rx_dma_chan, &dma_sconfig);
> - if (ret < 0) {
> - dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
> - return ret;
> - }
> -
> - dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys,
> - tqspi->dma_buf_size,
> - DMA_FROM_DEVICE);
> -
> - ret = tegra_qspi_start_rx_dma(tqspi, t, len);
> - if (ret < 0) {
> - dev_err(tqspi->dev, "failed to start RX DMA: %d\n", ret);
> - if (tqspi->cur_direction & DATA_DIR_TX)
> - dmaengine_terminate_all(tqspi->tx_dma_chan);
> - return ret;
> + if (has_ext_dma) {
Same here: check 'tqspi->rx_dma_chan' rather than 'has_ext_dma'.
> + dma_sconfig.src_addr = tqspi->phys + QSPI_RX_FIFO;
> + dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> + dma_sconfig.src_maxburst = dma_burst;
> + ret = dmaengine_slave_config(tqspi->rx_dma_chan, &dma_sconfig);
> + if (ret < 0) {
> + dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
> + return ret;
> + }
> + dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys,
> + tqspi->dma_buf_size, DMA_FROM_DEVICE);
> + ret = tegra_qspi_start_rx_dma(tqspi, t, len);
> + if (ret < 0) {
> + dev_err(tqspi->dev, "failed to start RX DMA: %d\n", ret);
> + if (tqspi->cur_direction & DATA_DIR_TX)
> + dmaengine_terminate_all(tqspi->tx_dma_chan);
> + return ret;
> + }
> + } else {
> + if (tqspi->is_packed)
> + rx_dma_phys = t->rx_dma;
> + else
> + rx_dma_phys = tqspi->rx_dma_phys;
> +
> + tegra_qspi_writel(tqspi, lower_32_bits(rx_dma_phys),
> + QSPI_DMA_MEM_ADDRESS_REG);
> + tegra_qspi_writel(tqspi, (upper_32_bits(rx_dma_phys) & 0xff),
> + QSPI_DMA_HI_ADDRESS_REG);
> }
> }
>
> @@ -726,9 +758,6 @@ static int tegra_qspi_start_cpu_based_transfer(struct tegra_qspi *qspi, struct s
>
> static void tegra_qspi_deinit_dma(struct tegra_qspi *tqspi)
> {
> - if (!tqspi->soc_data->has_dma)
> - return;
> -
> if (tqspi->tx_dma_buf) {
> dma_free_coherent(tqspi->dev, tqspi->dma_buf_size,
> tqspi->tx_dma_buf, tqspi->tx_dma_phys);
> @@ -759,16 +788,26 @@ static int tegra_qspi_init_dma(struct tegra_qspi *tqspi)
> u32 *dma_buf;
> int err;
>
> - if (!tqspi->soc_data->has_dma)
> - return 0;
> + if (tqspi->soc_data->has_ext_dma) {
> + dma_chan = dma_request_chan(tqspi->dev, "rx");
> + if (IS_ERR(dma_chan)) {
> + err = PTR_ERR(dma_chan);
> + goto err_out;
> + }
>
> - dma_chan = dma_request_chan(tqspi->dev, "rx");
> - if (IS_ERR(dma_chan)) {
> - err = PTR_ERR(dma_chan);
> - goto err_out;
> - }
> + tqspi->rx_dma_chan = dma_chan;
>
> - tqspi->rx_dma_chan = dma_chan;
> + dma_chan = dma_request_chan(tqspi->dev, "tx");
> + if (IS_ERR(dma_chan)) {
> + err = PTR_ERR(dma_chan);
> + goto err_out;
> + }
> +
> + tqspi->tx_dma_chan = dma_chan;
> + } else {
> + tqspi->rx_dma_chan = NULL;
> + tqspi->tx_dma_chan = NULL;
> + }
>
> dma_buf = dma_alloc_coherent(tqspi->dev, tqspi->dma_buf_size, &dma_phys, GFP_KERNEL);
> if (!dma_buf) {
> @@ -779,14 +818,6 @@ static int tegra_qspi_init_dma(struct tegra_qspi *tqspi)
> tqspi->rx_dma_buf = dma_buf;
> tqspi->rx_dma_phys = dma_phys;
>
> - dma_chan = dma_request_chan(tqspi->dev, "tx");
> - if (IS_ERR(dma_chan)) {
> - err = PTR_ERR(dma_chan);
> - goto err_out;
> - }
> -
> - tqspi->tx_dma_chan = dma_chan;
> -
> dma_buf = dma_alloc_coherent(tqspi->dev, tqspi->dma_buf_size, &dma_phys, GFP_KERNEL);
> if (!dma_buf) {
> err = -ENOMEM;
> @@ -1056,6 +1087,7 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
> struct spi_message *msg)
> {
> bool is_first_msg = true;
> + bool has_ext_dma = tqspi->soc_data->has_ext_dma;
> struct spi_transfer *xfer;
> struct spi_device *spi = msg->spi;
> u8 transfer_phase = 0;
> @@ -1128,15 +1160,12 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
> if (WARN_ON(ret == 0)) {
> dev_err(tqspi->dev, "QSPI Transfer failed with timeout: %d\n",
> ret);
> - if (tqspi->is_curr_dma_xfer &&
> - (tqspi->cur_direction & DATA_DIR_TX))
> - dmaengine_terminate_all
> - (tqspi->tx_dma_chan);
> -
> - if (tqspi->is_curr_dma_xfer &&
> - (tqspi->cur_direction & DATA_DIR_RX))
> - dmaengine_terminate_all
> - (tqspi->rx_dma_chan);
> + if (tqspi->is_curr_dma_xfer && has_ext_dma) {
> + if (tqspi->cur_direction & DATA_DIR_TX)
> + dmaengine_terminate_all(tqspi->tx_dma_chan);
> + if (tqspi->cur_direction & DATA_DIR_RX)
> + dmaengine_terminate_all(tqspi->rx_dma_chan);
> + }
>
> /* Abort transfer by resetting pio/dma bit */
> if (!tqspi->is_curr_dma_xfer) {
> @@ -1197,6 +1226,7 @@ static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi,
> struct spi_device *spi = msg->spi;
> struct spi_transfer *transfer;
> bool is_first_msg = true;
> + bool has_ext_dma = tqspi->soc_data->has_ext_dma;
> int ret = 0, val = 0;
>
> msg->status = 0;
> @@ -1251,10 +1281,12 @@ static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi,
> QSPI_DMA_TIMEOUT);
> if (WARN_ON(ret == 0)) {
> dev_err(tqspi->dev, "transfer timeout\n");
> - if (tqspi->is_curr_dma_xfer && (tqspi->cur_direction & DATA_DIR_TX))
> - dmaengine_terminate_all(tqspi->tx_dma_chan);
> - if (tqspi->is_curr_dma_xfer && (tqspi->cur_direction & DATA_DIR_RX))
> - dmaengine_terminate_all(tqspi->rx_dma_chan);
> + if (tqspi->is_curr_dma_xfer && has_ext_dma) {
And here: check the DMA channel pointers rather than 'has_ext_dma'.
> + if (tqspi->cur_direction & DATA_DIR_TX)
> + dmaengine_terminate_all(tqspi->tx_dma_chan);
> + if (tqspi->cur_direction & DATA_DIR_RX)
> + dmaengine_terminate_all(tqspi->rx_dma_chan);
> + }
> tegra_qspi_handle_error(tqspi);
> ret = -EIO;
> goto complete_xfer;
> @@ -1323,7 +1355,7 @@ static bool tegra_qspi_validate_cmb_seq(struct tegra_qspi *tqspi,
> return false;
> xfer = list_next_entry(xfer, transfer_list);
> }
> - if (!tqspi->soc_data->has_dma && xfer->len > (QSPI_FIFO_DEPTH << 2))
> + if (!tqspi->soc_data->has_ext_dma && xfer->len > (QSPI_FIFO_DEPTH << 2))
> return false;
>
> return true;
> @@ -1388,30 +1420,32 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi)
>
> if (tqspi->cur_direction & DATA_DIR_TX) {
> if (tqspi->tx_status) {
> - dmaengine_terminate_all(tqspi->tx_dma_chan);
> - err += 1;
> - } else {
> + if (tqspi->tx_dma_chan)
> + dmaengine_terminate_all(tqspi->tx_dma_chan);
> + err++;
> + } else if (tqspi->tx_dma_chan) {
> wait_status = wait_for_completion_interruptible_timeout(
> &tqspi->tx_dma_complete, QSPI_DMA_TIMEOUT);
> if (wait_status <= 0) {
> dmaengine_terminate_all(tqspi->tx_dma_chan);
> dev_err(tqspi->dev, "failed TX DMA transfer\n");
> - err += 1;
> + err++;
> }
> }
> }
>
> if (tqspi->cur_direction & DATA_DIR_RX) {
> if (tqspi->rx_status) {
> - dmaengine_terminate_all(tqspi->rx_dma_chan);
> - err += 2;
> - } else {
> + if (tqspi->rx_dma_chan)
> + dmaengine_terminate_all(tqspi->rx_dma_chan);
> + err++;
> + } else if (tqspi->rx_dma_chan) {
> wait_status = wait_for_completion_interruptible_timeout(
> &tqspi->rx_dma_complete, QSPI_DMA_TIMEOUT);
> if (wait_status <= 0) {
> dmaengine_terminate_all(tqspi->rx_dma_chan);
> dev_err(tqspi->dev, "failed RX DMA transfer\n");
> - err += 2;
> + err++;
> }
> }
> }
> @@ -1474,28 +1508,28 @@ static irqreturn_t tegra_qspi_isr_thread(int irq, void *context_data)
> }
>
> static struct tegra_qspi_soc_data tegra210_qspi_soc_data = {
> - .has_dma = true,
> + .has_ext_dma = true,
> .cmb_xfer_capable = false,
> .supports_tpm = false,
> .cs_count = 1,
> };
>
> static struct tegra_qspi_soc_data tegra186_qspi_soc_data = {
> - .has_dma = true,
> + .has_ext_dma = true,
> .cmb_xfer_capable = true,
> .supports_tpm = false,
> .cs_count = 1,
> };
>
> static struct tegra_qspi_soc_data tegra234_qspi_soc_data = {
> - .has_dma = false,
> + .has_ext_dma = false,
> .cmb_xfer_capable = true,
> .supports_tpm = true,
> .cs_count = 1,
> };
>
> static struct tegra_qspi_soc_data tegra241_qspi_soc_data = {
> - .has_dma = false,
> + .has_ext_dma = true,
> .cmb_xfer_capable = true,
> .supports_tpm = true,
> .cs_count = 4,
--
nvpublic
Powered by blists - more mailing lists