[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251016132923.3577429-3-va@nvidia.com>
Date: Thu, 16 Oct 2025 13:29:23 +0000
From: Vishwaroop A <va@...dia.com>
To: Mark Brown <broonie@...nel.org>, Thierry Reding
<thierry.reding@...il.com>, Jonathan Hunter <jonathanh@...dia.com>, "Sowjanya
Komatineni" <skomatineni@...dia.com>, Laxman Dewangan <ldewangan@...dia.com>,
<smangipudi@...dia.com>, <kyarlagadda@...dia.com>
CC: Vishwaroop A <va@...dia.com>, <linux-spi@...r.kernel.org>,
<linux-tegra@...r.kernel.org>, <linux-kernel@...r.kernel.org>
Subject: [PATCH v2 2/2] spi: tegra210-quad: Check hardware status on timeout
Under high system load, QSPI interrupts can be delayed or blocked on the
target CPU, causing wait_for_completion_timeout() to time out even though
the hardware has completed the transfer successfully. This has been
observed in production during error injection, RAS firmware activity, and
CPU saturation scenarios.
When a timeout occurs, check the QSPI_RDY bit in QSPI_TRANS_STATUS to
determine if the hardware actually completed the transfer. If so, manually
invoke the completion handler to process the transfer successfully instead
of failing it.
This distinguishes lost/delayed interrupts from real hardware timeouts,
preventing unnecessary failures of transfers that completed successfully.
Signed-off-by: Vishwaroop A <va@...dia.com>
---
drivers/spi/spi-tegra210-quad.c | 164 ++++++++++++++++++++++----------
1 file changed, 114 insertions(+), 50 deletions(-)
diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index 10e56d8ef678..757e9fe23e0e 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -1019,17 +1019,22 @@ static void tegra_qspi_dump_regs(struct tegra_qspi *tqspi)
tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS));
}
-static void tegra_qspi_handle_error(struct tegra_qspi *tqspi)
+static void tegra_qspi_reset(struct tegra_qspi *tqspi)
{
- dev_err(tqspi->dev, "error in transfer, fifo status 0x%08x\n", tqspi->status_reg);
- tegra_qspi_dump_regs(tqspi);
- tegra_qspi_flush_fifos(tqspi, true);
if (device_reset(tqspi->dev) < 0) {
dev_warn_once(tqspi->dev, "device reset failed\n");
tegra_qspi_mask_clear_irq(tqspi);
}
}
+static void tegra_qspi_handle_error(struct tegra_qspi *tqspi)
+{
+ dev_err(tqspi->dev, "error in transfer, fifo status 0x%08x\n", tqspi->status_reg);
+ tegra_qspi_dump_regs(tqspi);
+ tegra_qspi_flush_fifos(tqspi, true);
+ tegra_qspi_reset(tqspi);
+}
+
static void tegra_qspi_transfer_end(struct spi_device *spi)
{
struct tegra_qspi *tqspi = spi_controller_get_devdata(spi->controller);
@@ -1043,6 +1048,49 @@ static void tegra_qspi_transfer_end(struct spi_device *spi)
tegra_qspi_writel(tqspi, tqspi->def_command1_reg, QSPI_COMMAND1);
}
+static irqreturn_t handle_cpu_based_xfer(struct tegra_qspi *tqspi);
+static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi);
+
+/**
+ * tegra_qspi_handle_timeout - Handle transfer timeout with hardware check
+ * @tqspi: QSPI controller instance
+ *
+ * When a timeout occurs but hardware has completed the transfer (interrupt
+ * was lost or delayed), manually trigger transfer completion processing.
+ * This avoids failing transfers that actually succeeded.
+ *
+ * Returns: 0 if transfer was completed, -ETIMEDOUT if real timeout, -EIO if handler failed
+ */
+static int tegra_qspi_handle_timeout(struct tegra_qspi *tqspi)
+{
+ irqreturn_t ret;
+ u32 status;
+
+ /* Check if hardware actually completed the transfer */
+ status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS);
+ if (!(status & QSPI_RDY))
+ return -ETIMEDOUT;
+
+ /*
+ * Hardware completed but interrupt was lost/delayed. Manually
+ * process the completion by calling the appropriate handler.
+ */
+ dev_warn_ratelimited(tqspi->dev,
+ "QSPI interrupt timeout, but transfer complete\n");
+
+ /* Clear the transfer status */
+ status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS);
+ tegra_qspi_writel(tqspi, status, QSPI_TRANS_STATUS);
+
+ /* Manually trigger completion handler */
+ if (!tqspi->is_curr_dma_xfer)
+ ret = handle_cpu_based_xfer(tqspi);
+ else
+ ret = handle_dma_based_xfer(tqspi);
+
+ return (ret == IRQ_HANDLED) ? 0 : -EIO;
+}
+
static u32 tegra_qspi_cmd_config(bool is_ddr, u8 bus_width, u8 len)
{
u32 cmd_config = 0;
@@ -1074,6 +1122,30 @@ static u32 tegra_qspi_addr_config(bool is_ddr, u8 bus_width, u8 len)
return addr_config;
}
+static void tegra_qspi_dma_stop(struct tegra_qspi *tqspi)
+{
+ u32 value;
+
+ if ((tqspi->cur_direction & DATA_DIR_TX) && tqspi->tx_dma_chan)
+ dmaengine_terminate_all(tqspi->tx_dma_chan);
+
+ if ((tqspi->cur_direction & DATA_DIR_RX) && tqspi->rx_dma_chan)
+ dmaengine_terminate_all(tqspi->rx_dma_chan);
+
+ value = tegra_qspi_readl(tqspi, QSPI_DMA_CTL);
+ value &= ~QSPI_DMA_EN;
+ tegra_qspi_writel(tqspi, value, QSPI_DMA_CTL);
+}
+
+static void tegra_qspi_pio_stop(struct tegra_qspi *tqspi)
+{
+ u32 value;
+
+ value = tegra_qspi_readl(tqspi, QSPI_COMMAND1);
+ value &= ~QSPI_PIO;
+ tegra_qspi_writel(tqspi, value, QSPI_COMMAND1);
+}
+
static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
struct spi_message *msg)
{
@@ -1081,7 +1153,7 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
struct spi_transfer *xfer;
struct spi_device *spi = msg->spi;
u8 transfer_phase = 0;
- u32 cmd1 = 0, dma_ctl = 0;
+ u32 cmd1 = 0;
int ret = 0;
u32 address_value = 0;
u32 cmd_config = 0, addr_config = 0;
@@ -1148,43 +1220,28 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
QSPI_DMA_TIMEOUT);
if (WARN_ON_ONCE(ret == 0)) {
- dev_err_ratelimited(tqspi->dev,
- "QSPI Transfer failed with timeout\n");
- if (tqspi->is_curr_dma_xfer) {
- if ((tqspi->cur_direction & DATA_DIR_TX) &&
- tqspi->tx_dma_chan)
- dmaengine_terminate_all(tqspi->tx_dma_chan);
- if ((tqspi->cur_direction & DATA_DIR_RX) &&
- tqspi->rx_dma_chan)
- dmaengine_terminate_all(tqspi->rx_dma_chan);
- }
-
- /* Abort transfer by resetting pio/dma bit */
- if (!tqspi->is_curr_dma_xfer) {
- cmd1 = tegra_qspi_readl
- (tqspi,
- QSPI_COMMAND1);
- cmd1 &= ~QSPI_PIO;
- tegra_qspi_writel
- (tqspi, cmd1,
- QSPI_COMMAND1);
- } else {
- dma_ctl = tegra_qspi_readl
- (tqspi,
- QSPI_DMA_CTL);
- dma_ctl &= ~QSPI_DMA_EN;
- tegra_qspi_writel(tqspi, dma_ctl,
- QSPI_DMA_CTL);
- }
-
- /* Reset controller if timeout happens */
- if (device_reset(tqspi->dev) < 0) {
- dev_warn_once(tqspi->dev,
- "device reset failed\n");
- tegra_qspi_mask_clear_irq(tqspi);
+ /*
+ * Check if hardware completed the transfer
+ * even though interrupt was lost or delayed.
+ * If so, process the completion and continue.
+ */
+ ret = tegra_qspi_handle_timeout(tqspi);
+ if (ret < 0) {
+ /* Real timeout - clean up and fail */
+ dev_err(tqspi->dev, "transfer timeout\n");
+
+ /* Abort transfer by resetting pio/dma bit */
+ if (tqspi->is_curr_dma_xfer)
+ tegra_qspi_dma_stop(tqspi);
+ else
+ tegra_qspi_pio_stop(tqspi);
+
+ /* Reset controller if timeout happens */
+ tegra_qspi_reset(tqspi);
+
+ ret = -EIO;
+ goto exit;
}
- ret = -EIO;
- goto exit;
}
if (tqspi->tx_status || tqspi->rx_status) {
@@ -1275,16 +1332,23 @@ static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi,
ret = wait_for_completion_timeout(&tqspi->xfer_completion,
QSPI_DMA_TIMEOUT);
if (WARN_ON(ret == 0)) {
- dev_err(tqspi->dev, "transfer timeout\n");
- if (tqspi->is_curr_dma_xfer) {
- if ((tqspi->cur_direction & DATA_DIR_TX) && tqspi->tx_dma_chan)
- dmaengine_terminate_all(tqspi->tx_dma_chan);
- if ((tqspi->cur_direction & DATA_DIR_RX) && tqspi->rx_dma_chan)
- dmaengine_terminate_all(tqspi->rx_dma_chan);
+ /*
+ * Check if hardware completed the transfer even though
+ * interrupt was lost or delayed. If so, process the
+ * completion and continue.
+ */
+ ret = tegra_qspi_handle_timeout(tqspi);
+ if (ret < 0) {
+ /* Real timeout - clean up and fail */
+ dev_err(tqspi->dev, "transfer timeout\n");
+
+ if (tqspi->is_curr_dma_xfer)
+ tegra_qspi_dma_stop(tqspi);
+
+ tegra_qspi_handle_error(tqspi);
+ ret = -EIO;
+ goto complete_xfer;
}
- tegra_qspi_handle_error(tqspi);
- ret = -EIO;
- goto complete_xfer;
}
if (tqspi->tx_status || tqspi->rx_status) {
--
2.17.1
Powered by blists - more mailing lists