lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <mqyiss7smpwlsfblxgrfkvsg55n7uctujtkmdtdejo7fg3hatj@22cyzfpuk23i>
Date: Thu, 16 Oct 2025 17:14:40 +0200
From: Thierry Reding <thierry.reding@...il.com>
To: Vishwaroop A <va@...dia.com>
Cc: Mark Brown <broonie@...nel.org>, 
	Jonathan Hunter <jonathanh@...dia.com>, Sowjanya Komatineni <skomatineni@...dia.com>, 
	Laxman Dewangan <ldewangan@...dia.com>, smangipudi@...dia.com, kyarlagadda@...dia.com, 
	linux-spi@...r.kernel.org, linux-tegra@...r.kernel.org, linux-kernel@...r.kernel.org, 
	Thierry Reding <treding@...dia.com>
Subject: Re: [PATCH v2 1/2] spi: tegra210-quad: Fix timeout handling

On Thu, Oct 16, 2025 at 01:29:22PM +0000, Vishwaroop A wrote:
> When the CPU that the QSPI interrupt handler runs on (typically CPU 0)
> is excessively busy, it can lead to rare cases of the IRQ thread not
> running before the transfer timeout is reached.
> 
> While handling the timeouts, any pending transfers are cleaned up and
> the message that they correspond to is marked as failed, which leaves
> the curr_xfer field pointing at stale memory.
> 
> To avoid this, clear curr_xfer to NULL upon timeout and check for this
> condition when the IRQ thread is finally run.
> 
> While at it, also make sure to clear interrupts on failure so that new
> interrupts can be run.
> 
> A better, more involved, fix would move the interrupt clearing into a
> hard IRQ handler. Ideally we would also want to signal that the IRQ
> thread no longer needs to be run after the timeout is hit to avoid the
> extra check for a valid transfer.
> 
> Fixes: 921fc1838fb0 ("spi: tegra210-quad: Add support for Tegra210 QSPI controller")
> Signed-off-by: Thierry Reding <treding@...dia.com>
> Signed-off-by: Vishwaroop A <va@...dia.com>
> ---
>  drivers/spi/spi-tegra210-quad.c | 23 ++++++++++++++++++++---
>  1 file changed, 20 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
> index 3be7499db21e..10e56d8ef678 100644
> --- a/drivers/spi/spi-tegra210-quad.c
> +++ b/drivers/spi/spi-tegra210-quad.c
> @@ -1024,8 +1024,10 @@ static void tegra_qspi_handle_error(struct tegra_qspi *tqspi)
>  	dev_err(tqspi->dev, "error in transfer, fifo status 0x%08x\n", tqspi->status_reg);
>  	tegra_qspi_dump_regs(tqspi);
>  	tegra_qspi_flush_fifos(tqspi, true);
> -	if (device_reset(tqspi->dev) < 0)
> +	if (device_reset(tqspi->dev) < 0) {
>  		dev_warn_once(tqspi->dev, "device reset failed\n");
> +		tegra_qspi_mask_clear_irq(tqspi);
> +	}
>  }
>  
>  static void tegra_qspi_transfer_end(struct spi_device *spi)
> @@ -1173,12 +1175,14 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
>  					dma_ctl &= ~QSPI_DMA_EN;
>  					tegra_qspi_writel(tqspi, dma_ctl,
>  							  QSPI_DMA_CTL);
> -				}
> +			}
>  
>  				/* Reset controller if timeout happens */
> -				if (device_reset(tqspi->dev) < 0)
> +				if (device_reset(tqspi->dev) < 0) {
>  					dev_warn_once(tqspi->dev,
>  						      "device reset failed\n");
> +					tegra_qspi_mask_clear_irq(tqspi);
> +				}
>  				ret = -EIO;
>  				goto exit;
>  			}
> @@ -1196,6 +1200,7 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
>  			goto exit;
>  		}
>  		msg->actual_length += xfer->len;
> +		tqspi->curr_xfer = NULL;
>  		if (!xfer->cs_change && transfer_phase == DATA_TRANSFER) {
>  			tegra_qspi_transfer_end(spi);
>  			spi_transfer_delay_exec(xfer);
> @@ -1205,6 +1210,7 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
>  	ret = 0;
>  
>  exit:
> +	tqspi->curr_xfer = NULL;
>  	msg->status = ret;
>  
>  	return ret;
> @@ -1290,6 +1296,8 @@ static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi,
>  		msg->actual_length += xfer->len + dummy_bytes;
>  
>  complete_xfer:
> +		tqspi->curr_xfer = NULL;
> +
>  		if (ret < 0) {
>  			tegra_qspi_transfer_end(spi);
>  			spi_transfer_delay_exec(xfer);

The original patch has another tqspi->curr_xfer = NULL in
handle_cpu_based_xfer() that doesn't seem to be in this patch. I'm
attaching it here again for reference.

Thierry

From 6c35ff11a4ef6e917aa1891df5cbd6bfd5902067 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@...dia.com>
Date: Tue, 7 Oct 2025 13:40:26 +0000
Subject: [PATCH] spi: tegra210-quad: Fix timeout handling

When the CPU that the QSPI interrupt handler runs on (typically CPU 0)
is excessively busy, it can lead to rare cases of the IRQ thread not
running before the transfer timeout is reached.

While handling the timeouts, any pending transfers are cleaned up and
the message that they correspond to is marked as failed, which leaves
the curr_xfer field pointing at stale memory.

To avoid this, clear curr_xfer to NULL upon timeout and check for this
condition when the IRQ thread is finally run.

While at it, also make sure to clear interrupts on failure so that new
interrupts can be run.

A better, more involved, fix would move the interrupt clearing into a
hard IRQ handler. Ideally we would also want to signal that the IRQ
thread no longer needs to be run after the timeout is hit to avoid the
extra check for a valid transfer.

Signed-off-by: Thierry Reding <treding@...dia.com>
---
 drivers/spi/spi-tegra210-quad.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index 3be7499db21e..a103265e7eab 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -1024,8 +1024,10 @@ static void tegra_qspi_handle_error(struct tegra_qspi *tqspi)
 	dev_err(tqspi->dev, "error in transfer, fifo status 0x%08x\n", tqspi->status_reg);
 	tegra_qspi_dump_regs(tqspi);
 	tegra_qspi_flush_fifos(tqspi, true);
-	if (device_reset(tqspi->dev) < 0)
+	if (device_reset(tqspi->dev) < 0) {
 		dev_warn_once(tqspi->dev, "device reset failed\n");
+		tegra_qspi_mask_clear_irq(tqspi);
+	}
 }
 
 static void tegra_qspi_transfer_end(struct spi_device *spi)
@@ -1176,9 +1178,11 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
 				}
 
 				/* Reset controller if timeout happens */
-				if (device_reset(tqspi->dev) < 0)
+				if (device_reset(tqspi->dev) < 0) {
 					dev_warn_once(tqspi->dev,
 						      "device reset failed\n");
+					tegra_qspi_mask_clear_irq(tqspi);
+				}
 				ret = -EIO;
 				goto exit;
 			}
@@ -1200,11 +1204,13 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
 			tegra_qspi_transfer_end(spi);
 			spi_transfer_delay_exec(xfer);
 		}
+		tqspi->curr_xfer = NULL;
 		transfer_phase++;
 	}
 	ret = 0;
 
 exit:
+	tqspi->curr_xfer = NULL;
 	msg->status = ret;
 
 	return ret;
@@ -1290,6 +1296,8 @@ static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi,
 		msg->actual_length += xfer->len + dummy_bytes;
 
 complete_xfer:
+		tqspi->curr_xfer = NULL;
+
 		if (ret < 0) {
 			tegra_qspi_transfer_end(spi);
 			spi_transfer_delay_exec(xfer);
@@ -1395,6 +1403,7 @@ static irqreturn_t handle_cpu_based_xfer(struct tegra_qspi *tqspi)
 	tegra_qspi_calculate_curr_xfer_param(tqspi, t);
 	tegra_qspi_start_cpu_based_transfer(tqspi, t);
 exit:
+	tqspi->curr_xfer = NULL;
 	spin_unlock_irqrestore(&tqspi->lock, flags);
 	return IRQ_HANDLED;
 }
@@ -1480,6 +1489,15 @@ static irqreturn_t tegra_qspi_isr_thread(int irq, void *context_data)
 {
 	struct tegra_qspi *tqspi = context_data;
 
+	/*
+	 * Occasionally the IRQ thread takes a long time to wake up (usually
+	 * when the CPU that it's running on is excessively busy) and we have
+	 * already reached the timeout before and cleaned up the timed out
+	 * transfer. Avoid any processing in that case and bail out early.
+	 */
+	if (tqspi->curr_xfer == NULL)
+		return IRQ_NONE;
+
 	tqspi->status_reg = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS);
 
 	if (tqspi->cur_direction & DATA_DIR_TX)
-- 
2.51.0



Download attachment "signature.asc" of type "application/pgp-signature" (834 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ