[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <fa3d6bb5-eea0-4cc3-86c2-93874f439b42@cybernetics.com>
Date: Mon, 8 Sep 2025 14:59:18 -0400
From: Tony Battersby <tonyb@...ernetics.com>
To: Nilesh Javali <njavali@...vell.com>,
GR-QLogic-Storage-Upstream@...vell.com,
"James E.J. Bottomley" <James.Bottomley@...senPartnership.com>,
"Martin K. Petersen" <martin.petersen@...cle.com>
Cc: linux-scsi <linux-scsi@...r.kernel.org>, target-devel@...r.kernel.org,
scst-devel@...ts.sourceforge.net,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: [SCST PATCH 08/15] scsi: qla2xxx: fix oops during cmd abort
This patch applies to the out-of-tree SCST project, not to the Linux
kernel. Apply when importing the upstream patch with the same title.
SCST addendum:
- In sqa_on_hw_pending_cmd_timeout(), call qlt_send_term_exchange()
first and then restart the timeout. After another timeout, force the
cmd to finish without waiting for a response from the HW.
Signed-off-by: Tony Battersby <tonyb@...ernetics.com>
---
qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c | 172 +++++++++++++-----
1 file changed, 122 insertions(+), 50 deletions(-)
diff --git a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c
index e885b9711..9371dad06 100644
--- a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c
+++ b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c
@@ -187,6 +187,7 @@ static struct cmd_state_name {
{QLA_TGT_STATE_NEED_DATA, "NeedData"},
{QLA_TGT_STATE_DATA_IN, "DataIn"},
{QLA_TGT_STATE_PROCESSED, "Processed"},
+ {QLA_TGT_STATE_DONE, "Done"},
};
static char *cmdstate_to_str(uint8_t state)
@@ -497,23 +498,14 @@ static void sqa_qla2xxx_handle_data(struct qla_tgt_cmd *cmd)
{
struct scst_cmd *scst_cmd = cmd->scst_cmd;
int rx_status;
- unsigned long flags;
TRACE_ENTRY();
- spin_lock_irqsave(&cmd->cmd_lock, flags);
- if (cmd->aborted) {
- spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-
+ if (unlikely(cmd->aborted)) {
scst_set_cmd_error(scst_cmd,
SCST_LOAD_SENSE(scst_sense_internal_failure));
- scst_rx_data(scst_cmd, SCST_RX_STATUS_ERROR_SENSE_SET,
- SCST_CONTEXT_THREAD);
- return;
- }
- spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-
- if (cmd->write_data_transferred) {
+ rx_status = SCST_RX_STATUS_ERROR_SENSE_SET;
+ } else if (likely(cmd->write_data_transferred)) {
rx_status = SCST_RX_STATUS_SUCCESS;
} else {
rx_status = SCST_RX_STATUS_ERROR_SENSE_SET;
@@ -691,6 +683,7 @@ static void sqa_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd)
TRACE_ENTRY();
+ cmd->state = QLA_TGT_STATE_DONE;
cmd->trc_flags |= TRC_CMD_DONE;
scst_tgt_cmd_done(scst_cmd, scst_work_context);
@@ -1522,9 +1515,10 @@ static int sqa_xmit_response(struct scst_cmd *scst_cmd)
cmd = scst_cmd_get_tgt_priv(scst_cmd);
if (scst_cmd_aborted_on_xmit(scst_cmd)) {
- TRACE_MGMT_DBG("sqatgt(%ld/%d): CMD_ABORTED cmd[%p]",
- cmd->vha->host_no, cmd->vha->vp_idx,
- cmd);
+ TRACE_MGMT_DBG(
+ "sqatgt(%ld/%d): tag %lld: skipping send response for aborted cmd",
+ cmd->vha->host_no, cmd->vha->vp_idx,
+ scst_cmd_get_tag(scst_cmd));
qlt_abort_cmd(cmd);
scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_ABORTED);
scst_tgt_cmd_done(scst_cmd, SCST_CONTEXT_DIRECT);
@@ -1841,18 +1835,26 @@ static int sqa_qla2xxx_dif_tags(struct qla_tgt_cmd *cmd,
return t32;
}
+/*
+ * Prevent the CTIO completion handler from finding the given cmd, which will
+ * cause the CTIO to be ignored.
+ */
static void sqa_cleanup_hw_pending_cmd(scsi_qla_host_t *vha,
struct qla_tgt_cmd *cmd)
{
+ struct req_que *req = cmd->qpair->req;
uint32_t h;
- struct qla_qpair *qpair = cmd->qpair;
- for (h = 1; h < qpair->req->num_outstanding_cmds; h++) {
- if (qpair->req->outstanding_cmds[h] == (srb_t *)cmd) {
- printk(KERN_INFO "Clearing handle %d for cmd %p", h,
- cmd);
- //TRACE_DBG("Clearing handle %d for cmd %p", h, cmd);
- qpair->req->outstanding_cmds[h] = NULL;
+ cmd->cmd_sent_to_fw = 0;
+ cmd->trc_flags |= TRC_CTIO_IGNORED;
+
+ for (h = 1; h < req->num_outstanding_cmds; h++) {
+ if (req->outstanding_cmds[h] == (srb_t *)cmd) {
+ PRINT_INFO(
+ "sqatgt(%ld/%d): tag %lld: handle %x: detaching cmd from handle; CTIO will be ignored",
+ vha->host_no, vha->vp_idx,
+ se_cmd_tag(&cmd->se_cmd), h);
+ req->outstanding_cmds[h] = NULL;
break;
}
}
@@ -1863,50 +1865,120 @@ static void sqa_on_hw_pending_cmd_timeout(struct scst_cmd *scst_cmd)
struct qla_tgt_cmd *cmd = scst_cmd_get_tgt_priv(scst_cmd);
struct scsi_qla_host *vha = cmd->vha;
struct qla_qpair *qpair = cmd->qpair;
- uint8_t aborted = cmd->aborted;
unsigned long flags;
+ bool advance_cmd = false;
TRACE_ENTRY();
- TRACE_MGMT_DBG("sqatgt(%ld/%d): Cmd %p HW pending for too long (state %s) %s; %s;",
- vha->host_no, vha->vp_idx, cmd,
- cmdstate_to_str((uint8_t)cmd->state),
- cmd->cmd_sent_to_fw ? "sent to fw" : "not sent to fw",
- aborted ? "aborted" : "not aborted");
-
- qlt_abort_cmd(cmd);
+ scst_cmd_get(scst_cmd);
spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+
+ TRACE_MGMT_DBG(
+ "sqatgt(%ld/%d): tag %lld: HW pending for too long (state %s) %s; %s",
+ vha->host_no, vha->vp_idx, scst_cmd_get_tag(scst_cmd),
+ cmdstate_to_str((uint8_t)cmd->state),
+ cmd->cmd_sent_to_fw ? "sent to fw" : "not sent to fw",
+ cmd->aborted ? "aborted" : "not aborted");
+
switch (cmd->state) {
case QLA_TGT_STATE_NEW:
case QLA_TGT_STATE_DATA_IN:
- PRINT_ERROR("sqa(%ld): A command in state (%s) should not be HW pending. %s",
- vha->host_no, cmdstate_to_str((uint8_t)cmd->state),
- aborted ? "aborted" : "not aborted");
- break;
+ case QLA_TGT_STATE_DONE:
+ PRINT_ERROR(
+ "sqatgt(%ld/%d): tag %lld: A command in state (%s) should not be HW pending. %s",
+ vha->host_no, vha->vp_idx, scst_cmd_get_tag(scst_cmd),
+ cmdstate_to_str((uint8_t)cmd->state),
+ cmd->aborted ? "aborted" : "not aborted");
+ goto out_unlock;
case QLA_TGT_STATE_NEED_DATA:
- /* the abort will nudge it out of FW */
- TRACE_MGMT_DBG("Force rx_data cmd %p", cmd);
- sqa_cleanup_hw_pending_cmd(vha, cmd);
- scst_set_cmd_error(scst_cmd,
- SCST_LOAD_SENSE(scst_sense_internal_failure));
- scst_rx_data(scst_cmd, SCST_RX_STATUS_ERROR_SENSE_SET,
- SCST_CONTEXT_THREAD);
- break;
case QLA_TGT_STATE_PROCESSED:
- if (!cmd->cmd_sent_to_fw)
- PRINT_ERROR("sqa(%ld): command should not be in HW pending. It's already processed. ",
- vha->host_no);
- else
- TRACE_MGMT_DBG("Force finishing cmd %p", cmd);
- sqa_cleanup_hw_pending_cmd(vha, cmd);
- scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_FAILED);
- scst_tgt_cmd_done(scst_cmd, SCST_CONTEXT_THREAD);
break;
}
+
+ /* Handle race with normal CTIO completion. */
+ if (!cmd->cmd_sent_to_fw) {
+ TRACE_MGMT_DBG(
+ "sqatgt(%ld/%d): tag %lld: cmd not sent to fw; assuming just completed",
+ vha->host_no, vha->vp_idx,
+ scst_cmd_get_tag(scst_cmd));
+ goto out_unlock;
+ }
+
+ /* Check for chip reset or a timeout after sending a term exchange. */
+ if (!qpair->fw_started ||
+ cmd->reset_count != qpair->chip_reset ||
+ (cmd->sent_term_exchg &&
+ time_is_before_jiffies(cmd->jiffies_at_term_exchg +
+ SQA_MAX_HW_PENDING_TIME * HZ / 2))) {
+ /*
+ * Timeout waiting for a response from the hw, so force the cmd
+ * to finish without waiting any longer. Note that this is
+ * slightly dangerous if the hw is still using the DMA
+ * mapping, so try term exchange first and see if that works.
+ */
+
+ sqa_cleanup_hw_pending_cmd(vha, cmd);
+
+ qlt_unmap_sg(vha, cmd);
+
+ advance_cmd = true;
+ } else {
+ /*
+ * We still expect a CTIO response from the hw. Terminating the
+ * exchange should force the CTIO response to happen sooner.
+ */
+ if (!cmd->sent_term_exchg)
+ qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1);
+ }
+
+ if (advance_cmd) {
+ switch (cmd->state) {
+ case QLA_TGT_STATE_NEED_DATA:
+ TRACE_MGMT_DBG(
+ "sqatgt(%ld/%d): tag %lld: force rx_data",
+ vha->host_no, vha->vp_idx,
+ scst_cmd_get_tag(scst_cmd));
+ cmd->state = QLA_TGT_STATE_DATA_IN;
+ scst_set_cmd_error(scst_cmd,
+ SCST_LOAD_SENSE(scst_sense_internal_failure));
+ scst_rx_data(scst_cmd, SCST_RX_STATUS_ERROR_SENSE_SET,
+ SCST_CONTEXT_THREAD);
+ break;
+
+ case QLA_TGT_STATE_PROCESSED:
+ TRACE_MGMT_DBG(
+ "sqatgt(%ld/%d): tag %lld: force finishing cmd",
+ vha->host_no, vha->vp_idx,
+ scst_cmd_get_tag(scst_cmd));
+ scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_FAILED);
+ scst_tgt_cmd_done(scst_cmd, SCST_CONTEXT_THREAD);
+ break;
+
+ default:
+ break;
+ }
+ } else {
+ /*
+ * Restart the timer so that this function is called again
+ * after another timeout. This is similar to
+ * scst_update_hw_pending_start() except that we also set
+ * cmd_hw_pending to 1.
+ *
+ * IRQs are already OFF.
+ */
+ spin_lock(&scst_cmd->sess->sess_list_lock);
+ scst_cmd->cmd_hw_pending = 1;
+ scst_cmd->hw_pending_start = jiffies;
+ spin_unlock(&scst_cmd->sess->sess_list_lock);
+ }
+
+out_unlock:
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
+ scst_cmd_put(scst_cmd);
+
TRACE_EXIT();
}
--
2.43.0
Powered by blists - more mailing lists