linux-kernel - [RFC PATCH v2 06/19] scsi: scsi_error: Add flags to mark error handle steps has done

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230901094127.2010873-7-haowenchao2@huawei.com>
Date:   Fri, 1 Sep 2023 17:41:14 +0800
From:   Wenchao Hao <haowenchao2@...wei.com>
To:     "James E . J . Bottomley" <jejb@...ux.ibm.com>,
        "Martin K . Petersen" <martin.petersen@...cle.com>,
        <linux-scsi@...r.kernel.org>
CC:     Hannes Reinecke <hare@...e.de>, <linux-kernel@...r.kernel.org>,
        <louhongxiang@...wei.com>, <lixiaokeng@...wei.com>,
        Wenchao Hao <haowenchao2@...wei.com>
Subject: [RFC PATCH v2 06/19] scsi: scsi_error: Add flags to mark error handle steps has done

LUN based error handling mainly performs three steps to recover
commands: check sense, start unit, and reset LUN. It may fall
back to target/host based error handling, which would perform these
steps too.

Target based error handling resets the target; it may also fall back
to host based error handling.

Add flags that mark these steps as done, so that they are not
repeated after falling back to a wider error handler.

The flags should be cleared when the LUN/target based error handler is
woken up or when target/host based error handling finishes, and set
when falling back to target/host based error handling.

scsi_eh_get_sense, scsi_eh_stu, scsi_eh_bus_device_reset and
scsi_eh_target_reset check these flags before taking any action.

Signed-off-by: Wenchao Hao <haowenchao2@...wei.com>
---
 drivers/scsi/scsi_error.c  | 55 ++++++++++++++++++++++++++++++++++++++
 include/scsi/scsi_device.h | 28 +++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 16888540b663..055c04470f5c 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -57,10 +57,50 @@
 #define BUS_RESET_SETTLE_TIME   (10)
 #define HOST_RESET_SETTLE_TIME  (10)
 
+#define sdev_flags_done(flag)					\
+static inline int sdev_##flag(struct scsi_device *sdev)		\
+{								\
+	struct scsi_device_eh *eh = sdev->eh;			\
+	if (!eh)						\
+		return 0;					\
+	return eh->flag;					\
+}
+
 static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
 static enum scsi_disposition scsi_try_to_abort_cmd(const struct scsi_host_template *,
 						   struct scsi_cmnd *);
 
+sdev_flags_done(get_sense_done);
+sdev_flags_done(stu_done);
+sdev_flags_done(reset_done);
+
+static inline int starget_reset_done(struct scsi_target *starget)
+{
+	struct scsi_target_eh *eh = starget->eh;
+
+	if (!eh)
+		return 0;
+	return eh->reset_done;
+}
+
+static inline void shost_clear_eh_done(struct Scsi_Host *shost)
+{
+	struct scsi_device *sdev;
+	struct scsi_target *starget;
+
+	list_for_each_entry(starget, &shost->__targets, siblings)
+		if (starget->eh)
+			starget->eh->reset_done = 0;
+
+	shost_for_each_device(sdev, shost) {
+		if (!sdev->eh)
+			continue;
+		sdev->eh->get_sense_done = 0;
+		sdev->eh->stu_done	 = 0;
+		sdev->eh->reset_done	 = 0;
+	}
+}
+
 void scsi_eh_wakeup(struct Scsi_Host *shost)
 {
 	lockdep_assert_held(shost->host_lock);
@@ -1402,6 +1442,9 @@ int scsi_eh_get_sense(struct list_head *work_q,
 					     current->comm));
 			break;
 		}
+		if (sdev_get_sense_done(scmd->device) ||
+		    starget_reset_done(scsi_target(scmd->device)))
+			continue;
 		if (!scsi_status_is_check_condition(scmd->result))
 			/*
 			 * don't request sense if there's no check condition
@@ -1615,6 +1658,9 @@ static int scsi_eh_stu(struct Scsi_Host *shost,
 			scsi_device_put(sdev);
 			break;
 		}
+		if (sdev_stu_done(sdev) ||
+		    starget_reset_done(scsi_target(sdev)))
+			continue;
 		stu_scmd = NULL;
 		list_for_each_entry(scmd, work_q, eh_entry)
 			if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
@@ -1698,6 +1744,9 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
 				bdr_scmd = scmd;
 				break;
 			}
+		if (sdev_reset_done(sdev) ||
+		    starget_reset_done(scsi_target(sdev)))
+			continue;
 
 		if (!bdr_scmd)
 			continue;
@@ -1746,6 +1795,11 @@ static int scsi_eh_target_reset(struct Scsi_Host *shost,
 		}
 
 		scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry);
+		if (starget_reset_done(scsi_target(scmd->device))) {
+			/* push back on work queue for further processing */
+			list_move(&scmd->eh_entry, work_q);
+			continue;
+		}
 		id = scmd_id(scmd);
 
 		SCSI_LOG_ERROR_RECOVERY(3,
@@ -2359,6 +2413,7 @@ static void scsi_unjam_host(struct Scsi_Host *shost)
 	if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
 		scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
 
+	shost_clear_eh_done(shost);
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (shost->eh_deadline != -1)
 		shost->last_reset = 0;
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index df3f1b8d1390..b03a4f21c7df 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -108,6 +108,24 @@ struct scsi_device;
 struct scsi_target;
 
 struct scsi_device_eh {
+	/*
+	 * LUN based error handling mainly performs three
+	 * steps to recover commands, which are
+	 *   check sense
+	 *   start unit
+	 *   reset LUN
+	 * We may fall back to target or host based error handling,
+	 * which would do these steps too. Add flags to mark these steps
+	 * as done to avoid repeating them.
+	 *
+	 * The flags should be cleared when the LUN based error handler is
+	 * woken up or when target/host based error handling finishes, and
+	 * set when falling back to target or host based error handling.
+	 */
+	unsigned get_sense_done:1;
+	unsigned stu_done:1;
+	unsigned reset_done:1;
+
 	/*
 	 * add scsi command to error handler so it would be handuled by
 	 * driver's error handle strategy
@@ -139,6 +157,16 @@ struct scsi_device_eh {
 };
 
 struct scsi_target_eh {
+	/*
+	 * Flag marking that the target reset is done, to avoid repeating
+	 * this step when falling back to host based error handling.
+	 *
+	 * The flag should be cleared when the target based error handler
+	 * is woken up or when host based error handling finishes, and
+	 * set when falling back to host based error handling.
+	 */
+	unsigned reset_done:1;
+
 	/*
 	 * add scsi command to error handler so it would be handuled by
 	 * driver's error handle strategy
-- 
2.35.3