lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251126021250.2583630-14-mkhalfella@purestorage.com>
Date: Tue, 25 Nov 2025 18:12:00 -0800
From: Mohamed Khalfella <mkhalfella@...estorage.com>
To: Chaitanya Kulkarni <kch@...dia.com>,
	Christoph Hellwig <hch@....de>,
	Jens Axboe <axboe@...nel.dk>,
	Keith Busch <kbusch@...nel.org>,
	Sagi Grimberg <sagi@...mberg.me>
Cc: Aaron Dailey <adailey@...estorage.com>,
	Randy Jennings <randyj@...estorage.com>,
	John Meneghini <jmeneghi@...hat.com>,
	Hannes Reinecke <hare@...e.de>,
	linux-nvme@...ts.infradead.org,
	linux-kernel@...r.kernel.org,
	Mohamed Khalfella <mkhalfella@...estorage.com>
Subject: [RFC PATCH 13/14] nvme-fc: Use CCR to recover controller that hits an error

An alive nvme controller that hits an error will now move to RECOVERING
state instead of RESETTING state. In RECOVERING state, ctrl->err_work
will attempt to use cross-controller recovery to terminate inflight IOs
on the controller. If CCR succeeds, then switch to RESETTING state and
continue error recovery as usuall by tearing down the controller, and
attempting reconnect to target. If CCR fails, the behavior of recovery
depends on whether CQT is supported or not. If CQT is supported, switch
to time-based recovery by holding inflight IOs until it is safe for them
to be retried. If CQT is not supported proceed to retry requests
immediately, as the code currently does.

Currently, inflight IOs can get completed during time-based recovery.
This will be addressed in the next patch.

Signed-off-by: Mohamed Khalfella <mkhalfella@...estorage.com>
---
 drivers/nvme/host/fc.c | 52 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 8b6a7c80015c..0e4d271bb4b6 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -166,7 +166,7 @@ struct nvme_fc_ctrl {
 	struct blk_mq_tag_set	admin_tag_set;
 	struct blk_mq_tag_set	tag_set;
 
-	struct work_struct	ioerr_work;
+	struct delayed_work	ioerr_work;
 	struct delayed_work	connect_work;
 
 	struct kref		ref;
@@ -1862,11 +1862,48 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
 	}
 }
 
+static int nvme_fc_recover_ctrl(struct nvme_ctrl *ctrl)
+{
+	unsigned long rem;
+
+	if (test_and_clear_bit(NVME_CTRL_RECOVERED, &ctrl->flags)) {
+		dev_info(ctrl->device, "completed time-based recovery\n");
+		goto done;
+	}
+
+	rem = nvme_recover_ctrl(ctrl);
+	if (!rem)
+		goto done;
+
+	if (!ctrl->cqt) {
+		dev_info(ctrl->device,
+			  "CCR failed, CQT not supported, skip time-based recovery\n");
+		goto done;
+	}
+
+	dev_info(ctrl->device,
+		 "CCR failed, switch to time-based recovery, timeout = %ums\n",
+		 jiffies_to_msecs(rem));
+
+	set_bit(NVME_CTRL_RECOVERED, &ctrl->flags);
+	queue_delayed_work(nvme_reset_wq, &to_fc_ctrl(ctrl)->ioerr_work, rem);
+	return -EAGAIN;
+
+done:
+	nvme_end_ctrl_recovery(ctrl);
+	return 0;
+}
+
 static void
 nvme_fc_ctrl_ioerr_work(struct work_struct *work)
 {
-	struct nvme_fc_ctrl *ctrl =
-			container_of(work, struct nvme_fc_ctrl, ioerr_work);
+	struct nvme_fc_ctrl *ctrl = container_of(to_delayed_work(work),
+				struct nvme_fc_ctrl, ioerr_work);
+
+	if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_RECOVERING) {
+		if (nvme_fc_recover_ctrl(&ctrl->ctrl))
+			return;
+	}
 
 	nvme_fc_error_recovery(ctrl);
 }
@@ -1892,7 +1929,8 @@ EXPORT_SYMBOL_GPL(nvme_fc_io_getuuid);
 static void nvme_fc_start_ioerr_recovery(struct nvme_fc_ctrl *ctrl,
 					 char *errmsg)
 {
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECOVERING) &&
+	    !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
 		return;
 
 	dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: starting error recovery %s\n",
@@ -3227,7 +3265,7 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
 
-	cancel_work_sync(&ctrl->ioerr_work);
+	cancel_delayed_work_sync(&ctrl->ioerr_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
 	/*
 	 * kill the association on the link side.  this will block
@@ -3465,7 +3503,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
 	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
-	INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
+	INIT_DELAYED_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
@@ -3563,7 +3601,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 fail_ctrl:
 	nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
-	cancel_work_sync(&ctrl->ioerr_work);
+	cancel_delayed_work_sync(&ctrl->ioerr_work);
 	cancel_work_sync(&ctrl->ctrl.reset_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
 
-- 
2.51.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ