[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251126021250.2583630-14-mkhalfella@purestorage.com>
Date: Tue, 25 Nov 2025 18:12:00 -0800
From: Mohamed Khalfella <mkhalfella@...estorage.com>
To: Chaitanya Kulkarni <kch@...dia.com>,
Christoph Hellwig <hch@....de>,
Jens Axboe <axboe@...nel.dk>,
Keith Busch <kbusch@...nel.org>,
Sagi Grimberg <sagi@...mberg.me>
Cc: Aaron Dailey <adailey@...estorage.com>,
Randy Jennings <randyj@...estorage.com>,
John Meneghini <jmeneghi@...hat.com>,
Hannes Reinecke <hare@...e.de>,
linux-nvme@...ts.infradead.org,
linux-kernel@...r.kernel.org,
Mohamed Khalfella <mkhalfella@...estorage.com>
Subject: [RFC PATCH 13/14] nvme-fc: Use CCR to recover controller that hits an error
An alive nvme controller that hits an error will now move to RECOVERING
state instead of RESETTING state. In RECOVERING state, ctrl->err_work
will attempt to use cross-controller recovery to terminate inflight IOs
on the controller. If CCR succeeds, then switch to RESETTING state and
continue error recovery as usual by tearing down the controller, and
attempting reconnect to target. If CCR fails, the behavior of recovery
depends on whether CQT is supported or not. If CQT is supported, switch
to time-based recovery by holding inflight IOs until it is safe for them
to be retried. If CQT is not supported, proceed to retry requests
immediately, as the code currently does.
Currently, inflight IOs can get completed during time-based recovery.
This will be addressed in the next patch.
Signed-off-by: Mohamed Khalfella <mkhalfella@...estorage.com>
---
drivers/nvme/host/fc.c | 52 ++++++++++++++++++++++++++++++++++++------
1 file changed, 45 insertions(+), 7 deletions(-)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 8b6a7c80015c..0e4d271bb4b6 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -166,7 +166,7 @@ struct nvme_fc_ctrl {
struct blk_mq_tag_set admin_tag_set;
struct blk_mq_tag_set tag_set;
- struct work_struct ioerr_work;
+ struct delayed_work ioerr_work;
struct delayed_work connect_work;
struct kref ref;
@@ -1862,11 +1862,48 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
}
}
+/*
+ * Attempt cross-controller recovery (CCR) for a controller that entered
+ * the RECOVERING state, falling back to time-based recovery when CCR
+ * fails and the controller reports a CQT value.
+ *
+ * Return: 0 when recovery is complete and the caller may proceed with
+ * normal error recovery (controller teardown/reconnect); -EAGAIN when
+ * time-based recovery was armed and ioerr_work has been re-queued to run
+ * again after the remaining quiesce time.
+ */
+static int nvme_fc_recover_ctrl(struct nvme_ctrl *ctrl)
+{
+ unsigned long rem;
+
+ /*
+ * Second invocation: the delayed re-queue below fired, so the
+ * time-based recovery window has elapsed. Clear the flag and finish.
+ */
+ if (test_and_clear_bit(NVME_CTRL_RECOVERED, &ctrl->flags)) {
+ dev_info(ctrl->device, "completed time-based recovery\n");
+ goto done;
+ }
+
+ /* rem == 0: CCR succeeded; nonzero: remaining hold-off in jiffies. */
+ rem = nvme_recover_ctrl(ctrl);
+ if (!rem)
+ goto done;
+
+ /* Without a CQT value there is no bound to wait for — recover now. */
+ if (!ctrl->cqt) {
+ dev_info(ctrl->device,
+ "CCR failed, CQT not supported, skip time-based recovery\n");
+ goto done;
+ }
+
+ dev_info(ctrl->device,
+ "CCR failed, switch to time-based recovery, timeout = %ums\n",
+ jiffies_to_msecs(rem));
+
+ /*
+ * Mark time-based recovery in progress and re-run ioerr_work after
+ * the remaining quiesce time; the flag is consumed on re-entry above.
+ */
+ set_bit(NVME_CTRL_RECOVERED, &ctrl->flags);
+ queue_delayed_work(nvme_reset_wq, &to_fc_ctrl(ctrl)->ioerr_work, rem);
+ return -EAGAIN;
+
+done:
+ nvme_end_ctrl_recovery(ctrl);
+ return 0;
+}
+
static void
nvme_fc_ctrl_ioerr_work(struct work_struct *work)
{
- struct nvme_fc_ctrl *ctrl =
- container_of(work, struct nvme_fc_ctrl, ioerr_work);
+ struct nvme_fc_ctrl *ctrl = container_of(to_delayed_work(work),
+ struct nvme_fc_ctrl, ioerr_work);
+
+ /*
+ * In RECOVERING state try CCR first; a nonzero return (-EAGAIN) means
+ * this work was re-queued for time-based recovery, so bail out and
+ * defer the teardown in nvme_fc_error_recovery() until recovery ends.
+ */
+ if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_RECOVERING) {
+ if (nvme_fc_recover_ctrl(&ctrl->ctrl))
+ return;
+ }
nvme_fc_error_recovery(ctrl);
}
@@ -1892,7 +1929,8 @@ EXPORT_SYMBOL_GPL(nvme_fc_io_getuuid);
static void nvme_fc_start_ioerr_recovery(struct nvme_fc_ctrl *ctrl,
char *errmsg)
{
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECOVERING) &&
+ !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
return;
dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: starting error recovery %s\n",
@@ -3227,7 +3265,7 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
- cancel_work_sync(&ctrl->ioerr_work);
+ cancel_delayed_work_sync(&ctrl->ioerr_work);
cancel_delayed_work_sync(&ctrl->connect_work);
/*
* kill the association on the link side. this will block
@@ -3465,7 +3503,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
- INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
+ INIT_DELAYED_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
spin_lock_init(&ctrl->lock);
/* io queue count */
@@ -3563,7 +3601,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
fail_ctrl:
nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
- cancel_work_sync(&ctrl->ioerr_work);
+ cancel_delayed_work_sync(&ctrl->ioerr_work);
cancel_work_sync(&ctrl->ctrl.reset_work);
cancel_delayed_work_sync(&ctrl->connect_work);
--
2.51.2
Powered by blists - more mailing lists