[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <fe62af4b-718c-423d-918a-d05acdadb980@suse.de>
Date: Tue, 3 Feb 2026 06:07:35 +0100
From: Hannes Reinecke <hare@...e.de>
To: Mohamed Khalfella <mkhalfella@...estorage.com>,
Justin Tee <justin.tee@...adcom.com>,
Naresh Gottumukkala <nareshgottumukkala83@...il.com>,
Paul Ely <paul.ely@...adcom.com>, Chaitanya Kulkarni <kch@...dia.com>,
Christoph Hellwig <hch@....de>, Jens Axboe <axboe@...nel.dk>,
Keith Busch <kbusch@...nel.org>, Sagi Grimberg <sagi@...mberg.me>
Cc: Aaron Dailey <adailey@...estorage.com>,
Randy Jennings <randyj@...estorage.com>,
Dhaval Giani <dgiani@...estorage.com>, linux-nvme@...ts.infradead.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 07/14] nvme: Introduce FENCING and FENCED controller
states
On 1/30/26 23:34, Mohamed Khalfella wrote:
> FENCING is a new controller state that a LIVE controller enters when an
> error is encountered. While in FENCING state, inflight IOs that time out
> are not canceled because they should be held until either CCR succeeds
> or time-based recovery completes. While the queues remain alive, requests
> are not allowed to be sent in this state and the controller cannot be
> reset or deleted. This is intentional because resetting or deleting the
> controller results in canceling inflight IOs.
>
> FENCED is a short-term state the controller enters before it is reset.
> It exists only to prevent manual resets from happening while the controller is
> in FENCING state.
>
> Signed-off-by: Mohamed Khalfella <mkhalfella@...estorage.com>
> ---
> drivers/nvme/host/core.c | 25 +++++++++++++++++++++++--
> drivers/nvme/host/nvme.h | 4 ++++
> drivers/nvme/host/sysfs.c | 2 ++
> 3 files changed, 29 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 8961d612ccb0..3e1e02822dd4 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -574,10 +574,29 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
> break;
> }
> break;
> + case NVME_CTRL_FENCING:
> + switch (old_state) {
> + case NVME_CTRL_LIVE:
> + changed = true;
> + fallthrough;
> + default:
> + break;
> + }
> + break;
> + case NVME_CTRL_FENCED:
> + switch (old_state) {
> + case NVME_CTRL_FENCING:
> + changed = true;
> + fallthrough;
> + default:
> + break;
> + }
> + break;
> case NVME_CTRL_RESETTING:
> switch (old_state) {
> case NVME_CTRL_NEW:
> case NVME_CTRL_LIVE:
> + case NVME_CTRL_FENCED:
> changed = true;
> fallthrough;
> default:
> @@ -760,6 +779,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
>
> if (state != NVME_CTRL_DELETING_NOIO &&
> state != NVME_CTRL_DELETING &&
> + state != NVME_CTRL_FENCING &&
Shouldn't 'FENCED' be in here, too?
> state != NVME_CTRL_DEAD &&
> !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
> !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
> @@ -802,10 +822,11 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
> req->cmd->fabrics.fctype == nvme_fabrics_type_auth_receive))
> return true;
> break;
> - default:
> - break;
> + case NVME_CTRL_FENCING:
Similar here.
> case NVME_CTRL_DEAD:
> return false;
> + default:
> + break;
> }
> }
>
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 9dd9f179ad88..00866bbc66f3 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -251,6 +251,8 @@ static inline u16 nvme_req_qid(struct request *req)
> enum nvme_ctrl_state {
> NVME_CTRL_NEW,
> NVME_CTRL_LIVE,
> + NVME_CTRL_FENCING,
> + NVME_CTRL_FENCED,
> NVME_CTRL_RESETTING,
> NVME_CTRL_CONNECTING,
> NVME_CTRL_DELETING,
> @@ -777,6 +779,8 @@ static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl)
> switch (nvme_ctrl_state(ctrl)) {
> case NVME_CTRL_NEW:
> case NVME_CTRL_LIVE:
> + case NVME_CTRL_FENCING:
> + case NVME_CTRL_FENCED:
> case NVME_CTRL_RESETTING:
> case NVME_CTRL_CONNECTING:
> return false;
> diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
> index f81bbb6ec768..4ec9dfeb736e 100644
> --- a/drivers/nvme/host/sysfs.c
> +++ b/drivers/nvme/host/sysfs.c
> @@ -443,6 +443,8 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
> static const char *const state_name[] = {
> [NVME_CTRL_NEW] = "new",
> [NVME_CTRL_LIVE] = "live",
> + [NVME_CTRL_FENCING] = "fencing",
> + [NVME_CTRL_FENCED] = "fenced",
> [NVME_CTRL_RESETTING] = "resetting",
> [NVME_CTRL_CONNECTING] = "connecting",
> [NVME_CTRL_DELETING] = "deleting",
You need to modify nvme-tcp.c:nvme_tcp_timeout() too, as this checks
'just' for 'LIVE' state and will abort/terminate commands when in
FENCING. Similar argument for nvme-rdma.c. And nvme-fc.c also needs an
audit to ensure it works correctly.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@...e.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
Powered by blists - more mailing lists