linux-kernel - Re: [PATCH v2 12/14] nvme-fc: Decouple error recovery from controller reset

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <692717f0-d0c7-4674-8e65-f8bae8dad4fd@suse.de>
Date: Tue, 3 Feb 2026 06:40:28 +0100
From: Hannes Reinecke <hare@...e.de>
To: Mohamed Khalfella <mkhalfella@...estorage.com>,
 Justin Tee <justin.tee@...adcom.com>,
 Naresh Gottumukkala <nareshgottumukkala83@...il.com>,
 Paul Ely <paul.ely@...adcom.com>, Chaitanya Kulkarni <kch@...dia.com>,
 Christoph Hellwig <hch@....de>, Jens Axboe <axboe@...nel.dk>,
 Keith Busch <kbusch@...nel.org>, Sagi Grimberg <sagi@...mberg.me>
Cc: Aaron Dailey <adailey@...estorage.com>,
 Randy Jennings <randyj@...estorage.com>,
 Dhaval Giani <dgiani@...estorage.com>, linux-nvme@...ts.infradead.org,
 linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 12/14] nvme-fc: Decouple error recovery from controller
 reset

On 1/30/26 23:34, Mohamed Khalfella wrote:
> nvme_fc_error_recovery() called from nvme_fc_timeout() while the controller
> is in CONNECTING state results in the deadlock reported in the link below.
> Update nvme_fc_timeout() to schedule error recovery to avoid the deadlock.
> 
> Prior to this change, if the controller was LIVE, error recovery reset
> the controller, which does not match nvme-tcp and nvme-rdma. Decouple
> error recovery from controller reset to match other fabric transports.
> 
> Link: https://lore.kernel.org/all/20250529214928.2112990-1-mkhalfella@purestorage.com/
> Signed-off-by: Mohamed Khalfella <mkhalfella@...estorage.com>
> ---
>   drivers/nvme/host/fc.c | 94 ++++++++++++++++++------------------------
>   1 file changed, 41 insertions(+), 53 deletions(-)
> 
> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
> index 6948de3f438a..f8f6071b78ed 100644
> --- a/drivers/nvme/host/fc.c
> +++ b/drivers/nvme/host/fc.c
> @@ -227,6 +227,8 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);
>   static struct device *fc_udev_device;
>   
>   static void nvme_fc_complete_rq(struct request *rq);
> +static void nvme_fc_start_ioerr_recovery(struct nvme_fc_ctrl *ctrl,
> +					 char *errmsg);
>   
>   /* *********************** FC-NVME Port Management ************************ */
>   
> @@ -788,7 +790,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
>   		"Reconnect", ctrl->cnum);
>   
>   	set_bit(ASSOC_FAILED, &ctrl->flags);
> -	nvme_reset_ctrl(&ctrl->ctrl);
> +	nvme_fc_start_ioerr_recovery(ctrl, "Connectivity Loss");
>   }
>   
>   /**
> @@ -985,7 +987,7 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
>   static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
>   static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);
>   
> -static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
> +static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl);
>   
>   static void
>   __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
> @@ -1567,9 +1569,8 @@ nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop)
>   	 * for the association have been ABTS'd by
>   	 * nvme_fc_delete_association().
>   	 */
> -
> -	/* fail the association */
> -	nvme_fc_error_recovery(ctrl, "Disconnect Association LS received");
> +	nvme_fc_start_ioerr_recovery(ctrl,
> +				     "Disconnect Association LS received");
>   
>   	/* release the reference taken by nvme_fc_match_disconn_ls() */
>   	nvme_fc_ctrl_put(ctrl);
> @@ -1871,7 +1872,7 @@ nvme_fc_ctrl_ioerr_work(struct work_struct *work)
>   	struct nvme_fc_ctrl *ctrl =
>   			container_of(work, struct nvme_fc_ctrl, ioerr_work);
>   
> -	nvme_fc_error_recovery(ctrl, "transport detected io error");
> +	nvme_fc_error_recovery(ctrl);
>   }
>   
>   /*
> @@ -1892,6 +1893,17 @@ char *nvme_fc_io_getuuid(struct nvmefc_fcp_req *req)
>   }
>   EXPORT_SYMBOL_GPL(nvme_fc_io_getuuid);
>   
> +static void nvme_fc_start_ioerr_recovery(struct nvme_fc_ctrl *ctrl,
> +					 char *errmsg)
> +{
> +	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
> +		return;
> +
> +	dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: starting error recovery %s\n",
> +		 ctrl->cnum, errmsg);
> +	queue_work(nvme_reset_wq, &ctrl->ioerr_work);
> +}
> +
>   static void
>   nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
>   {
> @@ -2049,9 +2061,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
>   		nvme_fc_complete_rq(rq);
>   
>   check_error:
> -	if (terminate_assoc &&
> -	    nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_RESETTING)
> -		queue_work(nvme_reset_wq, &ctrl->ioerr_work);
> +	if (terminate_assoc)
> +		nvme_fc_start_ioerr_recovery(ctrl, "io error");
>   }
>   
>   static int
> @@ -2495,39 +2506,6 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
>   		nvme_unquiesce_admin_queue(&ctrl->ctrl);
>   }
>   
> -static void
> -nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
> -{
> -	enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
> -
> -	/*
> -	 * if an error (io timeout, etc) while (re)connecting, the remote
> -	 * port requested terminating of the association (disconnect_ls)
> -	 * or an error (timeout or abort) occurred on an io while creating
> -	 * the controller.  Abort any ios on the association and let the
> -	 * create_association error path resolve things.
> -	 */
> -	if (state == NVME_CTRL_CONNECTING) {
> -		__nvme_fc_abort_outstanding_ios(ctrl, true);
> -		dev_warn(ctrl->ctrl.device,
> -			"NVME-FC{%d}: transport error during (re)connect\n",
> -			ctrl->cnum);
> -		return;
> -	}
> -
> -	/* Otherwise, only proceed if in LIVE state - e.g. on first error */
> -	if (state != NVME_CTRL_LIVE)
> -		return;
> -
> -	dev_warn(ctrl->ctrl.device,
> -		"NVME-FC{%d}: transport association event: %s\n",
> -		ctrl->cnum, errmsg);
> -	dev_warn(ctrl->ctrl.device,
> -		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
> -
> -	nvme_reset_ctrl(&ctrl->ctrl);
> -}
> -
>   static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq)
>   {
>   	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
> @@ -2536,24 +2514,14 @@ static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq)
>   	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
>   	struct nvme_command *sqe = &cmdiu->sqe;
>   
> -	/*
> -	 * Attempt to abort the offending command. Command completion
> -	 * will detect the aborted io and will fail the connection.
> -	 */
>   	dev_info(ctrl->ctrl.device,
>   		"NVME-FC{%d.%d}: io timeout: opcode %d fctype %d (%s) w10/11: "
>   		"x%08x/x%08x\n",
>   		ctrl->cnum, qnum, sqe->common.opcode, sqe->fabrics.fctype,
>   		nvme_fabrics_opcode_str(qnum, sqe),
>   		sqe->common.cdw10, sqe->common.cdw11);
> -	if (__nvme_fc_abort_op(ctrl, op))
> -		nvme_fc_error_recovery(ctrl, "io timeout abort failed");
>   
> -	/*
> -	 * the io abort has been initiated. Have the reset timer
> -	 * restarted and the abort completion will complete the io
> -	 * shortly. Avoids a synchronous wait while the abort finishes.
> -	 */
> +	nvme_fc_start_ioerr_recovery(ctrl, "io timeout");
>   	return BLK_EH_RESET_TIMER;
>   }
>   
> @@ -3352,6 +3320,26 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
>   	}
>   }
>   
> +static void
> +nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl)
> +{
> +	nvme_stop_keep_alive(&ctrl->ctrl);
> +	nvme_stop_ctrl(&ctrl->ctrl);
> +
> +	/* will block while waiting for io to terminate */
> +	nvme_fc_delete_association(ctrl);
> +
> +	/* Do not reconnect if controller is being deleted */
> +	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
> +		return;
> +
> +	if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
> +		queue_delayed_work(nvme_wq, &ctrl->connect_work, 0);
> +		return;
> +	}
> +
> +	nvme_fc_reconnect_or_delete(ctrl, -ENOTCONN);
> +}
>   
>   static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
>   	.name			= "fc",

I really don't get it. Why do you need to do additional steps here, when
all you do is split an existing function in half?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@...e.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich