lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <e34b522c-a492-4d86-9fbd-1a667e26d884@suse.de>
Date: Tue, 2 Sep 2025 11:13:41 +0200
From: Hannes Reinecke <hare@...e.de>
To: Daniel Wagner <wagi@...nel.org>, Keith Busch <kbusch@...nel.org>,
 Christoph Hellwig <hch@....de>, Sagi Grimberg <sagi@...mberg.me>,
 James Smart <james.smart@...adcom.com>
Cc: Shinichiro Kawasaki <shinichiro.kawasaki@....com>,
 linux-nvme@...ts.infradead.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 4/4] nvme-fc: wait for initial connect attempt to
 finish

On 8/29/25 17:37, Daniel Wagner wrote:
> The TCP and RDMA transport are doing synchronous connects. Thus the
> write to /dev/nvme-fabrics blocks and will return either success or
> fail. The FC transport offloads the connect attempt to a workqueue and
> thus it's an asynchronous operation. The write will succeeds even though
> the connection attempt is ongoing.
> 
> This async connect feature was introduced to mitigate problems with
> transient connect errors and the task to coordinate retries with
> userspace (nvme-cli).
> 
> Unfortunately, this makes the transports behave differently. Streamline
> nvme-fc to wait for the initial connection attempt.
> 
> In order to support also the async connection attempt introduce a new
> flag for userspace, which is an opt-in feature. This avoids breaking
> existing users which are not ready for the FC transport behavior change.
> 
> Link: https://lore.kernel.org/linux-nvme/0605ac36-16d5-2026-d3c6-62d346db6dfb@gmail.com/
> Signed-off-by: Daniel Wagner <wagi@...nel.org>
> ---
>   drivers/nvme/host/fabrics.c | 18 +++++++++++++++++-
>   drivers/nvme/host/fabrics.h |  3 +++
>   drivers/nvme/host/fc.c      | 36 +++++++++++++++++++++++++++++++++++-
>   3 files changed, 55 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index cb9311c399856de54a2e4d41d41d295dd1aef31e..73e7a1e7925ef2e8ad633e8e2bd36207181dcbb6 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -709,6 +709,7 @@ static const match_table_t opt_tokens = {
>   	{ NVMF_OPT_TLS,			"tls"			},
>   	{ NVMF_OPT_CONCAT,		"concat"		},
>   #endif
> +	{ NVMF_OPT_CONNECT_ASYNC,	"connect_async=%d"	},
>   	{ NVMF_OPT_ERR,			NULL			}
>   };
>   
> @@ -738,6 +739,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
>   	opts->tls_key = NULL;
>   	opts->keyring = NULL;
>   	opts->concat = false;
> +	/* keep backward compatibility with older nvme-cli */
> +	opts->connect_async = true;
>   
>   	options = o = kstrdup(buf, GFP_KERNEL);
>   	if (!options)
> @@ -1064,6 +1067,19 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
>   			}
>   			opts->concat = true;
>   			break;
> +		case NVMF_OPT_CONNECT_ASYNC:
> +			if (match_int(args, &token)) {
> +				ret = -EINVAL;
> +				goto out;
> +			}
> +			if (token < 0 || token > 1) {
> +				pr_err("Invalid connect_async %d value\n",
> +				       token);
> +				ret = -EINVAL;
> +				goto out;
> +			}
> +			opts->connect_async = token;
> +			break;
>   		default:
>   			pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
>   				p);
> @@ -1316,7 +1332,7 @@ EXPORT_SYMBOL_GPL(nvmf_ctrl_options_put);
>   				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
>   				 NVMF_OPT_DISABLE_SQFLOW | NVMF_OPT_DISCOVERY |\
>   				 NVMF_OPT_FAIL_FAST_TMO | NVMF_OPT_DHCHAP_SECRET |\
> -				 NVMF_OPT_DHCHAP_CTRL_SECRET)
> +				 NVMF_OPT_DHCHAP_CTRL_SECRET | NVMF_OPT_CONNECT_ASYNC)
>   
>   static struct nvme_ctrl *
>   nvmf_create_ctrl(struct device *dev, const char *buf)
> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
> index bcc1d5f97ee5a03852830bf3ba0a15f82c7c2c03..9015bb3f63e1d0c412cf4af5194a42411fbb516c 100644
> --- a/drivers/nvme/host/fabrics.h
> +++ b/drivers/nvme/host/fabrics.h
> @@ -67,6 +67,7 @@ enum {
>   	NVMF_OPT_KEYRING	= 1 << 26,
>   	NVMF_OPT_TLS_KEY	= 1 << 27,
>   	NVMF_OPT_CONCAT		= 1 << 28,
> +	NVMF_OPT_CONNECT_ASYNC	= 1 << 29,
>   };
>   
>   /**
> @@ -111,6 +112,7 @@ enum {
>    * @nr_poll_queues: number of queues for polling I/O
>    * @tos: type of service
>    * @fast_io_fail_tmo: Fast I/O fail timeout in seconds
> + * @connect_async: Don't wait for the initial connect attempt to succeed or fail
>    */
>   struct nvmf_ctrl_options {
>   	struct kref		ref;
> @@ -142,6 +144,7 @@ struct nvmf_ctrl_options {
>   	unsigned int		nr_poll_queues;
>   	int			tos;
>   	int			fast_io_fail_tmo;
> +	bool			connect_async;
>   };
>   
>   int nvmf_ctrl_options_get(struct nvmf_ctrl_options *opts);
> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
> index 5d9e193bd2525ae1a2f08e2a0a16ca41e08a7304..ccd67acb55e216602010933914afca77248b3de0 100644
> --- a/drivers/nvme/host/fc.c
> +++ b/drivers/nvme/host/fc.c
> @@ -148,6 +148,7 @@ struct nvme_fc_rport {
>   #define ASSOC_ACTIVE		0
>   #define ASSOC_FAILED		1
>   #define FCCTRL_TERMIO		2
> +#define INITIAL_SYNC_CONNECT	3
>   
>   struct nvme_fc_ctrl {
>   	spinlock_t		lock;
> @@ -168,6 +169,7 @@ struct nvme_fc_ctrl {
>   
>   	struct work_struct	ioerr_work;
>   	struct delayed_work	connect_work;
> +	struct completion	connect_completion;
>   
>   	struct kref		ref;
>   	unsigned long		flags;
> @@ -829,6 +831,7 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
>   			dev_warn(ctrl->ctrl.device,
>   				"NVME-FC{%d}: controller connectivity lost.\n",
>   				ctrl->cnum);
> +			complete(&ctrl->connect_completion);
>   			nvme_fc_ctrl_put(ctrl);
>   		} else
>   			nvme_fc_ctrl_connectivity_loss(ctrl);
> @@ -3253,6 +3256,9 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
>   	if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_CONNECTING)
>   		return;
>   
> +	if (test_bit(INITIAL_SYNC_CONNECT, &ctrl->flags))
> +		goto delete;
> +
>   	if (portptr->port_state == FC_OBJSTATE_ONLINE) {
>   		dev_info(ctrl->ctrl.device,
>   			"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
> @@ -3294,6 +3300,8 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
>   					   (ctrl->ctrl.opts->max_reconnects *
>   					    ctrl->ctrl.opts->reconnect_delay)));
>   
> +delete:
> +	complete(&ctrl->connect_completion);
>   	nvme_fc_ctrl_put(ctrl);
>   }
>   
> @@ -3353,10 +3361,14 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
>   	ret = nvme_fc_create_association(ctrl);
>   	if (ret)
>   		nvme_fc_reconnect_or_delete(ctrl, ret);
> -	else
> +	else {
>   		dev_info(ctrl->ctrl.device,
>   			"NVME-FC{%d}: controller connect complete\n",
>   			ctrl->cnum);
> +		pr_info("%s:%d clear initial sync connect bit\n", __func__, __LINE__);
> +		clear_bit(INITIAL_SYNC_CONNECT, &ctrl->flags);
> +		complete(&ctrl->connect_completion);
> +	}
>   }
>   
>   
> @@ -3461,6 +3473,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
>   
>   	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
>   	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
> +	init_completion(&ctrl->connect_completion);
>   	INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
>   	spin_lock_init(&ctrl->lock);
>   
> @@ -3539,6 +3552,12 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
>   	list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
>   	spin_unlock_irqrestore(&rport->lock, flags);
>   
> +	if (!opts->connect_async) {
> +		pr_info("%s:%d set initial connect bit\n", __func__, __LINE__);
> +		set_bit(INITIAL_SYNC_CONNECT, &ctrl->flags);
> +		nvme_fc_ctrl_get(ctrl);
> +	}
> +
>   	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
>   		dev_err(ctrl->ctrl.device,
>   			"NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
> @@ -3554,6 +3573,20 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
>   
>   	flush_delayed_work(&ctrl->connect_work);
>   
> +	if (!opts->connect_async) {
> +		enum nvme_ctrl_state state;
> +
> +		wait_for_completion(&ctrl->connect_completion);
> +		state = nvme_ctrl_state(&ctrl->ctrl);
> +		nvme_fc_ctrl_put(ctrl);
> +
> +		if (state != NVME_CTRL_LIVE) {
> +			/* Cleanup is handled by the connect state machine */
> +			pr_info("%s:%d ctrl state %d\n", __func__, __LINE__, state);
> +			return ERR_PTR(-EIO);

We really should return the correct status (and not just -EIO).
Guess we'll need to introduce another variable in struct nvme_fc_ctrl
to hold the connect status...

> +		}
> +	}
> +
>   	dev_info(ctrl->ctrl.device,
>   		"NVME-FC{%d}: new ctrl: NQN \"%s\", hostnqn: %s\n",
>   		ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl), opts->host->nqn);
> @@ -3895,6 +3928,7 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
>   		dev_warn(ctrl->ctrl.device,
>   			"NVME-FC{%d}: transport unloading: deleting ctrl\n",
>   			ctrl->cnum);
> +		complete(&ctrl->connect_completion);
>   		nvme_fc_ctrl_put(ctrl);
>   	}
>   	spin_unlock(&rport->lock);
> 

And I wonder: what about the udev rules?
Do they need to be modified?
(IE: should we call udev with --connect-async or without?)

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@...e.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ