Message-ID: <20250324174909.3919131-8-mkhalfella@purestorage.com>
Date: Mon, 24 Mar 2025 10:49:00 -0700
From: Mohamed Khalfella <mkhalfella@...estorage.com>
To: Christoph Hellwig <hch@....de>,
	Sagi Grimberg <sagi@...mberg.me>,
	Keith Busch <kbusch@...nel.org>
Cc: Hannes Reinecke <hare@...e.de>,
	Daniel Wagner <wagi@...nel.org>,
	John Meneghini <jmeneghi@...hat.com>,
	randyj@...estorage.com,
	adailey@...estorage.com,
	jrani@...estorage.com,
	linux-nvme@...ts.infradead.org,
	linux-kernel@...r.kernel.org,
	mkhalfella@...estorage.com
Subject: [RFC PATCH v1 7/7] nvme-tcp: Do not immediately cancel inflight requests during recovery

In case of controller recovery, reset, or deletion it is possible that
there are inflight requests for which no response was received from the
target controller. TP4129 requires that such inflight requests not be
canceled immediately. Instead, these requests should be held until the
target learns about the disconnection and quiesces pending NVMe
commands. Implement this for nvme-tcp.
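
The standalone userspace sketch below is not kernel code; it only
models the ordering this patch relies on: hold the unanswered inflight
requests instead of canceling them, wait out the quiesce window, and
only then complete them and unquiesce. The helpers used in the diff,
nvme_queue_held_requests_work() and nvme_wait_for_held_requests(), are
introduced elsewhere in this series; every identifier in the sketch is
illustrative only and the CQT window is modeled as a fixed sleep.

/*
 * Userspace model of "hold inflight requests during recovery".
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define NUM_INFLIGHT 4
#define CQT_SECONDS  2          /* stand-in for the quiesce window */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
static int held_requests;       /* requests with no response from target */

/* Worker: completes the held requests once the hold window has elapsed. */
static void *held_requests_work(void *arg)
{
	sleep(CQT_SECONDS);     /* target is assumed quiesced by now */

	pthread_mutex_lock(&lock);
	while (held_requests > 0) {
		held_requests--;
		printf("completing held request (%d left)\n", held_requests);
	}
	pthread_cond_broadcast(&done);
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* Recovery path: hold requests, then wait before "unquiescing". */
int main(void)
{
	pthread_t worker;

	/* Teardown with hold_reqs == true: do not cancel, just hold. */
	pthread_mutex_lock(&lock);
	held_requests = NUM_INFLIGHT;
	pthread_mutex_unlock(&lock);

	pthread_create(&worker, NULL, held_requests_work, NULL);

	/* Model of waiting for held requests: block until all complete. */
	pthread_mutex_lock(&lock);
	while (held_requests > 0)
		pthread_cond_wait(&done, &lock);
	pthread_mutex_unlock(&lock);

	printf("all held requests completed; unquiescing queues\n");
	pthread_join(worker, NULL);
	return 0;
}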

Signed-off-by: Mohamed Khalfella <mkhalfella@...estorage.com>
---
 drivers/nvme/host/tcp.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 327e37a25281..822e6329e332 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2252,12 +2252,14 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
 	return error;
 }
 
-static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl)
+static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
+					  bool hold_reqs)
 {
 	nvme_quiesce_admin_queue(ctrl);
 	blk_sync_queue(ctrl->admin_q);
 	nvme_tcp_stop_queue(ctrl, 0);
-	nvme_cancel_admin_tagset(ctrl);
+	if (!hold_reqs)
+		nvme_cancel_admin_tagset(ctrl);
 	nvme_tcp_free_admin_queue(ctrl);
 	if (ctrl->tls_pskid) {
 		dev_dbg(ctrl->device, "Wipe negotiated TLS_PSK %08x\n",
@@ -2274,12 +2276,14 @@ static void nvme_tcp_suspend_io_queues(struct nvme_ctrl *ctrl)
 	nvme_sync_io_queues(ctrl);
 }
 
-static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl)
+static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
+					bool hold_reqs)
 {
 	if (ctrl->queue_count <= 1)
 		return;
 	nvme_tcp_stop_io_queues(ctrl);
-	nvme_cancel_tagset(ctrl);
+	if (!hold_reqs)
+		nvme_cancel_tagset(ctrl);
 	nvme_tcp_free_io_queues(ctrl);
 }
 
@@ -2375,7 +2379,7 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
 	}
 destroy_admin:
 	nvme_stop_keep_alive(ctrl);
-	nvme_tcp_teardown_admin_queue(ctrl);
+	nvme_tcp_teardown_admin_queue(ctrl, false);
 	if (new) {
 		nvme_unquiesce_admin_queue(ctrl);
 		nvme_remove_admin_tag_set(ctrl);
@@ -2418,10 +2422,12 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
 	nvme_tcp_suspend_io_queues(ctrl);
-	nvme_tcp_teardown_io_queues(ctrl);
+	nvme_tcp_teardown_io_queues(ctrl, true);
+	nvme_tcp_teardown_admin_queue(ctrl, true);
+	if (nvme_queue_held_requests_work(ctrl))
+		nvme_wait_for_held_requests(ctrl);
 	/* unquiesce to fail fast pending requests */
 	nvme_unquiesce_io_queues(ctrl);
-	nvme_tcp_teardown_admin_queue(ctrl);
 	nvme_unquiesce_admin_queue(ctrl);
 	nvme_auth_stop(ctrl);
 
@@ -2439,11 +2445,15 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 
 static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
 {
+	bool hold_reqs;
+
 	nvme_tcp_suspend_io_queues(ctrl);
-	nvme_tcp_teardown_io_queues(ctrl);
 	nvme_quiesce_admin_queue(ctrl);
-	nvme_disable_ctrl(ctrl, shutdown);
-	nvme_tcp_teardown_admin_queue(ctrl);
+	hold_reqs = nvme_disable_ctrl(ctrl, shutdown);
+	nvme_tcp_teardown_io_queues(ctrl, hold_reqs);
+	nvme_tcp_teardown_admin_queue(ctrl, hold_reqs);
+	if (hold_reqs && nvme_queue_held_requests_work(ctrl))
+		nvme_wait_for_held_requests(ctrl);
 }
 
 static void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl)
-- 
2.48.1

