[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250905024659.811386-7-alistair.francis@wdc.com>
Date: Fri, 5 Sep 2025 12:46:58 +1000
From: alistair23@...il.com
To: chuck.lever@...cle.com,
hare@...nel.org,
kernel-tls-handshake@...ts.linux.dev,
netdev@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-doc@...r.kernel.org,
linux-nvme@...ts.infradead.org,
linux-nfs@...r.kernel.org
Cc: kbusch@...nel.org,
axboe@...nel.dk,
hch@....de,
sagi@...mberg.me,
kch@...dia.com,
alistair23@...il.com,
Alistair Francis <alistair.francis@....com>
Subject: [PATCH v2 6/7] nvme-tcp: Support KeyUpdate
From: Alistair Francis <alistair.francis@....com>
If the nvme_tcp_try_send() or nvme_tcp_try_recv() functions return
-EKEYEXPIRED then the underlying TLS keys need to be updated. This occurs
on a KeyUpdate event.
If the NVMe Target (TLS server) initiates a KeyUpdate this patch will
allow the NVMe layer to process the KeyUpdate request and forward the
request to userspace. Userspace must then update the key to keep the
connection alive.
This patch allows us to handle the NVMe target sending a KeyUpdate
request without aborting the connection. At this time we don't support
initiating a KeyUpdate.
Link: https://datatracker.ietf.org/doc/html/rfc8446#section-4.6.3
Signed-off-by: Alistair Francis <alistair.francis@....com>
---
v2:
- Don't change the state
- Use a helper function for KeyUpdates
- Continue sending in nvme_tcp_send_all() after a KeyUpdate
- Remove command message using recvmsg
drivers/nvme/host/tcp.c | 73 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 70 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 776047a71436..b6449effc2ac 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -171,6 +171,7 @@ struct nvme_tcp_queue {
bool tls_enabled;
u32 rcv_crc;
u32 snd_crc;
+ key_serial_t user_session_id;
__le32 exp_ddgst;
__le32 recv_ddgst;
struct completion tls_complete;
@@ -210,6 +211,7 @@ static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl,
struct nvme_tcp_queue *queue,
key_serial_t pskid,
handshake_key_update_type keyupdate);
+static void update_tls_keys(struct nvme_tcp_queue *queue);
static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
{
@@ -393,6 +395,14 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
do {
ret = nvme_tcp_try_send(queue);
} while (ret > 0);
+
+ if (ret == -EKEYEXPIRED) {
+ update_tls_keys(queue);
+
+ do {
+ ret = nvme_tcp_try_send(queue);
+ } while (ret > 0);
+ }
}
static inline bool nvme_tcp_queue_has_pending(struct nvme_tcp_queue *queue)
@@ -1347,6 +1357,8 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
done:
if (ret == -EAGAIN) {
ret = 0;
+ } else if (ret == -EKEYEXPIRED) {
+ goto out;
} else if (ret < 0) {
dev_err(queue->ctrl->ctrl.device,
"failed to send request %d\n", ret);
@@ -1371,9 +1383,56 @@ static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
queue->nr_cqe = 0;
consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
release_sock(sk);
+
+ /* If we received EINVAL from read_sock then it generally means the
+ * other side sent a command message. So let's try to clear it from
+ * our queue with a recvmsg, otherwise we get stuck in an infinite
+ * loop.
+ */
+ if (consumed == -EINVAL) {
+ char cbuf[CMSG_LEN(sizeof(char))] = {};
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
+ struct bio_vec bvec;
+
+ bvec_set_virt(&bvec, (void *)cbuf, sizeof(cbuf));
+ iov_iter_bvec(&msg.msg_iter, ITER_DEST, &bvec, 1, sizeof(cbuf));
+
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+
+ consumed = sock_recvmsg(sock, &msg, msg.msg_flags);
+ }
+
return consumed == -EAGAIN ? 0 : consumed;
}
+static void update_tls_keys(struct nvme_tcp_queue *queue)
+{
+ int qid = nvme_tcp_queue_id(queue);
+ int ret;
+
+ dev_dbg(queue->ctrl->ctrl.device,
+ "updating key for queue %d\n", qid);
+
+ cancel_work(&queue->io_work);
+ handshake_req_cancel(queue->sock->sk);
+ handshake_sk_destruct_req(queue->sock->sk);
+
+ nvme_stop_keep_alive(&(queue->ctrl->ctrl));
+ flush_work(&(queue->ctrl->ctrl).async_event_work);
+
+ ret = nvme_tcp_start_tls(&(queue->ctrl->ctrl),
+ queue, queue->ctrl->ctrl.tls_pskid,
+ HANDSHAKE_KEY_UPDATE_TYPE_RECEIVED);
+
+ if (ret < 0) {
+ dev_err(queue->ctrl->ctrl.device,
+ "failed to update the keys %d\n", ret);
+ nvme_tcp_fail_request(queue->request);
+ nvme_tcp_done_send_req(queue);
+ }
+}
+
static void nvme_tcp_io_work(struct work_struct *w)
{
struct nvme_tcp_queue *queue =
@@ -1389,15 +1448,21 @@ static void nvme_tcp_io_work(struct work_struct *w)
mutex_unlock(&queue->send_mutex);
if (result > 0)
pending = true;
- else if (unlikely(result < 0))
+ else if (unlikely(result < 0)) {
+ if (result == -EKEYEXPIRED)
+ update_tls_keys(queue);
break;
+ }
}
result = nvme_tcp_try_recv(queue);
if (result > 0)
pending = true;
- else if (unlikely(result < 0))
- return;
+ else if (unlikely(result < 0)) {
+ if (result == -EKEYEXPIRED)
+ update_tls_keys(queue);
+ break;
+ }
/* did we get some space after spending time in recv? */
if (nvme_tcp_queue_has_pending(queue) &&
@@ -1705,6 +1770,7 @@ static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid,
ctrl->ctrl.tls_pskid = key_serial(tls_key);
key_put(tls_key);
queue->tls_err = 0;
+ queue->user_session_id = user_session_id;
}
out_complete:
@@ -1734,6 +1800,7 @@ static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl,
keyring = key_serial(nctrl->opts->keyring);
args.ta_keyring = keyring;
args.ta_timeout_ms = tls_handshake_timeout * 1000;
+ args.user_session_id = queue->user_session_id;
queue->tls_err = -EOPNOTSUPP;
init_completion(&queue->tls_complete);
ret = tls_client_hello_psk(&args, GFP_KERNEL, keyupdate);
--
2.50.1
Powered by blists - more mailing lists