[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <adf88c55-64b0-6138-fea8-797d2b30b770@grimberg.me>
Date: Mon, 13 Mar 2023 14:08:43 +0200
From: Sagi Grimberg <sagi@...mberg.me>
To: zhangyanjun@...tc.cn, kbusch@...nel.org, axboe@...com, hch@....de
Cc: linux-kernel@...r.kernel.org, linux-nvme@...ts.infradead.org,
xiang.li@...tc.cn, shaopeijie@...tc.cn
Subject: Re: [RFC PATCH] nvme-tcp: fix possible crash when only partilal
queues are allocated by nvme_tcp_alloc_queue
> So we query that directly freeing these allocated
>> I/O queues may result in this issue. To avoid this issuse, we try to make
>> the following code modifications, but have not verified to be valid yet
>> because of the occasionality of this issue.
>>
>> Now, we can not confirm the caller of queuing on the io_work during
>> allocating all the I/O queues, but we doubt that the socket interface
>> sk_data_ready=nvme_tcp_data_ready initialized by nvme_tcp_alloc_queue
>> may do this action in this interval. Hope to get some suggestions,
>> Thanks!
>
> I would start by determining that the running io_work is an io queue or
> the admin queue. It is also possible that the admin queue cleanup after
> we io queues allocation failed is the problematic teardown that is
> causing this bug.
>
>>
>> Signed-off-by: Yanjun Zhang <zhangyanjun@...tc.cn>
>> ---
>> drivers/nvme/host/tcp.c | 7 +++++--
>> 1 file changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
>> index 9b47dcb2a..5782183b8 100644
>> --- a/drivers/nvme/host/tcp.c
>> +++ b/drivers/nvme/host/tcp.c
>> @@ -1761,6 +1761,7 @@ static int nvme_tcp_alloc_admin_queue(struct
>> nvme_ctrl *ctrl)
>> static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
>> {
>> int i, ret;
>> + struct nvme_tcp_ctrl *tcp_ctrl = NULL;
>> for (i = 1; i < ctrl->queue_count; i++) {
>> ret = nvme_tcp_alloc_queue(ctrl, i);
>> @@ -1771,9 +1772,11 @@ static int __nvme_tcp_alloc_io_queues(struct
>> nvme_ctrl *ctrl)
>> return 0;
>> out_free_queues:
>> - for (i--; i >= 1; i--)
>> + for (i--; i >= 1; i--) {
>> + tcp_ctrl = to_tcp_ctrl(ctrl);
>> + __nvme_tcp_stop_queue(&tcp_ctrl->queues[i]);
>> nvme_tcp_free_queue(ctrl, i);
>> -
>> + }
>> return ret;
>> }
>
> The crash dump reminds me of a reported issue fixed by:
> 160f3549a907 ("nvme-tcp: fix UAF when detecting digest errors")
>
> Is this applied in your host kernel?
>
> I'd also add this trace to your reproduction attempts:
> --
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 0ec7d3329595..0cbc8afb41c8 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -919,6 +919,10 @@ static void nvme_tcp_data_ready(struct sock *sk)
>
> read_lock_bh(&sk->sk_callback_lock);
> queue = sk->sk_user_data;
> + if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) {
> + pr_err("got data on a deallocated queue %d\n",
> nvme_tcp_queue_id(queue));
> + WARN_ON_ONCE(1);
> + }
> if (likely(queue && queue->rd_enabled) &&
> !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
> queue_work_on(queue->io_cpu, nvme_tcp_wq,
> &queue->io_work);
> --
If indeed this is the io_queue and for some reason it is running,
perhaps the correct fix would be to set the socket ops later in
start_queue. It will also be symmetric because we clear them in
stop_queue:
--
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 0ec7d3329595..b3b1e39966c0 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1617,22 +1617,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl
*nctrl, int qid)
if (ret)
goto err_init_connect;
- queue->rd_enabled = true;
set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
- nvme_tcp_init_recv_ctx(queue);
-
- write_lock_bh(&queue->sock->sk->sk_callback_lock);
- queue->sock->sk->sk_user_data = queue;
- queue->state_change = queue->sock->sk->sk_state_change;
- queue->data_ready = queue->sock->sk->sk_data_ready;
- queue->write_space = queue->sock->sk->sk_write_space;
- queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
- queue->sock->sk->sk_state_change = nvme_tcp_state_change;
- queue->sock->sk->sk_write_space = nvme_tcp_write_space;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- queue->sock->sk->sk_ll_usec = 1;
-#endif
- write_unlock_bh(&queue->sock->sk->sk_callback_lock);
return 0;
@@ -1690,6 +1675,21 @@ static int nvme_tcp_start_queue(struct nvme_ctrl
*nctrl, int idx)
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
int ret;
+ queue->rd_enabled = true;
+ nvme_tcp_init_recv_ctx(queue);
+ write_lock_bh(&queue->sock->sk->sk_callback_lock);
+ queue->sock->sk->sk_user_data = queue;
+ queue->state_change = queue->sock->sk->sk_state_change;
+ queue->data_ready = queue->sock->sk->sk_data_ready;
+ queue->write_space = queue->sock->sk->sk_write_space;
+ queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
+ queue->sock->sk->sk_state_change = nvme_tcp_state_change;
+ queue->sock->sk->sk_write_space = nvme_tcp_write_space;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ queue->sock->sk->sk_ll_usec = 1;
+#endif
+ write_unlock_bh(&queue->sock->sk->sk_callback_lock);
+
if (idx)
ret = nvmf_connect_io_queue(nctrl, idx);
else
--
Powered by blists - more mailing lists