[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20210824141227.808340-5-yukuai3@huawei.com>
Date: Tue, 24 Aug 2021 22:12:26 +0800
From: Yu Kuai <yukuai3@...wei.com>
To: <axboe@...nel.dk>, <josef@...icpanda.com>, <ming.lei@...hat.com>,
<bvanassche@....org>
CC: <linux-block@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
<nbd@...er.debian.org>, <yukuai3@...wei.com>, <yi.zhang@...wei.com>
Subject: [PATCH v3 4/5] nbd: make sure request completion won't concurrent
commit cddce0116058 ("nbd: Aovid double completion of a request")
tried to fix the problem that nbd_clear_que() and recv_work() can
complete a request concurrently. However, the problem still exists:
t1                    t2                     t3

nbd_disconnect_and_put
 flush_workqueue
                      recv_work
                       blk_mq_complete_request
                        blk_mq_complete_request_remote -> this is true
                         WRITE_ONCE(rq->state, MQ_RQ_COMPLETE)
                         blk_mq_raise_softirq
                                             blk_done_softirq
                                              blk_complete_reqs
                                               nbd_complete_rq
                                                blk_mq_end_request
                                                 blk_mq_free_request
                                                  WRITE_ONCE(rq->state, MQ_RQ_IDLE)
  nbd_clear_que
   blk_mq_tagset_busy_iter
    nbd_clear_req
                                                   __blk_mq_free_request
                                                    blk_mq_put_tag
     blk_mq_complete_request
There are three places where a request can be completed in nbd:
recv_work(), nbd_clear_que() and nbd_xmit_timeout(). Since they
all hold cmd->lock before completing the request, it's easy to
avoid the problem by setting a cmd flag and checking it under that
lock, so that only one of them completes the request.
Signed-off-by: Yu Kuai <yukuai3@...wei.com>
---
drivers/block/nbd.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7b9e19675224..4d5098d01758 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -416,12 +416,15 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
struct nbd_device *nbd = cmd->nbd;
struct nbd_config *config;
+ bool need_complete;
if (!mutex_trylock(&cmd->lock))
return BLK_EH_RESET_TIMER;
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
+ need_complete =
+ test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
mutex_unlock(&cmd->lock);
goto done;
}
@@ -490,11 +493,13 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
cmd->status = BLK_STS_IOERR;
+ need_complete = test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
mutex_unlock(&cmd->lock);
sock_shutdown(nbd);
nbd_config_put(nbd);
done:
- blk_mq_complete_request(req);
+ if (need_complete)
+ blk_mq_complete_request(req);
return BLK_EH_DONE;
}
@@ -849,6 +854,7 @@ static void recv_work(struct work_struct *work)
static bool nbd_clear_req(struct request *req, void *data, bool reserved)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
+ bool need_complete;
/* don't abort one completed request */
if (blk_mq_request_completed(req))
@@ -856,9 +862,11 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved)
mutex_lock(&cmd->lock);
cmd->status = BLK_STS_IOERR;
+ need_complete = test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
mutex_unlock(&cmd->lock);
- blk_mq_complete_request(req);
+ if (need_complete)
+ blk_mq_complete_request(req);
return true;
}
--
2.31.1
Powered by blists - more mailing lists