Message-Id: <cb74516ed445c47cecd0e9500f5766d6e8615e83.1694592708.git.jacky_gam_2001@163.com>
Date: Wed, 13 Sep 2023 16:36:43 +0800
From: Ping Gan <jacky_gam_2001@....com>
To: kbusch@...nel.org, axboe@...nel.dk, hch@....de, sagi@...mberg.me,
kch@...dia.com, linux-kernel@...r.kernel.org,
linux-nvme@...ts.infradead.org
Cc: ping_gan@...l.com, jacky_gam_2001@....com
Subject: [PATCH 3/4] nvmet: support bio polling queue request

If the bio polling queue task is enabled, split and chain the
bios as needed, then fill the request into the lossless ring of
the polling queue task (see the sketch after the diffstat below).

Signed-off-by: Ping Gan <jacky_gam_2001@....com>
---
drivers/nvme/target/io-cmd-bdev.c | 243 ++++++++++++++++++++++++++----
1 file changed, 214 insertions(+), 29 deletions(-)
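
The polling queue task plumbing (struct nvmet_pqt_bio_req,
nvmet_pqt_enabled(), nvmet_pqt_ring_enqueue(), nvmet_wakeup_pq_thread())
is introduced by the earlier patches of this series; this patch only
uses it. Below is a minimal sketch of the per-request context as it is
used here; the field layout and comments are inferred from the hunks,
not copied from the series' actual definition:

struct nvmet_pqt_bio_req {
	struct nvmet_req *req;	/* originating nvmet request */
	struct bio_list blist;	/* pre-split, chained bios, each marked
				 * REQ_POLLED | REQ_NOWAIT | REQ_NOMERGE */
	int io_completed;	/* set by nvmet_pqt_bio_done() when the
				 * final bio completes */
};

/*
 * Assumed flow: nvmet_bdev_execute_rw() fills blist, hands the context
 * to the polling queue task via nvmet_pqt_ring_enqueue() and kicks it
 * with nvmet_wakeup_pq_thread(); the task submits the queued bios,
 * polls the device until io_completed is set, and frees the context.
 */
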
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 468833675cc9..6f7d04ae6cb7 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -184,6 +184,16 @@ static void nvmet_bio_done(struct bio *bio)
nvmet_req_bio_put(req, bio);
}
+static void nvmet_pqt_bio_done(struct bio *bio)
+{
+ struct nvmet_pqt_bio_req *req_done = bio->bi_private;
+
+ nvmet_req_complete(req_done->req, blk_to_nvme_status(req_done->req,
+ bio->bi_status));
+ nvmet_req_bio_put(req_done->req, bio);
+ req_done->io_completed = 1;
+}
+
#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
struct sg_mapping_iter *miter)
@@ -237,6 +247,38 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+#ifdef CONFIG_NVME_MULTIPATH
+extern struct block_device *nvme_mpath_get_bdev(struct block_device *bdev);
+extern const struct block_device_operations nvme_ns_head_ops;
+#endif
+
+static inline int nvmet_chain_par_bio(struct nvmet_req *req, struct bio **bio,
+ struct sg_mapping_iter *prot_miter, struct block_device *bdev,
+ sector_t sector, struct bio_list *blist)
+{
+ struct bio *parent, *child;
+ unsigned int vec_cnt;
+ int rc;
+
+ parent = *bio;
+ vec_cnt = queue_max_segments(bdev->bd_disk->queue);
+ if (req->metadata_len) {
+ rc = nvmet_bdev_alloc_bip(req, parent,
+ prot_miter);
+ if (unlikely(rc))
+ return rc;
+ }
+ child = bio_alloc(bdev, vec_cnt, parent->bi_opf, GFP_KERNEL);
+ child->bi_iter.bi_sector = sector;
+ *bio = child;
+ bio_chain(*bio, parent);
+ parent->bi_opf |= REQ_POLLED;
+ parent->bi_opf |= REQ_NOWAIT;
+ parent->bi_opf |= REQ_NOMERGE;
+ bio_list_add(blist, parent);
+ return 0;
+}
+
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
unsigned int sg_cnt = req->sg_cnt;
@@ -247,8 +289,13 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
blk_opf_t opf;
int i, rc;
struct sg_mapping_iter prot_miter;
- unsigned int iter_flags;
+ unsigned int iter_flags, max_sectors;
+ unsigned short vec_cnt, max_segments;
unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;
+ bool pqt_enabled = nvmet_pqt_enabled();
+ unsigned int sg_len;
+ struct nvmet_pqt_bio_req *req_done = NULL;
+ struct block_device *bdev = req->ns->bdev;
if (!nvmet_check_transfer_len(req, total_len))
return;
@@ -268,6 +315,24 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
iter_flags = SG_MITER_FROM_SG;
}
+#ifdef CONFIG_NVME_MULTIPATH
+ if (pqt_enabled && bdev->bd_disk->fops == &nvme_ns_head_ops) {
+ bdev = nvme_mpath_get_bdev(bdev);
+ if (!bdev) {
+ nvmet_req_complete(req, 0);
+ return;
+ }
+ opf |= REQ_DRV;
+ }
+#endif
+ if (pqt_enabled) {
+ req_done = kmalloc(sizeof(struct nvmet_pqt_bio_req), GFP_KERNEL);
+ if (!req_done) {
+ nvmet_req_complete(req, 0);
+ return;
+ }
+ }
+
if (is_pci_p2pdma_page(sg_page(req->sg)))
opf |= REQ_NOMERGE;
@@ -278,54 +343,174 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio_init(bio, req->ns->bdev, req->inline_bvec,
ARRAY_SIZE(req->inline_bvec), opf);
} else {
- bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
+ vec_cnt = bio_max_segs(sg_cnt);
+ if (pqt_enabled)
+ vec_cnt = queue_max_segments(bdev->bd_disk->queue);
+ bio = bio_alloc(bdev, vec_cnt, opf,
GFP_KERNEL);
}
bio->bi_iter.bi_sector = sector;
- bio->bi_private = req;
- bio->bi_end_io = nvmet_bio_done;
+ if (!pqt_enabled) {
+ bio->bi_private = req;
+ bio->bi_end_io = nvmet_bio_done;
+ } else {
+ req_done->req = req;
+ bio->bi_private = req_done;
+ bio->bi_end_io = nvmet_pqt_bio_done;
+ }
- blk_start_plug(&plug);
+ if (!pqt_enabled)
+ blk_start_plug(&plug);
if (req->metadata_len)
sg_miter_start(&prot_miter, req->metadata_sg,
req->metadata_sg_cnt, iter_flags);
- for_each_sg(req->sg, sg, req->sg_cnt, i) {
- while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
- != sg->length) {
- struct bio *prev = bio;
-
- if (req->metadata_len) {
- rc = nvmet_bdev_alloc_bip(req, bio,
- &prot_miter);
- if (unlikely(rc)) {
- bio_io_error(bio);
- return;
+ if (!pqt_enabled) {
+ for_each_sg(req->sg, sg, req->sg_cnt, i) {
+ while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
+ != sg->length) {
+ struct bio *prev = bio;
+
+ if (req->metadata_len) {
+ rc = nvmet_bdev_alloc_bip(req, bio,
+ &prot_miter);
+ if (unlikely(rc)) {
+ bio_io_error(bio);
+ return;
+ }
}
- }
- bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
- opf, GFP_KERNEL);
- bio->bi_iter.bi_sector = sector;
+ bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
+ opf, GFP_KERNEL);
+ bio->bi_iter.bi_sector = sector;
- bio_chain(bio, prev);
- submit_bio(prev);
- }
+ bio_chain(bio, prev);
+ submit_bio(prev);
+ }
- sector += sg->length >> 9;
- sg_cnt--;
+ sector += sg->length >> 9;
+ sg_cnt--;
+ }
+ } else {
+ bio_list_init(&req_done->blist);
+ if (!test_bit(QUEUE_FLAG_POLL, &bdev->bd_disk->queue->queue_flags))
+ goto err_bio;
+ max_sectors = bdev->bd_disk->queue->limits.max_sectors;
+ max_sectors <<= 9;
+ max_segments = queue_max_segments(bdev->bd_disk->queue);
+ sg_len = 0;
+ unsigned int offset, len, vec_len, pg_idx;
+ bool sg_start_pg = true, need_chain_bio = false;
+ struct page *sglist_page, *max_sector_align;
+ sector_t temp_sector;
+
+ /*
+ * For bio polling mode, split the bio here to avoid
+ * bio splitting in the lower layers at submit time.
+ */
+ for_each_sg(req->sg, sg, req->sg_cnt, i) {
+ temp_sector = sector;
+ offset = (sg->offset % PAGE_SIZE);
+ if (offset + sg->length > PAGE_SIZE) { /* need to split */
+ len = sg->length;
+ pg_idx = 0;
+ sglist_page = virt_to_page(page_to_virt(sg_page(sg)) + offset);
+ if (offset != 0)
+ sg_start_pg = false;
+ while (len > PAGE_SIZE) {
+ max_sector_align = virt_to_page(page_to_virt(sglist_page) +
+ (PAGE_SIZE * pg_idx));
+ vec_len = sg_start_pg ? PAGE_SIZE : (PAGE_SIZE - offset);
+ if (bio->bi_vcnt == max_segments - 1 ||
+ sg_len + vec_len > max_sectors)
+ need_chain_bio = true;
+ else {
+ __bio_add_page(bio, max_sector_align,
+ vec_len, sg_start_pg ? 0 : offset);
+ temp_sector += vec_len >> 9;
+ sg_len += vec_len;
+ }
+ if (need_chain_bio) {
+ rc = nvmet_chain_par_bio(req, &bio, &prot_miter,
+ bdev, temp_sector, &req_done->blist);
+ if (unlikely(rc))
+ goto err_bio;
+ __bio_add_page(bio, max_sector_align, vec_len,
+ sg_start_pg ? 0 : (PAGE_SIZE - offset));
+ temp_sector += vec_len >> 9;
+ sg_len = vec_len;
+ need_chain_bio = false;
+ }
+ if (!sg_start_pg) {
+ len -= (PAGE_SIZE - offset);
+ sg_start_pg = true;
+ } else {
+ len -= PAGE_SIZE;
+ }
+ pg_idx++;
+ }
+ if (len > 0) {
+ max_sector_align = virt_to_page(page_to_virt(sglist_page) +
+ (pg_idx * PAGE_SIZE));
+ if (bio->bi_vcnt == max_segments - 1 ||
+ sg_len + len > max_sectors) {
+ rc = nvmet_chain_par_bio(req, &bio, &prot_miter,
+ bdev, temp_sector, &req_done->blist);
+ if (unlikely(rc))
+ goto err_bio;
+ sg_len = len;
+ } else {
+ sg_len += len;
+ }
+ __bio_add_page(bio, max_sector_align, len, 0);
+ temp_sector += len >> 9;
+ }
+ } else {
+ if (bio->bi_vcnt == max_segments - 1 ||
+ sg_len + sg->length > max_sectors) {
+ rc = nvmet_chain_par_bio(req, &bio, &prot_miter,
+ bdev, temp_sector, &req_done->blist);
+ if (unlikely(rc))
+ goto err_bio;
+ sg_len = sg->length;
+ } else {
+ sg_len += sg->length;
+ }
+ __bio_add_page(bio, sg_page(sg), sg->length, sg->offset);
+ }
+ sector += sg->length >> 9;
+ sg_cnt--;
+ }
}
if (req->metadata_len) {
rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
if (unlikely(rc)) {
- bio_io_error(bio);
- return;
+ goto err_bio;
}
}
- submit_bio(bio);
- blk_finish_plug(&plug);
+ if (pqt_enabled) {
+ bio->bi_opf |= REQ_POLLED;
+ bio->bi_opf |= REQ_NOWAIT;
+ bio->bi_opf |= REQ_NOMERGE;
+ bio_list_add(&req_done->blist, bio);
+ req_done->io_completed = 0;
+ rc = nvmet_pqt_ring_enqueue(req_done);
+ if (rc < 0)
+ goto err_bio;
+ nvmet_wakeup_pq_thread();
+ } else {
+ submit_bio(bio);
+ }
+ if (!pqt_enabled)
+ blk_finish_plug(&plug);
+ return;
+err_bio:
+ bio_io_error(bio);
+ if (pqt_enabled)
+ kfree(req_done);
+ return;
}
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
--
2.26.2