Message-Id: <cb74516ed445c47cecd0e9500f5766d6e8615e83.1694592708.git.jacky_gam_2001@163.com>
Date:   Wed, 13 Sep 2023 16:36:43 +0800
From:   Ping Gan <jacky_gam_2001@....com>
To:     kbusch@...nel.org, axboe@...nel.dk, hch@....de, sagi@...mberg.me,
        kch@...dia.com, linux-kernel@...r.kernel.org,
        linux-nvme@...ts.infradead.org
Cc:     ping_gan@...l.com, jacky_gam_2001@....com
Subject: [PATCH 3/4] nvmet: support bio polling queue request

If the bio polling queue task is enabled, split and chain the bios
as needed, then fill the request into the lossless ring of the
polling queue task.
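
A minimal sketch of the hand-off object used below, for reviewers who
do not have patches 1-2 of the series at hand. The field layout is
inferred from how it is used in this patch; the consumer side (a
polling queue task that dequeues the entry, submits the bios on the
list and polls until the end_io handler marks it completed) is an
assumption about the rest of the series, not code in this patch:

	/* Illustrative sketch only, not part of this patch. */
	struct nvmet_pqt_bio_req {
		struct nvmet_req *req;	/* request being served */
		struct bio_list blist;	/* pre-split, chained bios to submit */
		int io_completed;	/* set by nvmet_pqt_bio_done() */
	};

The submission side below fills such an entry, pushes it with
nvmet_pqt_ring_enqueue() and kicks the polling task with
nvmet_wakeup_pq_thread().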

Signed-off-by: Ping Gan <jacky_gam_2001@....com>
---
 drivers/nvme/target/io-cmd-bdev.c | 243 ++++++++++++++++++++++++++----
 1 file changed, 214 insertions(+), 29 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 468833675cc9..6f7d04ae6cb7 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -184,6 +184,16 @@ static void nvmet_bio_done(struct bio *bio)
 	nvmet_req_bio_put(req, bio);
 }
 
+static void nvmet_pqt_bio_done(struct bio *bio)
+{
+	struct nvmet_pqt_bio_req *req_done = bio->bi_private;
+
+	nvmet_req_complete(req_done->req, blk_to_nvme_status(req_done->req,
+							bio->bi_status));
+	nvmet_req_bio_put(req_done->req, bio);
+	req_done->io_completed = 1;
+}
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
 				struct sg_mapping_iter *miter)
@@ -237,6 +247,38 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
+#ifdef CONFIG_NVME_MULTIPATH
+extern struct block_device *nvme_mpath_get_bdev(struct block_device *bdev);
+extern const struct block_device_operations nvme_ns_head_ops;
+#endif
+
+static inline int nvmet_chain_par_bio(struct nvmet_req *req, struct bio **bio,
+					struct sg_mapping_iter *prot_miter, struct block_device *bdev,
+					sector_t sector, struct bio_list *blist)
+{
+	struct bio *parent, *child;
+	unsigned int vec_cnt;
+	int rc;
+
+	parent = *bio;
+	vec_cnt = queue_max_segments(bdev->bd_disk->queue);
+	if (req->metadata_len) {
+		rc = nvmet_bdev_alloc_bip(req, parent,
+						prot_miter);
+		if (unlikely(rc))
+			return rc;
+	}
+	child = bio_alloc(bdev, vec_cnt, parent->bi_opf, GFP_KERNEL);
+	child->bi_iter.bi_sector = sector;
+	*bio = child;
+	bio_chain(*bio, parent);
+	parent->bi_opf |= REQ_POLLED;
+	parent->bi_opf |= REQ_NOWAIT;
+	parent->bi_opf |= REQ_NOMERGE;
+	bio_list_add(blist, parent);
+	return 0;
+}
+
 static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 {
 	unsigned int sg_cnt = req->sg_cnt;
@@ -247,8 +289,13 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 	blk_opf_t opf;
 	int i, rc;
 	struct sg_mapping_iter prot_miter;
-	unsigned int iter_flags;
+	unsigned int iter_flags, max_sectors;
+	unsigned short vec_cnt, max_segments;
 	unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;
+	bool pqt_enabled = nvmet_pqt_enabled();
+	unsigned int sg_len;
+	struct nvmet_pqt_bio_req *req_done = NULL;
+	struct block_device *bdev = req->ns->bdev;
 
 	if (!nvmet_check_transfer_len(req, total_len))
 		return;
@@ -268,6 +315,24 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 		iter_flags = SG_MITER_FROM_SG;
 	}
 
+#ifdef CONFIG_NVME_MULTIPATH
+	if (pqt_enabled && bdev->bd_disk->fops == &nvme_ns_head_ops) {
+		bdev = nvme_mpath_get_bdev(bdev);
+		if (!bdev) {
+			nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
+			return;
+		}
+		opf |= REQ_DRV;
+	}
+#endif
+	if (pqt_enabled) {
+		req_done = kmalloc(sizeof(*req_done), GFP_KERNEL);
+		if (!req_done) {
+			nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
+			return;
+		}
+	}
+
 	if (is_pci_p2pdma_page(sg_page(req->sg)))
 		opf |= REQ_NOMERGE;
 
@@ -278,54 +343,174 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 		bio_init(bio, req->ns->bdev, req->inline_bvec,
 			 ARRAY_SIZE(req->inline_bvec), opf);
 	} else {
-		bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
+		vec_cnt = bio_max_segs(sg_cnt);
+		if (pqt_enabled)
+			vec_cnt = queue_max_segments(bdev->bd_disk->queue);
+		bio = bio_alloc(bdev, vec_cnt, opf,
 				GFP_KERNEL);
 	}
 	bio->bi_iter.bi_sector = sector;
-	bio->bi_private = req;
-	bio->bi_end_io = nvmet_bio_done;
+	if (!pqt_enabled) {
+		bio->bi_private = req;
+		bio->bi_end_io = nvmet_bio_done;
+	} else {
+		req_done->req = req;
+		bio->bi_private = req_done;
+		bio->bi_end_io = nvmet_pqt_bio_done;
+	}
 
-	blk_start_plug(&plug);
+	if (!pqt_enabled)
+		blk_start_plug(&plug);
 	if (req->metadata_len)
 		sg_miter_start(&prot_miter, req->metadata_sg,
 			       req->metadata_sg_cnt, iter_flags);
 
-	for_each_sg(req->sg, sg, req->sg_cnt, i) {
-		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
-				!= sg->length) {
-			struct bio *prev = bio;
-
-			if (req->metadata_len) {
-				rc = nvmet_bdev_alloc_bip(req, bio,
-							  &prot_miter);
-				if (unlikely(rc)) {
-					bio_io_error(bio);
-					return;
+	if (!pqt_enabled) {
+		for_each_sg(req->sg, sg, req->sg_cnt, i) {
+			while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
+					!= sg->length) {
+				struct bio *prev = bio;
+
+				if (req->metadata_len) {
+					rc = nvmet_bdev_alloc_bip(req, bio,
+								  &prot_miter);
+					if (unlikely(rc)) {
+						bio_io_error(bio);
+						return;
+					}
 				}
-			}
 
-			bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
-					opf, GFP_KERNEL);
-			bio->bi_iter.bi_sector = sector;
+				bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
+						opf, GFP_KERNEL);
+				bio->bi_iter.bi_sector = sector;
 
-			bio_chain(bio, prev);
-			submit_bio(prev);
-		}
+				bio_chain(bio, prev);
+				submit_bio(prev);
+			}
 
-		sector += sg->length >> 9;
-		sg_cnt--;
+			sector += sg->length >> 9;
+			sg_cnt--;
+		}
+	} else {
+		bio_list_init(&req_done->blist);
+		if (!test_bit(QUEUE_FLAG_POLL, &bdev->bd_disk->queue->queue_flags))
+			goto err_bio;
+		max_sectors = bdev->bd_disk->queue->limits.max_sectors;
+		max_sectors <<= 9;
+		max_segments = queue_max_segments(bdev->bd_disk->queue);
+		sg_len = 0;
+		unsigned int offset, len, vec_len, pg_idx;
+		bool sg_start_pg = true, need_chain_bio = false;
+		struct page *sglist_page, *max_sector_align;
+		sector_t temp_sector;
+
+		/*
+		 * For bio polling mode, split the bio here to avoid
+		 * the block layer splitting it at submission time.
+		 */
+		for_each_sg(req->sg, sg, req->sg_cnt, i) {
+			temp_sector = sector;
+			offset = (sg->offset % PAGE_SIZE);
+			if (offset + sg->length > PAGE_SIZE) { // need to split
+				len = sg->length;
+				pg_idx = 0;
+				sglist_page = virt_to_page(page_to_virt(sg_page(sg)) + offset);
+				if (offset != 0)
+					sg_start_pg = false;
+				while (len > PAGE_SIZE) {
+					max_sector_align = virt_to_page(page_to_virt(sglist_page) +
+											(PAGE_SIZE * pg_idx));
+					vec_len = sg_start_pg ? PAGE_SIZE : (PAGE_SIZE - offset);
+					if (bio->bi_vcnt == max_segments - 1 ||
+							sg_len + vec_len > max_sectors)
+						need_chain_bio = true;
+					else {
+						__bio_add_page(bio, max_sector_align,
+								vec_len, sg_start_pg ? 0 : offset);
+						temp_sector += vec_len >> 9;
+						sg_len += vec_len;
+					}
+					if (need_chain_bio) {
+						rc = nvmet_chain_par_bio(req, &bio, &prot_miter,
+								bdev, temp_sector, &req_done->blist);
+						if (unlikely(rc))
+							goto err_bio;
+						__bio_add_page(bio, max_sector_align, vec_len,
+								sg_start_pg ? 0 : offset);
+						temp_sector += vec_len >> 9;
+						sg_len = vec_len;
+						need_chain_bio = false;
+					}
+					if (!sg_start_pg) {
+						len -= (PAGE_SIZE - offset);
+						sg_start_pg = true;
+					} else {
+						len -= PAGE_SIZE;
+					}
+					pg_idx++;
+				}
+				if (len > 0) {
+					max_sector_align = virt_to_page(page_to_virt(sglist_page) +
+											(pg_idx * PAGE_SIZE));
+					if (bio->bi_vcnt == max_segments - 1 ||
+							sg_len + len > max_sectors) {
+						rc = nvmet_chain_par_bio(req, &bio, &prot_miter,
+								bdev, temp_sector, &req_done->blist);
+						if (unlikely(rc))
+							goto err_bio;
+						sg_len = len;
+					} else {
+						sg_len += len;
+					}
+					__bio_add_page(bio, max_sector_align, len, 0);
+					temp_sector += len >> 9;
+				}
+			} else {
+				if (bio->bi_vcnt == max_segments - 1 ||
+						sg_len + sg->length > max_sectors) {
+					rc = nvmet_chain_par_bio(req, &bio, &prot_miter,
+							bdev, temp_sector, &req_done->blist);
+					if (unlikely(rc))
+						goto err_bio;
+					sg_len = sg->length;
+				} else {
+					sg_len += sg->length;
+				}
+				__bio_add_page(bio, sg_page(sg), sg->length, sg->offset);
+			}
+			sector += sg->length >> 9;
+			sg_cnt--;
+		}
 	}
 
 	if (req->metadata_len) {
 		rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
 		if (unlikely(rc)) {
-			bio_io_error(bio);
-			return;
+			goto err_bio;
 		}
 	}
 
-	submit_bio(bio);
-	blk_finish_plug(&plug);
+	if (pqt_enabled) {
+		bio->bi_opf |= REQ_POLLED;
+		bio->bi_opf |= REQ_NOWAIT;
+		bio->bi_opf |= REQ_NOMERGE;
+		bio_list_add(&req_done->blist, bio);
+		req_done->io_completed = 0;
+		rc = nvmet_pqt_ring_enqueue(req_done);
+		if (rc < 0)
+			goto err_bio;
+		nvmet_wakeup_pq_thread();
+	} else {
+		submit_bio(bio);
+	}
+	if (!pqt_enabled)
+		blk_finish_plug(&plug);
+	return;
+err_bio:
+	bio_io_error(bio);
+	if (pqt_enabled)
+		kfree(req_done);
+	return;
 }
 
 static void nvmet_bdev_execute_flush(struct nvmet_req *req)
-- 
2.26.2
