lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1269842965-4700-6-git-send-email-dmonakhov@openvz.org>
Date:	Mon, 29 Mar 2010 10:09:24 +0400
From:	Dmitry Monakhov <dmonakhov@...nvz.org>
To:	linux-kernel@...r.kernel.org
Cc:	jens.axboe@...cle.com, Dmitry Monakhov <dmonakhov@...nvz.org>
Subject: [PATCH 5/6] blkdev: optimize discard request logic

Current submit procedure is suboptimal. Each bio is flagged with
barrier flag, so we will end up with following trace:
for_each_bio(discar_bios) {
  q->pre_flush
  handle(bio);
  disk->flush_cache
  q->post_flush
}
But in fact user want following semantics:
q->pre_flush()
for_each_bio(discar_bios)
  handle(bio)
q->pre_flush()

Signed-off-by: Dmitry Monakhov <dmonakhov@...nvz.org>
---
 block/blk-lib.c |  141 +++++++++++++++++++++++++++++++-----------------------
 1 files changed, 81 insertions(+), 60 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 4c5a278..5326d91 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -129,17 +129,22 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
 
 static void blkdev_discard_end_io(struct bio *bio, int err)
 {
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-	}
-
-	if (bio->bi_private)
-		complete(bio->bi_private);
-	__free_page(bio_page(bio));
+	if (bio_page(bio))
+		put_page(bio_page(bio));
+}
 
-	bio_put(bio);
+static int add_discard_payload(struct bio *bio, gfp_t gfp_mask, int len)
+{
+	struct page *page;
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+
+	page = alloc_page(gfp_mask | __GFP_ZERO);
+	if (!page)
+			return -ENOMEM;
+	if (bio_add_pc_page(q, bio, page, len, 0) < len)
+		BUG();
+	get_page(page);
+	return 0;
 }
 
 /**
@@ -153,16 +158,18 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
  * Description:
  *    Issue a discard request for the sectors in question.
  */
+
 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+			sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
 {
+	int ret = 0;
+	struct bio *bio;
+	struct bio_batch bb;
+	unsigned int sz, issued = 0;
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q = bdev_get_queue(bdev);
-	int type = flags & BLKDEV_IFL_BARRIER ?
-		DISCARD_BARRIER : DISCARD_NOBARRIER;
-	struct bio *bio;
-	struct page *page;
-	int ret = 0;
+	unsigned int sect_sz;
+	sector_t max_discard_sectors;
 
 	if (!q)
 		return -ENXIO;
@@ -170,65 +177,79 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	if (!blk_queue_discard(q))
 		return -EOPNOTSUPP;
 
-	while (nr_sects && !ret) {
-		unsigned int sector_size = q->limits.logical_block_size;
-		unsigned int max_discard_sectors =
-			min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+	sect_sz = q->limits.logical_block_size;
+	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+	atomic_set(&bb.done, 0);
+	bb.flags = 1 << BIO_UPTODATE;
+	bb.wait = &wait;
+	bb.end_io = blkdev_discard_end_io;
 
-		bio = bio_alloc(gfp_mask, 1);
+	if (flags & BLKDEV_IFL_BARRIER) {
+		/* issue async barrier before the data */
+		ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0);
+		if (ret)
+			return ret;
+	}
+submit:
+	while (nr_sects != 0) {
+		bio = bio_alloc(gfp_mask,
+				min(nr_sects, (sector_t)BIO_MAX_PAGES));
 		if (!bio)
-			goto out;
+			break;
+
 		bio->bi_sector = sector;
-		bio->bi_end_io = blkdev_discard_end_io;
-		bio->bi_bdev = bdev;
-		if (flags & BLKDEV_IFL_BARRIER)
-			bio->bi_private = &wait;
+		bio->bi_bdev   = bdev;
+		bio->bi_end_io = bio_batch_end_io;
+
+		if (flags && BLKDEV_IFL_WAIT)
+			bio->bi_private = &bb;
 
 		if (blk_queue_discard_mem(q)) {
 			/*
 			 * Add a zeroed one-sector payload as that's what
-			 * our current implementations need.  If we'll ever
+			 * our current implementations need. If we'll ever
 			 * need more the interface will need revisiting.
 			 */
-			page = alloc_page(gfp_mask | __GFP_ZERO);
-			if (!page)
-				goto out_free_bio;
-			if (bio_add_pc_page(q, bio, page, sector_size, 0) <
-				sector_size)
-				goto out_free_page;
-		}
-		/*
-		 * And override the bio size - the way discard works we
-		 * touch many more blocks on disk than the actual payload
-		 * length.
-		 */
-		if (nr_sects > max_discard_sectors) {
-			bio->bi_size = max_discard_sectors << 9;
-			nr_sects -= max_discard_sectors;
-			sector += max_discard_sectors;
-		} else {
-			bio->bi_size = nr_sects << 9;
-			nr_sects = 0;
+			ret = add_discard_payload(bio, gfp_mask, sect_sz);
+			if (ret) {
+				bio_put(bio);
+				break;
+			}
 		}
+		sz = min(max_discard_sectors , nr_sects);
+		bio->bi_size = sz << 9;
+		nr_sects -= sz;
+		sector += sz;
 
-		bio_get(bio);
-		submit_bio(type, bio);
+		issued++;
+		submit_bio(DISCARD_NOBARRIER, bio);
+	}
+	/*
+	 * When all data bios are in flight. Send final barrier if requeted.
+	 */
+	if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER)
+		ret = blkdev_issue_flush(bdev, gfp_mask, NULL,
+					flags & BLKDEV_IFL_WAIT);
 
-		if (flags & BLKDEV_IFL_BARRIER)
+	if (flags & BLKDEV_IFL_WAIT)
+		/* Wait for submitted bios and then continue */
+		while ( issued != atomic_read(&bb.done))
 			wait_for_completion(&wait);
 
-		if (bio_flagged(bio, BIO_EOPNOTSUPP))
-			ret = -EOPNOTSUPP;
-		else if (!bio_flagged(bio, BIO_UPTODATE))
-			ret = -EIO;
-		bio_put(bio);
+	if (!test_bit(BIO_UPTODATE, &bb.flags))
+		/* One of bios in the batch was completed with error.*/
+		ret = -EIO;
+
+	if (ret)
+		goto out;
+
+	if (test_bit(BIO_EOPNOTSUPP, &bb.flags)) {
+		ret = -EOPNOTSUPP;
+		goto out;
 	}
-	return ret;
-out_free_page:
-	__free_page(page);
-out_free_bio:
-	bio_put(bio);
+	if (nr_sects != 0)
+		goto submit;
 out:
-	return -ENOMEM;
+	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
-- 
1.6.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ