[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230503183821.1473305-9-john.g.garry@oracle.com>
Date: Wed, 3 May 2023 18:38:13 +0000
From: John Garry <john.g.garry@...cle.com>
To: axboe@...nel.dk, kbusch@...nel.org, hch@....de, sagi@...mberg.me,
martin.petersen@...cle.com, djwong@...nel.org,
viro@...iv.linux.org.uk, brauner@...nel.org, dchinner@...hat.com,
jejb@...ux.ibm.com
Cc: linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-nvme@...ts.infradead.org, linux-scsi@...r.kernel.org,
linux-xfs@...r.kernel.org, linux-fsdevel@...r.kernel.org,
linux-security-module@...r.kernel.org, paul@...l-moore.com,
jmorris@...ei.org, serge@...lyn.com,
John Garry <john.g.garry@...cle.com>
Subject: [PATCH RFC 08/16] block: Add support for atomic_write_unit
Add bio.atomic_write_unit, which is the minimum size into which we can
split a bio. Any split of a bio must be a multiple of this size and must
also be aligned to this size.
In __bio_iov_iter_get_pages(), use atomic_write_unit to trim a bio to
be a multiple of atomic_write_unit.
In bio_split_rw(), we need to consider splitting as follows:
- For a regular split which does not cross an atomic write boundary, same
as in __bio_iov_iter_get_pages(), trim to be a multiple of
atomic_write_unit
- We also need to check for when a bio straddles an atomic write boundary.
In this case, split to be start/end-aligned with the boundary.
We need to ignore lim->max_sectors since it may be less than
bio->atomic_write_unit, which we cannot tolerate.
Signed-off-by: John Garry <john.g.garry@...cle.com>
---
block/bio.c | 7 +++-
block/blk-merge.c | 84 ++++++++++++++++++++++++++++++++++-----
include/linux/blk_types.h | 2 +
3 files changed, 81 insertions(+), 12 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index fd11614bba4d..fc2f29e1c14c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -247,6 +247,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
unsigned short max_vecs, blk_opf_t opf)
{
bio->bi_next = NULL;
+ bio->atomic_write_unit = 0;
bio->bi_bdev = bdev;
bio->bi_opf = opf;
bio->bi_flags = 0;
@@ -815,6 +816,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_iter = bio_src->bi_iter;
+ bio->atomic_write_unit = bio_src->atomic_write_unit;
if (bio->bi_bdev) {
if (bio->bi_bdev == bio_src->bi_bdev &&
bio_flagged(bio_src, BIO_REMAPPED))
@@ -1273,7 +1275,10 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
- trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
+ if (bio->atomic_write_unit)
+ trim = size & (bio->atomic_write_unit - 1);
+ else
+ trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
iov_iter_revert(iter, trim);
size -= trim;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 6460abdb2426..95ab6b644955 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -171,7 +171,17 @@ static inline unsigned get_max_io_size(struct bio *bio,
{
unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
- unsigned max_sectors = lim->max_sectors, start, end;
+ unsigned max_sectors, start, end;
+
+ /*
+ * We ignore lim->max_sectors for atomic writes simply because
+ * it may be less than bio->atomic_write_unit, which we cannot
+ * tolerate.
+ */
+ if (bio->bi_opf & REQ_ATOMIC)
+ max_sectors = lim->atomic_write_max_bytes >> SECTOR_SHIFT;
+ else
+ max_sectors = lim->max_sectors;
if (lim->chunk_sectors) {
max_sectors = min(max_sectors,
@@ -256,6 +266,22 @@ static bool bvec_split_segs(const struct queue_limits *lim,
return len > 0 || bv->bv_len > max_len;
}
+/* True if @bytes from @bio's start sector cross a multiple of @boundary. */
+static bool bio_straddles_boundary(struct bio *bio, unsigned int bytes,
+				   unsigned int boundary)
+{
+	loff_t start = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+
+	if (!boundary)	/* no boundary configured: nothing to straddle */
+		return false;
+
+	/*
+	 * Ending exactly on a boundary is not straddling, but a misaligned
+	 * start with bytes == boundary is (start_mod == end_mod != 0).
+	 */
+	return (start % boundary) + bytes > boundary;
+}
+
/**
* bio_split_rw - split a bio in two bios
* @bio: [in] bio to be split
@@ -276,10 +302,15 @@ static bool bvec_split_segs(const struct queue_limits *lim,
* responsible for ensuring that @bs is only destroyed after processing of the
* split bio has finished.
*/
+
+
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
unsigned *segs, struct bio_set *bs, unsigned max_bytes)
{
+ unsigned int atomic_write_boundary = lim->atomic_write_boundary;
+ bool atomic_write = bio->bi_opf & REQ_ATOMIC;
struct bio_vec bv, bvprv, *bvprvp = NULL;
+ bool straddles_boundary = false;
struct bvec_iter iter;
unsigned nsegs = 0, bytes = 0;
@@ -291,14 +322,31 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
goto split;
+ if (atomic_write && atomic_write_boundary) {
+ straddles_boundary = bio_straddles_boundary(bio,
+ bytes + bv.bv_len, atomic_write_boundary);
+ }
if (nsegs < lim->max_segments &&
bytes + bv.bv_len <= max_bytes &&
- bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
+ bv.bv_offset + bv.bv_len <= PAGE_SIZE &&
+ !straddles_boundary) {
nsegs++;
bytes += bv.bv_len;
} else {
- if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
- lim->max_segments, max_bytes))
+ bool split_the_segs =
+ bvec_split_segs(lim, &bv, &nsegs, &bytes,
+ lim->max_segments, max_bytes);
+
+ /*
+ * We may not actually straddle the boundary as we may
+ * have added less bytes than anticipated
+ */
+ if (straddles_boundary) {
+ straddles_boundary = bio_straddles_boundary(bio,
+ bytes, atomic_write_boundary);
+ }
+
+ if (split_the_segs || straddles_boundary)
goto split;
}
@@ -321,12 +369,25 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
*segs = nsegs;
- /*
- * Individual bvecs might not be logical block aligned. Round down the
- * split size so that each bio is properly block size aligned, even if
- * we do not use the full hardware limits.
- */
- bytes = ALIGN_DOWN(bytes, lim->logical_block_size);
+ if (straddles_boundary) {
+ loff_t new_end = (bio->bi_iter.bi_sector << SECTOR_SHIFT) + bytes;
+ unsigned int trim = new_end & (atomic_write_boundary - 1);
+ bytes -= trim;
+ new_end = (bio->bi_iter.bi_sector << SECTOR_SHIFT) + bytes;
+ BUG_ON(new_end % atomic_write_boundary);
+ } else if (bio->atomic_write_unit) {
+ unsigned int atomic_write_unit = bio->atomic_write_unit;
+ unsigned int trim = bytes % atomic_write_unit;
+
+ bytes -= trim;
+ } else {
+ /*
+ * Individual bvecs might not be logical block aligned. Round down the
+ * split size so that each bio is properly block size aligned, even if
+ * we do not use the full hardware limits.
+ */
+ bytes = ALIGN_DOWN(bytes, lim->logical_block_size);
+ }
/*
* Bio splitting may cause subtle trouble such as hang when doing sync
@@ -355,7 +416,8 @@ struct bio *__bio_split_to_limits(struct bio *bio,
const struct queue_limits *lim,
unsigned int *nr_segs)
{
- struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split;
+ struct block_device *bi_bdev = bio->bi_bdev;
+ struct bio_set *bs = &bi_bdev->bd_disk->bio_split;
struct bio *split;
switch (bio_op(bio)) {
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 347b52e00322..daa44eac9f14 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -303,6 +303,8 @@ struct bio {
struct bio_set *bi_pool;
+ unsigned int atomic_write_unit;
+
/*
* We can inline a number of vecs at the end of the bio, to avoid
* double allocations for a small number of bio_vecs. This member
--
2.31.1
Powered by blists - more mailing lists