[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230627183629.26571-4-nj.shetty@samsung.com>
Date: Wed, 28 Jun 2023 00:06:17 +0530
From: Nitesh Shetty <nj.shetty@...sung.com>
To: Jens Axboe <axboe@...nel.dk>, Jonathan Corbet <corbet@....net>,
Alasdair Kergon <agk@...hat.com>,
Mike Snitzer <snitzer@...nel.org>, dm-devel@...hat.com,
Keith Busch <kbusch@...nel.org>,
Christoph Hellwig <hch@....de>,
Sagi Grimberg <sagi@...mberg.me>,
Chaitanya Kulkarni <kch@...dia.com>,
Alexander Viro <viro@...iv.linux.org.uk>,
Christian Brauner <brauner@...nel.org>
Cc: martin.petersen@...cle.com, linux-scsi@...r.kernel.org,
willy@...radead.org, hare@...e.de, djwong@...nel.org,
bvanassche@....org, ming.lei@...hat.com, dlemoal@...nel.org,
nitheshshetty@...il.com, gost.dev@...sung.com,
Nitesh Shetty <nj.shetty@...sung.com>,
Vincent Fu <vincent.fu@...sung.com>,
Anuj Gupta <anuj20.g@...sung.com>, linux-block@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-doc@...r.kernel.org,
linux-nvme@...ts.infradead.org, linux-fsdevel@...r.kernel.org
Subject: [PATCH v13 3/9] block: add emulation for copy
For devices which do not support copy, copy emulation is added.
It is required for in-kernel users like fabrics, where a file descriptor is
not available and hence they can't use copy_file_range.
Copy-emulation is implemented by reading from source into memory and
writing to the corresponding destination asynchronously.
Emulation is also used if copy offload fails or only partially completes.
Signed-off-by: Nitesh Shetty <nj.shetty@...sung.com>
Signed-off-by: Vincent Fu <vincent.fu@...sung.com>
Signed-off-by: Anuj Gupta <anuj20.g@...sung.com>
---
block/blk-lib.c | 183 +++++++++++++++++++++++++++++++++++++-
block/blk-map.c | 4 +-
include/linux/blk_types.h | 5 ++
include/linux/blkdev.h | 3 +
4 files changed, 192 insertions(+), 3 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 10c3eadd5bf6..09e0d5d51d03 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -234,6 +234,180 @@ static ssize_t __blkdev_copy_offload(
return blkdev_copy_wait_io_completion(cio);
}
+/*
+ * Allocate a temporary buffer for one emulated copy chunk.
+ *
+ * @req_size:   number of bytes wanted
+ * @alloc_size: set to the number of bytes actually allocated on success
+ * @gfp_mask:   allocation flags passed to kvmalloc()
+ *
+ * The requested size is always tried first, even when it is smaller than a
+ * page, so that a short chunk or tail can still be copied. If that fails the
+ * size is halved and rounded down to a whole number of pages until an
+ * allocation succeeds or less than one page is left. Rounding to pages keeps
+ * every fallback size a multiple of the 512-byte sector size; plain halving
+ * could produce e.g. 8704 -> 4352 bytes, which is not.
+ *
+ * Returns the buffer (to be released with kvfree()), or NULL on failure.
+ */
+static void *blkdev_copy_alloc_buf(sector_t req_size, sector_t *alloc_size,
+		gfp_t gfp_mask)
+{
+	const sector_t min_size = PAGE_SIZE;
+	void *buf;
+
+	/* nothing to allocate; avoid handing back a zero-sized object */
+	if (!req_size)
+		return NULL;
+
+	do {
+		buf = kvmalloc(req_size, gfp_mask);
+		if (buf) {
+			*alloc_size = req_size;
+			return buf;
+		}
+		/* retry with half the size, rounded down to whole pages */
+		req_size = (req_size >> 1) & ~(min_size - 1);
+	} while (req_size >= min_size);
+
+	return NULL;
+}
+
+/*
+ * Completion handler for the write half of one emulated copy chunk.
+ *
+ * A failed write clamps the parent's completed length to the offset of this
+ * bio relative to cio->pos_out. The chunk's buffer, bio and context are then
+ * released and the chunk's reference on the parent cio is dropped; whoever
+ * drops the last reference either invokes the caller's endio callback and
+ * frees the cio, or wakes the waiting task.
+ *
+ * NOTE(review): the buffer is released with kfree() on the address derived
+ * from the first bio_vec, while the submitter obtains it from kvmalloc().
+ * Confirm this is valid when kvmalloc() falls back to vmalloc().
+ */
+static void blkdev_copy_emulate_write_endio(struct bio *bio)
+{
+	struct copy_ctx *chunk = bio->bi_private;
+	struct cio *parent = chunk->cio;
+
+	if (bio->bi_status) {
+		sector_t done = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
+				parent->pos_out;
+
+		parent->comp_len = min_t(sector_t, done, parent->comp_len);
+	}
+
+	/* the data buffer must go before the bio that describes it */
+	kfree(bvec_virt(&bio->bi_io_vec[0]));
+	bio_map_kern_endio(bio);
+	kfree(chunk);
+
+	/* other chunks are still in flight: nothing more to do */
+	if (!atomic_dec_and_test(&parent->refcount))
+		return;
+
+	if (!parent->endio) {
+		blk_wake_io_task(parent->waiter);
+		return;
+	}
+
+	parent->endio(parent->private, parent->comp_len);
+	kfree(parent);
+}
+
+/*
+ * Completion handler for the read half of one emulated copy chunk.
+ *
+ * On success the read bio is released and the paired write bio
+ * (ctx->write_bio) is queued for submission through ctx->dispatch_work.
+ *
+ * On failure the parent's completed length is clamped to the offset of this
+ * bio relative to cio->pos_in, everything owned by the chunk (buffer, read
+ * bio, the never-submitted write bio and the context) is released, and the
+ * chunk's reference on the parent cio is dropped. The function then returns:
+ * nothing may be scheduled or freed again once ctx is gone.
+ *
+ * NOTE(review): the buffer is released with kfree() on the address derived
+ * from the first bio_vec, while the submitter obtains it from kvmalloc().
+ * Confirm this is valid when kvmalloc() falls back to vmalloc().
+ */
+static void blkdev_copy_emulate_read_endio(struct bio *read_bio)
+{
+	struct copy_ctx *ctx = read_bio->bi_private;
+	struct cio *cio = ctx->cio;
+	sector_t clen;
+
+	if (!read_bio->bi_status) {
+		/*
+		 * Release the bio the same way the error path and the write
+		 * completion do, instead of a bare kfree() that skips
+		 * bio_uninit().
+		 */
+		bio_map_kern_endio(read_bio);
+		schedule_work(&ctx->dispatch_work);
+		return;
+	}
+
+	clen = (read_bio->bi_iter.bi_sector << SECTOR_SHIFT) - cio->pos_in;
+	cio->comp_len = min_t(sector_t, clen, cio->comp_len);
+	kfree(bvec_virt(&read_bio->bi_io_vec[0]));
+	bio_map_kern_endio(read_bio);
+	/* the write bio will never be submitted, so release it here */
+	bio_map_kern_endio(ctx->write_bio);
+	kfree(ctx);
+
+	if (atomic_dec_and_test(&cio->refcount)) {
+		if (cio->endio) {
+			cio->endio(cio->private, cio->comp_len);
+			kfree(cio);
+		} else {
+			blk_wake_io_task(cio->waiter);
+		}
+	}
+}
+
+/*
+ * Work item body: submit the write bio stored in the copy context that
+ * embeds this work_struct.
+ */
+static void blkdev_copy_dispatch_work(struct work_struct *work)
+{
+	struct copy_ctx *chunk;
+
+	chunk = container_of(work, struct copy_ctx, dispatch_work);
+	submit_bio(chunk->write_bio);
+}
+
+/*
+ * If native copy offload feature is absent, this function tries to emulate,
+ * by copying data from source to a temporary buffer and from buffer to
+ * destination device.
+ *
+ * The range is split into chunks no larger than the smaller of the two
+ * queues' hardware limits. For each chunk a buffer, a copy_ctx, a read bio
+ * and a write bio are set up; the read bio is submitted here and the write
+ * bio is issued later from the read completion path.
+ *
+ * @bdev_in/@pos_in:   source device and byte offset
+ * @bdev_out/@pos_out: destination device and byte offset
+ * @len:               number of bytes to copy
+ * @endio/@private:    optional completion callback and its argument
+ * @gfp_mask:          allocation flags for every allocation made here
+ *
+ * Returns the result of blkdev_copy_wait_io_completion() once at least one
+ * chunk is in flight, or -ENOMEM if nothing could be submitted.
+ *
+ * NOTE(review): the refcount starts at 0 and is raised per chunk, so a chunk
+ * that completes before the next one is submitted can drop it to 0 while this
+ * loop is still running. Confirm how blkdev_copy_wait_io_completion() and the
+ * endio handlers cope with that.
+ */
+static ssize_t __blkdev_copy_emulate(
+		struct block_device *bdev_in, loff_t pos_in,
+		struct block_device *bdev_out, loff_t pos_out,
+		size_t len, cio_iodone_t endio, void *private, gfp_t gfp_mask)
+{
+	struct request_queue *in = bdev_get_queue(bdev_in);
+	struct request_queue *out = bdev_get_queue(bdev_out);
+	struct bio *read_bio, *write_bio;
+	void *buf = NULL;
+	struct copy_ctx *ctx;
+	struct cio *cio;
+	sector_t buf_len, req_len, rem = 0;
+	sector_t max_src_hw_len = min_t(unsigned int,
+			queue_max_hw_sectors(in),
+			queue_max_segments(in) << (PAGE_SHIFT - SECTOR_SHIFT))
+			<< SECTOR_SHIFT;
+	sector_t max_dst_hw_len = min_t(unsigned int,
+			queue_max_hw_sectors(out),
+			queue_max_segments(out) << (PAGE_SHIFT - SECTOR_SHIFT))
+			<< SECTOR_SHIFT;
+	sector_t max_hw_len = min_t(unsigned int,
+			max_src_hw_len, max_dst_hw_len);
+
+	/* honour the caller's allocation context, like every other allocation */
+	cio = kzalloc(sizeof(*cio), gfp_mask);
+	if (!cio)
+		return -ENOMEM;
+	atomic_set(&cio->refcount, 0);
+	cio->pos_in = pos_in;
+	cio->pos_out = pos_out;
+	cio->waiter = current;
+	cio->endio = endio;
+	cio->private = private;
+	/*
+	 * Start from the full length: the completion handlers only ever clamp
+	 * comp_len downwards with min_t(), so leaving it at the kzalloc() value
+	 * of 0 would make every copy report 0 bytes.
+	 */
+	cio->comp_len = len;
+
+	for (rem = len; rem > 0; rem -= buf_len) {
+		/* compare as sector_t; an int cast truncates large lengths */
+		req_len = min_t(sector_t, max_hw_len, rem);
+
+		/* buf_len may come back smaller than req_len */
+		buf = blkdev_copy_alloc_buf(req_len, &buf_len, gfp_mask);
+		if (!buf)
+			goto err_alloc_buf;
+
+		ctx = kzalloc(sizeof(*ctx), gfp_mask);
+		if (!ctx)
+			goto err_ctx;
+
+		read_bio = bio_map_kern(in, buf, buf_len, gfp_mask);
+		if (IS_ERR(read_bio))
+			goto err_read_bio;
+
+		write_bio = bio_map_kern(out, buf, buf_len, gfp_mask);
+		if (IS_ERR(write_bio))
+			goto err_write_bio;
+
+		ctx->cio = cio;
+		ctx->write_bio = write_bio;
+		INIT_WORK(&ctx->dispatch_work, blkdev_copy_dispatch_work);
+
+		read_bio->bi_iter.bi_sector = pos_in >> SECTOR_SHIFT;
+		read_bio->bi_iter.bi_size = buf_len;
+		read_bio->bi_opf = REQ_OP_READ | REQ_SYNC;
+		bio_set_dev(read_bio, bdev_in);
+		read_bio->bi_end_io = blkdev_copy_emulate_read_endio;
+		read_bio->bi_private = ctx;
+
+		write_bio->bi_iter.bi_size = buf_len;
+		write_bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
+		bio_set_dev(write_bio, bdev_out);
+		write_bio->bi_end_io = blkdev_copy_emulate_write_endio;
+		write_bio->bi_iter.bi_sector = pos_out >> SECTOR_SHIFT;
+		write_bio->bi_private = ctx;
+
+		/* one reference per chunk, dropped by its completion handler */
+		atomic_inc(&cio->refcount);
+		submit_bio(read_bio);
+
+		pos_in += buf_len;
+		pos_out += buf_len;
+	}
+	return blkdev_copy_wait_io_completion(cio);
+
+err_write_bio:
+	/*
+	 * Release the bio the same way the completion handlers do; bio_put()
+	 * is for bios that come from a bio_set.
+	 */
+	bio_map_kern_endio(read_bio);
+err_read_bio:
+	kfree(ctx);
+err_ctx:
+	kvfree(buf);
+err_alloc_buf:
+	/* at most the bytes submitted so far (len - rem) can complete */
+	cio->comp_len = min_t(sector_t, cio->comp_len, len - rem);
+	if (!atomic_read(&cio->refcount)) {
+		kfree(cio);
+		return -ENOMEM;
+	}
+	return blkdev_copy_wait_io_completion(cio);
+}
+
static inline ssize_t blkdev_copy_sanity_check(
struct block_device *bdev_in, loff_t pos_in,
struct block_device *bdev_out, loff_t pos_out,
@@ -284,9 +458,16 @@ ssize_t blkdev_copy_offload(
if (ret)
return ret;
- if (blk_queue_copy(q_in) && blk_queue_copy(q_out))
+ if (blk_queue_copy(q_in) && blk_queue_copy(q_out)) {
ret = __blkdev_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
len, endio, private, gfp_mask);
+ if (ret < 0)
+ ret = 0;
+ }
+
+ if (ret != len)
+ ret = __blkdev_copy_emulate(bdev_in, pos_in + ret, bdev_out,
+ pos_out + ret, len - ret, endio, private, gfp_mask);
return ret;
}
diff --git a/block/blk-map.c b/block/blk-map.c
index 44d74a30ddac..ceeb70a95fd1 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -363,7 +363,7 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio)
#endif
}
-static void bio_map_kern_endio(struct bio *bio)
+void bio_map_kern_endio(struct bio *bio)
{
bio_invalidate_vmalloc_pages(bio);
bio_uninit(bio);
@@ -380,7 +380,7 @@ static void bio_map_kern_endio(struct bio *bio)
* Map the kernel address into a bio suitable for io to a block
* device. Returns an error pointer in case of error.
*/
-static struct bio *bio_map_kern(struct request_queue *q, void *data,
+struct bio *bio_map_kern(struct request_queue *q, void *data,
unsigned int len, gfp_t gfp_mask)
{
unsigned long kaddr = (unsigned long)data;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 336146798e56..f8c80940c7ad 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -562,4 +562,9 @@ struct cio {
atomic_t refcount;
};
+/*
+ * Per-chunk state for emulated copy: ties one read/write bio pair to the
+ * parent cio that tracks the whole copy request.
+ */
+struct copy_ctx {
+ /* parent request; its refcount and comp_len are updated on completion */
+ struct cio *cio;
+ /* scheduled from the read completion; its handler submits write_bio */
+ struct work_struct dispatch_work;
+ /* write bio prepared at submission time and issued by dispatch_work */
+ struct bio *write_bio;
+};
#endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 963f5c97dec0..c176bf6173c5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1047,6 +1047,9 @@ ssize_t blkdev_copy_offload(
struct block_device *bdev_in, loff_t pos_in,
struct block_device *bdev_out, loff_t pos_out,
size_t len, cio_iodone_t end_io, void *private, gfp_t gfp_mask);
+struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
+ gfp_t gfp_mask);
+void bio_map_kern_endio(struct bio *bio);
#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */
#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */
--
2.35.1.500.gb896f729e2
Powered by blists - more mailing lists