lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sun, 22 Mar 2015 16:14:54 +0800
From:	Ming Lei <ming.lei@...onical.com>
To:	Jens Axboe <axboe@...com>, linux-kernel@...r.kernel.org
Cc:	Christoph Hellwig <hch@....de>, Al Viro <viro@...iv.linux.org.uk>,
	Maxim Patlasov <mpatlasov@...allels.com>,
	Ming Lei <ming.lei@...onical.com>
Subject: [PATCH 3/3] block: loop: use vfs ITER_BVEC to read/write backing file

This simplifies the loop code considerably and makes it cleaner.

Also, one extra page copy is avoided for READ when no transfer
function is in use (LO_CRYPT_NONE).

Signed-off-by: Ming Lei <ming.lei@...onical.com>
---
 drivers/block/loop.c |  285 ++++++++++++++++++++++----------------------------
 drivers/block/loop.h |    3 +
 2 files changed, 127 insertions(+), 161 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c082cf7..f3c470a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -75,6 +75,7 @@
 #include <linux/sysfs.h>
 #include <linux/miscdevice.h>
 #include <linux/falloc.h>
+#include <linux/uio.h>
 #include "loop.h"
 
 #include <asm/uaccess.h>
@@ -87,26 +88,51 @@ static int part_shift;
 
 static struct workqueue_struct *loop_wq;
 
-/*
- * Transfer functions
- */
-static int transfer_none(struct loop_device *lo, int cmd,
-			 struct page *raw_page, unsigned raw_off,
-			 struct page *loop_page, unsigned loop_off,
-			 int size, sector_t real_block)
+struct ibvec_rw_data {
+	struct bio_vec *bvec;
+	unsigned long nr_segs;
+	size_t count;
+	int rw;
+	loff_t pos;
+};
+
+static ssize_t vfs_ibvec_rw(struct loop_device *lo, struct ibvec_rw_data *data)
 {
-	char *raw_buf = kmap_atomic(raw_page) + raw_off;
-	char *loop_buf = kmap_atomic(loop_page) + loop_off;
+	struct iov_iter iter;
+	struct file *file = lo->lo_backing_file;
+
+	iov_iter_bvec(&iter, ITER_BVEC | data->rw, data->bvec,
+		      data->nr_segs, data->count);
 
-	if (cmd == READ)
-		memcpy(loop_buf, raw_buf, size);
+	if (data->rw == READ)
+		return vfs_iter_read(file, &iter, &data->pos);
 	else
-		memcpy(raw_buf, loop_buf, size);
+		return vfs_iter_write(file, &iter, &data->pos);
+}
 
-	kunmap_atomic(loop_buf);
-	kunmap_atomic(raw_buf);
-	cond_resched();
-	return 0;
+static ssize_t vfs_rw(struct loop_device *lo, struct ibvec_rw_data *data)
+{
+	char *buf;
+	struct bio_vec *bvec;
+	struct file *file;
+	int ret;
+
+	if (lo->vfs_rw_iter)
+		return vfs_ibvec_rw(lo, data);
+
+	/* fallback to vfs_read and vfs_write */
+	BUG_ON(data->nr_segs != 1);
+
+	file = lo->lo_backing_file;
+	bvec = data->bvec;
+	buf = kmap(bvec->bv_page) + bvec->bv_offset;
+
+	if (data->rw == READ)
+		ret = vfs_read(file, buf, bvec->bv_len, &data->pos);
+	else
+		ret = vfs_write(file, buf, bvec->bv_len, &data->pos);
+	kunmap(bvec->bv_page);
+	return ret;
 }
 
 static int transfer_xor(struct loop_device *lo, int cmd,
@@ -147,7 +173,6 @@ static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
 
 static struct loop_func_table none_funcs = {
 	.number = LO_CRYPT_NONE,
-	.transfer = transfer_none,
 }; 	
 
 static struct loop_func_table xor_funcs = {
@@ -214,74 +239,45 @@ lo_do_transfer(struct loop_device *lo, int cmd,
 	       struct page *lpage, unsigned loffs,
 	       int size, sector_t rblock)
 {
-	if (unlikely(!lo->transfer))
-		return 0;
-
 	return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
 }
 
 /**
- * __do_lo_send_write - helper for writing data to a loop device
- *
- * This helper just factors out common code between do_lo_send_direct_write()
- * and do_lo_send_write().
- */
-static int __do_lo_send_write(struct file *file,
-		u8 *buf, const int len, loff_t pos)
-{
-	ssize_t bw;
-	mm_segment_t old_fs = get_fs();
-
-	file_start_write(file);
-	set_fs(get_ds());
-	bw = file->f_op->write(file, buf, len, &pos);
-	set_fs(old_fs);
-	file_end_write(file);
-	if (likely(bw == len))
-		return 0;
-	printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
-			(unsigned long long)pos, len);
-	if (bw >= 0)
-		bw = -EIO;
-	return bw;
-}
-
-/**
- * do_lo_send_direct_write - helper for writing data to a loop device
+ * do_lo_send_write - helper for writing data to a loop device
  *
- * This is the fast, non-transforming version that does not need double
- * buffering.
  */
-static int do_lo_send_direct_write(struct loop_device *lo,
-		struct bio_vec *bvec, loff_t pos, struct page *page)
+static ssize_t do_lo_send_write(struct loop_device *lo,
+				struct loop_cmd *cmd,
+				struct bio_vec *bvec, loff_t pos)
 {
-	ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
-			kmap(bvec->bv_page) + bvec->bv_offset,
-			bvec->bv_len, pos);
-	kunmap(bvec->bv_page);
-	cond_resched();
-	return bw;
-}
+	ssize_t ret;
+	struct ibvec_rw_data data;
+	struct bio_vec  r_bvec = *bvec;
+	struct page *r_page = cmd->trans_page;
+
+	if (r_page != NULL) {
+		ret = lo_do_transfer(lo, WRITE, r_page, 0,
+				     bvec->bv_page, bvec->bv_offset,
+				     bvec->bv_len, pos >> 9);
+		if (unlikely(ret))
+			goto fail;
 
-/**
- * do_lo_send_write - helper for writing data to a loop device
- *
- * This is the slow, transforming version that needs to double buffer the
- * data as it cannot do the transformations in place without having direct
- * access to the destination pages of the backing file.
- */
-static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
-		loff_t pos, struct page *page)
-{
-	int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
-			bvec->bv_offset, bvec->bv_len, pos >> 9);
-	if (likely(!ret)) {
-		ret =  __do_lo_send_write(lo->lo_backing_file,
-				kmap(page), bvec->bv_len,
-				pos);
-		kunmap(page);
-		return ret;
+		r_bvec.bv_page = r_page;
+		r_bvec.bv_offset = 0;
 	}
+
+	data.bvec = &r_bvec;
+	data.count = bvec->bv_len;
+	data.pos = pos;
+	data.nr_segs = 1;
+	data.rw = WRITE;
+
+	ret = vfs_rw(lo, &data);
+	if (ret < 0)
+		goto fail;
+	return ret;
+
+ fail:
 	printk_ratelimited(KERN_ERR "loop: Transfer error at byte offset %llu, "
 			"length %i.\n", (unsigned long long)pos, bvec->bv_len);
 	if (ret > 0)
@@ -289,108 +285,64 @@ static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
 	return ret;
 }
 
-static int lo_send(struct loop_device *lo, struct loop_cmd *cmd, loff_t pos)
+static ssize_t lo_send(struct loop_device *lo, struct loop_cmd *cmd,
+		       loff_t pos)
 {
-	int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
-			struct page *page);
 	struct bio_vec bvec;
 	struct req_iterator iter;
-	struct page *page = NULL;
 	int ret = 0;
 	struct request *rq = cmd->rq;
 
-	if (lo->transfer != transfer_none) {
-		page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
-		if (unlikely(!page))
-			goto fail;
-		do_lo_send = do_lo_send_write;
-	} else {
-		do_lo_send = do_lo_send_direct_write;
-	}
-
 	rq_for_each_segment(bvec, rq, iter) {
-		ret = do_lo_send(lo, &bvec, pos, page);
+		ret = do_lo_send_write(lo, cmd, &bvec, pos);
 		if (ret < 0)
 			break;
 		pos += bvec.bv_len;
 	}
-	if (page) {
-		__free_page(page);
-	}
-out:
-	return ret;
-fail:
-	printk_ratelimited(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
-	ret = -ENOMEM;
-	goto out;
-}
-
-struct lo_read_data {
-	struct loop_device *lo;
-	struct page *page;
-	unsigned offset;
-	int bsize;
-};
-
-static int
-lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
-		struct splice_desc *sd)
-{
-	struct lo_read_data *p = sd->u.data;
-	struct loop_device *lo = p->lo;
-	struct page *page = buf->page;
-	sector_t IV;
-	int size;
-
-	IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
-							(buf->offset >> 9);
-	size = sd->len;
-	if (size > p->bsize)
-		size = p->bsize;
-
-	if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
-		printk_ratelimited(KERN_ERR "loop: transfer error block %ld\n",
-		       page->index);
-		size = -EINVAL;
-	}
 
-	flush_dcache_page(p->page);
-
-	if (size > 0)
-		p->offset += size;
-
-	return size;
-}
-
-static int
-lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
-{
-	return __splice_from_pipe(pipe, sd, lo_splice_actor);
+	if (ret > 0)
+		ret = 0;
+	return ret;
 }
 
 static ssize_t
-do_lo_receive(struct loop_device *lo,
-	      struct bio_vec *bvec, int bsize, loff_t pos)
+do_lo_receive(struct loop_device *lo, struct loop_cmd *cmd,
+	      struct bio_vec *bvec, loff_t pos)
 {
-	struct lo_read_data cookie;
-	struct splice_desc sd;
-	struct file *file;
 	ssize_t retval;
+	struct ibvec_rw_data data;
+	struct bio_vec  r_bvec = *bvec;
+	bool trans = false;
+
+	if (cmd->trans_page != NULL) {
+		r_bvec.bv_page = cmd->trans_page;
+		r_bvec.bv_offset = 0;
+		trans = true;
+	}
 
-	cookie.lo = lo;
-	cookie.page = bvec->bv_page;
-	cookie.offset = bvec->bv_offset;
-	cookie.bsize = bsize;
-
-	sd.len = 0;
-	sd.total_len = bvec->bv_len;
-	sd.flags = 0;
-	sd.pos = pos;
-	sd.u.data = &cookie;
+	data.bvec = &r_bvec;
+	data.count = r_bvec.bv_len;
+	data.pos = pos;
+	data.nr_segs = 1;
+	data.rw = READ;
 
-	file = lo->lo_backing_file;
-	retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
+	retval = vfs_rw(lo, &data);
+	if (retval < 0)
+		goto out;
 
+	if (trans) {
+		retval = lo_do_transfer(lo, READ, r_bvec.bv_page, 0,
+				bvec->bv_page, bvec->bv_offset, retval,
+				pos >> 9);
+		if (retval < 0)
+			goto out;
+		flush_dcache_page(bvec->bv_page);
+	}
+out:
+	if (retval < 0)
+		printk_ratelimited(KERN_ERR "loop: transfer error block "
+				   "%lld ret=%ld\n", pos >> 9,
+				   (long)retval);
 	return retval;
 }
 
@@ -401,10 +353,9 @@ lo_receive(struct loop_device *lo, struct loop_cmd *cmd, loff_t pos)
 	struct req_iterator iter;
 	ssize_t s;
 	struct request *rq = cmd->rq;
-	int	bsize = lo->lo_blocksize;
 
 	rq_for_each_segment(bvec, rq, iter) {
-		s = do_lo_receive(lo, &bvec, bsize, pos);
+		s = do_lo_receive(lo, cmd, &bvec, pos);
 		if (s < 0)
 			return s;
 
@@ -458,12 +409,22 @@ static inline int lo_rw(struct loop_device *lo, struct loop_cmd *cmd,
 		loff_t pos, int rw)
 {
 	int ret;
+	struct page *page = NULL;
+
+	if (lo->transfer != NULL) {
+		page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
+		if (unlikely(!page))
+			return -ENOMEM;
+	}
+	cmd->trans_page = page;
 
 	if (rw == READ)
 		ret = lo_receive(lo, cmd, pos);
 	else
 		ret = lo_send(lo, cmd, pos);
 
+	if (cmd->trans_page)
+		__free_page(cmd->trans_page);
 	return ret;
 }
 
@@ -804,7 +765,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	lo->lo_device = bdev;
 	lo->lo_flags = lo_flags;
 	lo->lo_backing_file = file;
-	lo->transfer = transfer_none;
+	lo->transfer = NULL;
 	lo->ioctl = NULL;
 	lo->lo_sizelimit = 0;
 	lo->old_gfp_mask = mapping_gfp_mask(mapping);
@@ -813,6 +774,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
 		blk_queue_flush(lo->lo_queue, REQ_FLUSH);
 
+	lo->vfs_rw_iter = file->f_op->read_iter && file->f_op->write_iter;
+
 	set_capacity(lo->lo_disk, size);
 	bd_set_size(bdev, size << 9);
 	loop_sysfs_init(lo);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 301c27f..981c8b9 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -63,12 +63,15 @@ struct loop_device {
 	struct request_queue	*lo_queue;
 	struct blk_mq_tag_set	tag_set;
 	struct gendisk		*lo_disk;
+
+	bool			vfs_rw_iter;
 };
 
 struct loop_cmd {
 	struct work_struct read_work;
 	struct request *rq;
 	struct list_head list;
+	struct page *trans_page;	/* only for encrypted transfer */
 };
 
 /* Support for loadable transfer modules */
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ