lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 23 Nov 2022 11:28:27 +0530
From:   Nitesh Shetty <nj.shetty@...sung.com>
To:     axboe@...nel.dk, agk@...hat.com, snitzer@...nel.org,
        dm-devel@...hat.com, kbusch@...nel.org, hch@....de,
        sagi@...mberg.me, james.smart@...adcom.com, kch@...dia.com,
        damien.lemoal@...nsource.wdc.com, naohiro.aota@....com,
        jth@...nel.org, viro@...iv.linux.org.uk
Cc:     linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-nvme@...ts.infradead.org, linux-fsdevel@...r.kernel.org,
        anuj20.g@...sung.com, joshi.k@...sung.com, p.raghav@...sung.com,
        nitheshshetty@...il.com, gost.dev@...sung.com,
        Nitesh Shetty <nj.shetty@...sung.com>
Subject: [PATCH v5 10/10] fs: add support for copy file range in zonefs

copy_file_range is implemented using copy offload; copy offloading to the
device is enabled by default. To disable copy offloading, mount with the
"no_copy_offload" mount option. At present, copy offload is only used if
the source and destination files are on the same block device; otherwise
copy_file_range is completed by the generic copy file range implementation.

copy file range implemented as following:
	- write pending writes on the src and dest files
	- drop page cache for dest file if its conv zone
	- copy the range using offload
	- update dest file info

For all failure cases we fall back to the generic copy file range.
At present this implementation does not support conv zone aggregation.

Signed-off-by: Nitesh Shetty <nj.shetty@...sung.com>
Signed-off-by: Anuj Gupta <anuj20.g@...sung.com>
---
 fs/zonefs/super.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 179 insertions(+)

diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index abc9a85106f2..15613433d4ae 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1223,6 +1223,183 @@ static int zonefs_file_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/*
+ * Validate and clamp a copy range against the source file size and the
+ * destination zone. On success *len holds the usable copy length.
+ * Returns 0 if the copy offsets are acceptable, -EINVAL otherwise.
+ */
+static int zonefs_is_file_copy_offset_ok(struct inode *src_inode,
+		struct inode *dst_inode, loff_t src_off, loff_t dst_off,
+		size_t *len)
+{
+	loff_t size, endoff;
+	struct zonefs_inode_info *dst_zi = ZONEFS_I(dst_inode);
+
+	inode_lock(src_inode);
+	size = i_size_read(src_inode);
+	inode_unlock(src_inode);
+	/* Don't copy beyond source file EOF. */
+	if (src_off < size) {
+		if (src_off + *len > size)
+			*len = size - src_off;
+	} else {
+		*len = 0;
+	}
+
+	mutex_lock(&dst_zi->i_truncate_mutex);
+	if (dst_zi->i_ztype == ZONEFS_ZTYPE_SEQ) {
+		/* Clamp to the room remaining in the sequential zone */
+		if (*len > dst_zi->i_max_size - dst_zi->i_wpoffset)
+			*len = dst_zi->i_max_size - dst_zi->i_wpoffset;
+
+		/* Sequential zones only accept writes at the write pointer */
+		if (dst_off != dst_zi->i_wpoffset)
+			goto err;
+	}
+	mutex_unlock(&dst_zi->i_truncate_mutex);
+
+	endoff = dst_off + *len;
+	inode_lock(dst_inode);
+	if (endoff > dst_zi->i_max_size ||
+	    inode_newsize_ok(dst_inode, endoff)) {
+		inode_unlock(dst_inode);
+		/* i_truncate_mutex already released above: plain return */
+		return -EINVAL;
+	}
+	inode_unlock(dst_inode);
+
+	return 0;
+err:
+	mutex_unlock(&dst_zi->i_truncate_mutex);
+	return -EINVAL;
+}
+
+/*
+ * Issue a single-range block-layer copy offload from the source zone to
+ * the destination zone. Returns the number of bytes copied (comp_len when
+ * the device reports partial completion), or a negative error code.
+ */
+static ssize_t zonefs_issue_copy(struct zonefs_inode_info *src_zi,
+		loff_t src_off, struct zonefs_inode_info *dst_zi,
+		loff_t dst_off, size_t len)
+{
+	struct block_device *src_bdev = src_zi->i_vnode.i_sb->s_bdev;
+	struct block_device *dst_bdev = dst_zi->i_vnode.i_sb->s_bdev;
+	struct range_entry *rlist;
+	/* ssize_t, not int: len is a size_t and the return type is ssize_t */
+	ssize_t ret;
+
+	rlist = kmalloc(sizeof(*rlist), GFP_KERNEL);
+	if (!rlist)
+		return -ENOMEM;
+
+	/* Zone start sector + in-zone offset, expressed in bytes */
+	rlist[0].dst = (dst_zi->i_zsector << SECTOR_SHIFT) + dst_off;
+	rlist[0].src = (src_zi->i_zsector << SECTOR_SHIFT) + src_off;
+	rlist[0].len = len;
+	rlist[0].comp_len = 0;
+	ret = blkdev_issue_copy(src_bdev, dst_bdev, rlist, 1, NULL, NULL,
+			GFP_KERNEL);
+	/* A partial completion overrides the error/success return value */
+	if (rlist[0].comp_len > 0)
+		ret = rlist[0].comp_len;
+	kfree(rlist);
+
+	return ret;
+}
+
+/* Returns length of possible copy, else returns error */
+static ssize_t zonefs_copy_file_checks(struct file *src_file, loff_t src_off,
+					struct file *dst_file, loff_t dst_off,
+					size_t *len, unsigned int flags)
+{
+	struct inode *src_inode = file_inode(src_file);
+	struct inode *dst_inode = file_inode(dst_file);
+	struct zonefs_inode_info *src_zi = ZONEFS_I(src_inode);
+	struct zonefs_inode_info *dst_zi = ZONEFS_I(dst_inode);
+	ssize_t ret;
+
+	if (src_inode->i_sb != dst_inode->i_sb)
+		return -EXDEV;
+
+	/* Start by sync'ing the source and destination files for conv zones */
+	if (src_zi->i_ztype == ZONEFS_ZTYPE_CNV) {
+		ret = file_write_and_wait_range(src_file, src_off,
+				(src_off + *len));
+		if (ret < 0)
+			goto io_error;
+	}
+	inode_dio_wait(src_inode);
+
+	/* Start by sync'ing the source and destination files ifor conv zones */
+	if (dst_zi->i_ztype == ZONEFS_ZTYPE_CNV) {
+		ret = file_write_and_wait_range(dst_file, dst_off,
+				(dst_off + *len));
+		if (ret < 0)
+			goto io_error;
+	}
+	inode_dio_wait(dst_inode);
+
+	/* Drop dst file cached pages for a conv zone*/
+	if (dst_zi->i_ztype == ZONEFS_ZTYPE_CNV) {
+		ret = invalidate_inode_pages2_range(dst_inode->i_mapping,
+				dst_off >> PAGE_SHIFT,
+				(dst_off + *len) >> PAGE_SHIFT);
+		if (ret < 0)
+			goto io_error;
+	}
+
+	ret = zonefs_is_file_copy_offset_ok(src_inode, dst_inode, src_off,
+			dst_off, len);
+	if (ret < 0)
+		return ret;
+
+	return *len;
+
+io_error:
+	zonefs_io_error(dst_inode, true);
+	return ret;
+}
+
+/*
+ * Perform the copy: offload as much as possible to the device, account
+ * the copied bytes in the destination inode, then splice any remainder.
+ * Returns the total number of bytes copied or a negative error code.
+ */
+static ssize_t zonefs_copy_file(struct file *src_file, loff_t src_off,
+		struct file *dst_file, loff_t dst_off,
+		size_t len, unsigned int flags)
+{
+	struct inode *src_inode = file_inode(src_file);
+	struct inode *dst_inode = file_inode(dst_file);
+	struct zonefs_inode_info *src_zi = ZONEFS_I(src_inode);
+	struct zonefs_inode_info *dst_zi = ZONEFS_I(dst_inode);
+	ssize_t ret = 0, bytes;
+
+	/* Lock both inodes in address order to avoid ABBA deadlocks */
+	lock_two_nondirectories(src_inode, dst_inode);
+	bytes = zonefs_issue_copy(src_zi, src_off, dst_zi, dst_off, len);
+	if (bytes < 0) {
+		/* Propagate the error instead of silently returning 0 */
+		ret = bytes;
+		goto unlock_exit;
+	}
+
+	ret = bytes;
+
+	file_update_time(dst_file);
+	mutex_lock(&dst_zi->i_truncate_mutex);
+	zonefs_update_stats(dst_inode, dst_off + bytes);
+	zonefs_i_size_write(dst_inode, dst_off + bytes);
+	dst_zi->i_wpoffset += bytes;
+	mutex_unlock(&dst_zi->i_truncate_mutex);
+
+	/* If the offload copied only part of the range, splice the rest */
+	if (bytes && (size_t)bytes < len) {
+		size_t remaining = len - bytes;
+
+		/* Continue after the bytes the offload already copied */
+		src_off += bytes;
+		dst_off += bytes;
+		bytes = do_splice_direct(src_file, &src_off, dst_file,
+					 &dst_off, remaining, flags);
+		if (bytes > 0)
+			ret += bytes;
+	}
+unlock_exit:
+	if (ret < 0)
+		zonefs_io_error(dst_inode, true);
+	unlock_two_nondirectories(src_inode, dst_inode);
+	return ret;
+}
+
+/*
+ * copy_file_range entry point: validate the request, then copy via
+ * offload. Cross-device requests fall back to the generic VFS copy.
+ */
+static ssize_t zonefs_copy_file_range(struct file *src_file, loff_t src_off,
+				      struct file *dst_file, loff_t dst_off,
+				      size_t len, unsigned int flags)
+{
+	ssize_t ret;
+
+	ret = zonefs_copy_file_checks(src_file, src_off, dst_file, dst_off,
+				     &len, flags);
+	if (ret > 0)
+		ret = zonefs_copy_file(src_file, src_off, dst_file, dst_off,
+				     len, flags);
+	else if (ret == -EXDEV)
+		/* ret == -EXDEV already implies ret < 0 */
+		ret = generic_copy_file_range(src_file, src_off, dst_file,
+					      dst_off, len, flags);
+	return ret;
+}
+
 static const struct file_operations zonefs_file_operations = {
 	.open		= zonefs_file_open,
 	.release	= zonefs_file_release,
@@ -1234,6 +1411,7 @@ static const struct file_operations zonefs_file_operations = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.iopoll		= iocb_bio_iopoll,
+	.copy_file_range = zonefs_copy_file_range,
 };
 
 static struct kmem_cache *zonefs_inode_cachep;
@@ -1804,6 +1982,7 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
 	atomic_set(&sbi->s_active_seq_files, 0);
 	sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev);
 
+	/* set copy support by default */
 	ret = zonefs_read_super(sb);
 	if (ret)
 		return ret;
-- 
2.35.1.500.gb896f729e2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ