[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250603095245.17478-2-tao.wangtao@honor.com>
Date: Tue, 3 Jun 2025 17:52:42 +0800
From: wangtao <tao.wangtao@...or.com>
To: <sumit.semwal@...aro.org>, <christian.koenig@....com>,
<kraxel@...hat.com>, <vivek.kasireddy@...el.com>, <viro@...iv.linux.org.uk>,
<brauner@...nel.org>, <hughd@...gle.com>, <akpm@...ux-foundation.org>,
<amir73il@...il.com>
CC: <benjamin.gaignard@...labora.com>, <Brian.Starkey@....com>,
<jstultz@...gle.com>, <tjmercier@...gle.com>, <jack@...e.cz>,
<baolin.wang@...ux.alibaba.com>, <linux-media@...r.kernel.org>,
<dri-devel@...ts.freedesktop.org>, <linaro-mm-sig@...ts.linaro.org>,
<linux-kernel@...r.kernel.org>, <linux-fsdevel@...r.kernel.org>,
<linux-mm@...ck.org>, <bintian.wang@...or.com>, <yipengxiang@...or.com>,
<liulu.liu@...or.com>, <feng.han@...or.com>, wangtao <tao.wangtao@...or.com>
Subject: [PATCH v4 1/4] fs: allow cross-FS copy_file_range for memory file with direct I/O
Memory files can optimize copy performance via copy_file_range callbacks:
- Compared to mmap + read: reduces GUP (get_user_pages) overhead
- Compared to sendfile/splice: eliminates one memory copy
- Supports a dma-buf direct I/O zero-copy implementation
Suggested-by: Christian König <christian.koenig@....com>
Suggested-by: Amir Goldstein <amir73il@...il.com>
Signed-off-by: wangtao <tao.wangtao@...or.com>
---
fs/read_write.c | 64 +++++++++++++++++++++++++++++++++++++---------
include/linux/fs.h | 2 ++
2 files changed, 54 insertions(+), 12 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index bb0ed26a0b3a..ecb4f753c632 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1469,6 +1469,31 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
}
#endif
+static const struct file_operations *memory_copy_file_ops(
+ struct file *file_in, struct file *file_out)
+{
+ if ((file_in->f_op->fop_flags & FOP_MEMORY_FILE) &&
+ (file_in->f_mode & FMODE_CAN_ODIRECT) &&
+ file_in->f_op->copy_file_range && file_out->f_op->write_iter)
+ return file_in->f_op;
+ else if ((file_out->f_op->fop_flags & FOP_MEMORY_FILE) &&
+ (file_out->f_mode & FMODE_CAN_ODIRECT) &&
+ file_in->f_op->read_iter && file_out->f_op->copy_file_range)
+ return file_out->f_op;
+ else
+ return NULL;
+}
+
+static int essential_file_rw_checks(struct file *file_in, struct file *file_out)
+{
+ if (!(file_in->f_mode & FMODE_READ) ||
+ !(file_out->f_mode & FMODE_WRITE) ||
+ (file_out->f_flags & O_APPEND))
+ return -EBADF;
+
+ return 0;
+}
+
/*
* Performs necessary checks before doing a file copy
*
@@ -1484,9 +1509,16 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
struct inode *inode_out = file_inode(file_out);
uint64_t count = *req_count;
loff_t size_in;
+ bool splice = flags & COPY_FILE_SPLICE;
+ const struct file_operations *mem_fops;
int ret;
- ret = generic_file_rw_checks(file_in, file_out);
+ /* The dma-buf file is not a regular file. */
+ mem_fops = memory_copy_file_ops(file_in, file_out);
+ if (splice || mem_fops == NULL)
+ ret = generic_file_rw_checks(file_in, file_out);
+ else
+ ret = essential_file_rw_checks(file_in, file_out);
if (ret)
return ret;
@@ -1500,8 +1532,10 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
* and several different sets of file_operations, but they all end up
* using the same ->copy_file_range() function pointer.
*/
- if (flags & COPY_FILE_SPLICE) {
+ if (splice) {
/* cross sb splice is allowed */
+ } else if (mem_fops != NULL) {
+ /* cross-fs copy is allowed for memory file. */
} else if (file_out->f_op->copy_file_range) {
if (file_in->f_op->copy_file_range !=
file_out->f_op->copy_file_range)
@@ -1554,6 +1588,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
ssize_t ret;
bool splice = flags & COPY_FILE_SPLICE;
bool samesb = file_inode(file_in)->i_sb == file_inode(file_out)->i_sb;
+ const struct file_operations *mem_fops;
if (flags & ~COPY_FILE_SPLICE)
return -EINVAL;
@@ -1574,18 +1609,27 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
if (len == 0)
return 0;
+ if (splice)
+ goto do_splice;
+
file_start_write(file_out);
/*
* Cloning is supported by more file systems, so we implement copy on
* same sb using clone, but for filesystems where both clone and copy
* are supported (e.g. nfs,cifs), we only call the copy method.
+ * For copy to/from memory file, we always call the copy method of the
+ * memory file.
*/
- if (!splice && file_out->f_op->copy_file_range) {
+ mem_fops = memory_copy_file_ops(file_in, file_out);
+ if (mem_fops) {
+ ret = mem_fops->copy_file_range(file_in, pos_in,
+ file_out, pos_out, len, flags);
+ } else if (file_out->f_op->copy_file_range) {
ret = file_out->f_op->copy_file_range(file_in, pos_in,
- file_out, pos_out,
- len, flags);
- } else if (!splice && file_in->f_op->remap_file_range && samesb) {
+ file_out, pos_out,
+ len, flags);
+ } else if (file_in->f_op->remap_file_range && samesb) {
ret = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out,
min_t(loff_t, MAX_RW_COUNT, len),
@@ -1603,6 +1647,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
if (!splice)
goto done;
+do_splice:
/*
* We can get here for same sb copy of filesystems that do not implement
* ->copy_file_range() in case filesystem does not support clone or in
@@ -1786,12 +1831,7 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
return -EINVAL;
- if (!(file_in->f_mode & FMODE_READ) ||
- !(file_out->f_mode & FMODE_WRITE) ||
- (file_out->f_flags & O_APPEND))
- return -EBADF;
-
- return 0;
+ return essential_file_rw_checks(file_in, file_out);
}
int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..37df1b497418 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2187,6 +2187,8 @@ struct file_operations {
#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
/* File system supports uncached read/write buffered IO */
#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7))
+/* Supports cross-FS copy_file_range for memory file */
+#define FOP_MEMORY_FILE ((__force fop_flags_t)(1 << 8))
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
--
2.17.1
Powered by blists - more mailing lists