[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date: Fri, 2 Feb 2024 19:54:46 +0000
From: Matthew Wilcox <willy@...radead.org>
To: Christoph Hellwig <hch@....de>
Cc: linux-fsdevel@...r.kernel.org, linux-ext4@...r.kernel.org
Subject: Re: [PATCH 1/3] fs: Introduce buffered_write_operations
On Thu, Feb 01, 2024 at 05:42:46AM +0100, Christoph Hellwig wrote:
> On Thu, Feb 01, 2024 at 04:36:09AM +0000, Matthew Wilcox wrote:
> > +static struct folio *iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> > + size_t len)
> >
> > with corresponding changes. Again, ends up looking slightly cleaner.
>
> iomap also really needs some tweaks to the naming in the area as a
> __foo function calling foo as the default is horrible. I'll take a
> look at your patches and can add that on top.
>
> > f2fs also doesn't seem terribly objectional; passing rpages between
> > begin & end.
> >
> > ocfs2 is passing a ocfs2_write_ctxt between the two.
>
> Well, it might be the intended purpose, but as-is it is horribly
> inefficient - they need to do a dynamic allocation for every
> page they iterate over. So all of them are candidates for an
> iterator model that does this allocation once per write.
Oh, I see what you mean. What I could do is pass in the fsdata,
like this.
commit 753c7d2d62e1
Author: Matthew Wilcox (Oracle) <willy@...radead.org>
Date: Mon Jan 29 23:20:34 2024 -0500
fs: Introduce buffered_write_operations
Start the process of moving write_begin and write_end out from the
address_space_operations to their own struct.
The new write_begin returns the folio or an ERR_PTR instead of returning
the folio by reference. It also accepts len as a size_t and (as
documented) the len may be larger than PAGE_SIZE.
Pass an optional buffered_write_operations pointer to various functions
in filemap.c. The old names are available as macros for now, except
for generic_file_write_iter() which is used as a function pointer by
many filesystems. If using the new functions, the filesystem can have
per-operation fsdata instead of per-page fsdata.
Signed-off-by: Matthew Wilcox (Oracle) <willy@...radead.org>
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2df35e65557d..a79c7f15ca9f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -18,6 +18,27 @@
struct folio_batch;
+struct buffered_write_operations {
+ struct folio *(*write_begin)(struct file *, struct address_space *,
+ loff_t pos, size_t len, void **fsdata);
+ size_t (*write_end)(struct file *, struct address_space *,
+ loff_t pos, size_t len, size_t copied,
+ struct folio *folio, void **fsdata);
+};
+
+ssize_t filemap_write_iter(struct kiocb *, struct iov_iter *,
+ const struct buffered_write_operations *, void *fsdata);
+ssize_t __filemap_write_iter(struct kiocb *, struct iov_iter *,
+ const struct buffered_write_operations *, void *fsdata);
+ssize_t filemap_perform_write(struct kiocb *, struct iov_iter *,
+ const struct buffered_write_operations *, void *fsdata);
+
+ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
+#define generic_perform_write(kiocb, iter) \
+ filemap_perform_write(kiocb, iter, NULL, NULL)
+#define __generic_file_write_iter(kiocb, iter) \
+ __filemap_write_iter(kiocb, iter, NULL, NULL)
+
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);
diff --git a/mm/filemap.c b/mm/filemap.c
index 750e779c23db..214266aeaca5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -95,7 +95,7 @@
* ->invalidate_lock (filemap_fault)
* ->lock_page (filemap_fault, access_process_vm)
*
- * ->i_rwsem (generic_perform_write)
+ * ->i_rwsem (filemap_perform_write)
* ->mmap_lock (fault_in_readable->do_page_fault)
*
* bdi->wb.list_lock
@@ -3890,7 +3890,8 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
}
EXPORT_SYMBOL(generic_file_direct_write);
-ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
+ssize_t filemap_perform_write(struct kiocb *iocb, struct iov_iter *i,
+ const struct buffered_write_operations *ops, void *fsdata)
{
struct file *file = iocb->ki_filp;
loff_t pos = iocb->ki_pos;
@@ -3900,11 +3901,10 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
ssize_t written = 0;
do {
- struct page *page;
- unsigned long offset; /* Offset into pagecache page */
- unsigned long bytes; /* Bytes to write to page */
+ struct folio *folio;
+ size_t offset; /* Offset into pagecache folio */
+ size_t bytes; /* Bytes to write to page */
size_t copied; /* Bytes copied from user */
- void *fsdata = NULL;
offset = (pos & (PAGE_SIZE - 1));
bytes = min_t(unsigned long, PAGE_SIZE - offset,
@@ -3927,19 +3927,33 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
break;
}
- status = a_ops->write_begin(file, mapping, pos, bytes,
+ if (ops) {
+ folio = ops->write_begin(file, mapping, pos, bytes, &fsdata);
+ if (IS_ERR(folio)) {
+ status = PTR_ERR(folio);
+ break;
+ }
+ } else {
+ struct page *page;
+ status = a_ops->write_begin(file, mapping, pos, bytes,
&page, &fsdata);
- if (unlikely(status < 0))
- break;
+ if (unlikely(status < 0))
+ break;
+ folio = page_folio(page);
+ }
if (mapping_writably_mapped(mapping))
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
- copied = copy_page_from_iter_atomic(page, offset, bytes, i);
- flush_dcache_page(page);
+ copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
+ flush_dcache_folio(folio);
- status = a_ops->write_end(file, mapping, pos, bytes, copied,
- page, fsdata);
+ if (ops)
+ status = ops->write_end(file, mapping, pos, bytes,
+ copied, folio, &fsdata);
+ else
+ status = a_ops->write_end(file, mapping, pos, bytes,
+ copied, &folio->page, fsdata);
if (unlikely(status != copied)) {
iov_iter_revert(i, copied - max(status, 0L));
if (unlikely(status < 0))
@@ -3969,12 +3983,13 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
iocb->ki_pos += written;
return written;
}
-EXPORT_SYMBOL(generic_perform_write);
+EXPORT_SYMBOL(filemap_perform_write);
/**
- * __generic_file_write_iter - write data to a file
- * @iocb: IO state structure (file, offset, etc.)
- * @from: iov_iter with data to write
+ * __filemap_write_iter - write data to a file
+ * @iocb: IO state structure (file, offset, etc.)
+ * @from: iov_iter with data to write
+ * @ops: How to inform the filesystem that a write is starting/finishing.
*
* This function does all the work needed for actually writing data to a
* file. It does all basic checks, removes SUID from the file, updates
@@ -3992,7 +4007,8 @@ EXPORT_SYMBOL(generic_perform_write);
* * number of bytes written, even for truncated writes
* * negative error code if no data has been written at all
*/
-ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ssize_t __filemap_write_iter(struct kiocb *iocb, struct iov_iter *from,
+ const struct buffered_write_operations *ops, void *fsdata)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -4019,27 +4035,29 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret < 0 || !iov_iter_count(from) || IS_DAX(inode))
return ret;
return direct_write_fallback(iocb, from, ret,
- generic_perform_write(iocb, from));
+ filemap_perform_write(iocb, from, ops, fsdata));
}
- return generic_perform_write(iocb, from);
+ return filemap_perform_write(iocb, from, ops, fsdata);
}
-EXPORT_SYMBOL(__generic_file_write_iter);
+EXPORT_SYMBOL(__filemap_write_iter);
/**
- * generic_file_write_iter - write data to a file
+ * filemap_write_iter - write data to a file
* @iocb: IO state structure
* @from: iov_iter with data to write
*
- * This is a wrapper around __generic_file_write_iter() to be used by most
+ * This is a wrapper around __filemap_write_iter() to be used by most
* filesystems. It takes care of syncing the file in case of O_SYNC file
* and acquires i_rwsem as needed.
+ *
* Return:
- * * negative error code if no data has been written at all of
+ * * negative error code if no data has been written at all or if
* vfs_fsync_range() failed for a synchronous write
* * number of bytes written, even for truncated writes
*/
-ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ssize_t filemap_write_iter(struct kiocb *iocb, struct iov_iter *from,
+ const struct buffered_write_operations *ops, void *fsdata)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
@@ -4048,13 +4066,18 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0)
- ret = __generic_file_write_iter(iocb, from);
+ ret = __filemap_write_iter(iocb, from, ops, fsdata);
inode_unlock(inode);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
return ret;
}
+
+ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ return filemap_write_iter(iocb, from, NULL, NULL);
+}
EXPORT_SYMBOL(generic_file_write_iter);
/**
Powered by blists - more mailing lists