lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu,  8 Feb 2007 14:07:36 +0100 (CET)
From:	Nick Piggin <npiggin@...e.de>
To:	Linux Filesystems <linux-fsdevel@...r.kernel.org>
Cc:	Linux Kernel <linux-kernel@...r.kernel.org>,
	Nick Piggin <npiggin@...e.de>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Linus Torvalds <akpm@...ux-foundation.org>
Subject: [patch 2/3] fs: introduce perform_write aop

Add a new "perform_write" aop, which replaces prepare_write and commit_write
as a single call to copy a given amount of userdata at the given offset. This
is more flexible, because the implementation can determine how to best handle
errors, or multi-page ranges (eg. it may use a gang lookup), and only requires
one call into the fs.

One problem with this interface is that it cannot be used to write into the
filesystem by any means other than already-initialised buffers via iovecs. So
prepare/commit have to stay around for non-user data... 

Another thing is that it seems to be less able to be implemented in generic,
reusable code. It should be possible to introduce a new 2-op interface (or
maybe just a new error handler op) which can be used correctly in generic code.

I avoided that way in my first attempt because this seemed more elegant from
a logical POV. But after seeing the implementation, I'm having second
thoughts.

 include/linux/fs.h |    6 ++++
 mm/filemap.c       |   64 +++++++++++++++++++++++++++++++++--------------------
 2 files changed, 47 insertions(+), 23 deletions(-)

Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -447,6 +447,12 @@ struct address_space_operations {
 	 */
 	int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
 	int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
+
+	/*
+	 * perform_write replaces prepare and commit_write callbacks.
+	 */
+	int (*perform_write)(struct file *, struct iovec_iterator *, loff_t);
+
 	/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
 	sector_t (*bmap)(struct address_space *, sector_t);
 	void (*invalidatepage) (struct page *, unsigned long);
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c
+++ linux-2.6/mm/filemap.c
@@ -1920,8 +1920,7 @@ EXPORT_SYMBOL(generic_file_direct_write)
  * Find or create a page at the given pagecache position. Return the locked
  * page. This function is specifically for buffered writes.
  */
-static struct page *__grab_cache_page(struct address_space *mapping,
-							pgoff_t index)
+struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index)
 {
 	int status;
 	struct page *page;
@@ -1943,19 +1942,14 @@ repeat:
 	return page;
 }
 
-ssize_t
-generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos, loff_t *ppos,
-		size_t count, ssize_t written)
+static ssize_t generic_perform_write(struct file *file,
+				struct iovec_iterator *i, loff_t pos)
 {
-	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	const struct address_space_operations *a_ops = mapping->a_ops;
-	struct inode 	*inode = mapping->host;
-	long		status = 0;
-	struct iovec_iterator i;
-
-	iovec_iterator_init(&i, iov, nr_segs, count, written);
+	struct inode *inode = mapping->host;
+	long status = 0;
+	ssize_t written = 0;
 
 	do {
 		struct page *src_page;
@@ -1968,7 +1962,7 @@ generic_file_buffered_write(struct kiocb
 		offset = (pos & (PAGE_CACHE_SIZE - 1));
 		index = pos >> PAGE_CACHE_SHIFT;
 		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
-						iovec_iterator_count(&i));
+						iovec_iterator_count(i));
 
 		/*
 		 * a non-NULL src_page indicates that we're doing the
@@ -1986,7 +1980,7 @@ generic_file_buffered_write(struct kiocb
 		 * to check that the address is actually valid, when atomic
 		 * usercopies are used, below.
 		 */
-		if (unlikely(iovec_iterator_fault_in_readable(&i))) {
+		if (unlikely(iovec_iterator_fault_in_readable(i))) {
 			status = -EFAULT;
 			break;
 		}
@@ -2017,7 +2011,7 @@ generic_file_buffered_write(struct kiocb
 			 * same reason as we can't take a page fault with a
 			 * page locked (as explained below).
 			 */
-			copied = iovec_iterator_copy_from_user(src_page, &i,
+			copied = iovec_iterator_copy_from_user(src_page, i,
 								offset, bytes);
 			if (unlikely(copied == 0)) {
 				status = -EFAULT;
@@ -2056,7 +2050,7 @@ generic_file_buffered_write(struct kiocb
 			 * really matter.
 			 */
 			pagefault_disable();
-			copied = iovec_iterator_copy_from_user_atomic(page, &i,
+			copied = iovec_iterator_copy_from_user_atomic(page, i,
 								offset, bytes);
 			pagefault_enable();
 		} else {
@@ -2082,9 +2076,9 @@ generic_file_buffered_write(struct kiocb
 		if (src_page)
 			page_cache_release(src_page);
 
-		iovec_iterator_advance(&i, copied);
-		written += copied;
+		iovec_iterator_advance(i, copied);
 		pos += copied;
+		written += copied;
 
 		balance_dirty_pages_ratelimited(mapping);
 		cond_resched();
@@ -2108,13 +2102,37 @@ fs_write_aop_error:
 			continue;
 		else
 			break;
-	} while (iovec_iterator_count(&i));
-	*ppos = pos;
+	} while (iovec_iterator_count(i));
+
+	return written ? written : status;
+}
+
+ssize_t
+generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos, loff_t *ppos,
+		size_t count, ssize_t written)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	const struct address_space_operations *a_ops = mapping->a_ops;
+	struct inode 	*inode = mapping->host;
+	long		status = 0;
+	struct iovec_iterator i;
+
+	iovec_iterator_init(&i, iov, nr_segs, count, written);
+	if (!a_ops->perform_write)
+		status = generic_perform_write(file, &i, pos);
+	else
+		status = a_ops->perform_write(file, &i, pos);
 
-	/*
-	 * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
-	 */
 	if (likely(status >= 0)) {
+		written += status;
+		*ppos = pos + status;
+
+		/*
+		 * For now, when the user asks for O_SYNC, we'll actually give
+		 * O_DSYNC
+		 */
 		if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
 			if (!a_ops->writepage || !is_sync_kiocb(iocb))
 				status = generic_osync_inode(inode, mapping,
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ