[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20150420122412.440d6e66@canb.auug.org.au>
Date: Mon, 20 Apr 2015 12:24:12 +1000
From: Stephen Rothwell <sfr@...b.auug.org.au>
To: Ben Myers <bpm@....com>, David Chinner <david@...morbit.com>,
xfs@....sgi.com
Cc: linux-next@...r.kernel.org, linux-kernel@...r.kernel.org,
Al Viro <viro@...iv.linux.org.uk>
Subject: linux-next: manual merge of the xfs tree with Linus' tree
Hi all,
Today's linux-next merge of the xfs tree got a conflict in
fs/xfs/xfs_file.c between commits 99733fa372ea
("xfs_file_aio_write_checks: switch to iocb/iov_iter") and 3309dd04cbcd
("switch generic_write_checks() to iocb and iter") from Linus' tree and
commits b9d59846f737 ("xfs: DIO write completion size updates race"),
40c63fbc55a9 ("xfs: direct IO EOF zeroing needs to drain AIO") and
0cefb29e6a63 ("xfs: using generic_file_direct_write() is unnecessary")
from the xfs tree.
I fixed it up (I have no idea if this is right - see below) and can
carry the fix as necessary (no action is required).
--
Cheers,
Stephen Rothwell sfr@...b.auug.org.au
diff --cc fs/xfs/xfs_file.c
index 1f12ad0a8585,3a5d305e60c9..000000000000
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@@ -544,22 -545,21 +544,22 @@@ xfs_zero_eof
*/
STATIC ssize_t
xfs_file_aio_write_checks(
- struct file *file,
- loff_t *pos,
- size_t *count,
+ struct kiocb *iocb,
+ struct iov_iter *from,
int *iolock)
{
+ struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
- int error = 0;
+ ssize_t error = 0;
+ size_t count = iov_iter_count(from);
restart:
- error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
- if (error)
+ error = generic_write_checks(iocb, from);
+ if (error <= 0)
return error;
- error = xfs_break_layouts(inode, iolock);
+ error = xfs_break_layouts(inode, iolock, true);
if (error)
return error;
@@@ -569,21 -569,41 +569,42 @@@
* write. If zeroing is needed and we are currently holding the
* iolock shared, we need to update it to exclusive which implies
* having to redo all checks before.
+ *
+ * We need to serialise against EOF updates that occur in IO
+ * completions here. We want to make sure that nobody is changing the
+ * size while we do this check until we have placed an IO barrier (i.e.
+ * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
+ * The spinlock effectively forms a memory barrier once we have the
+ * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
+ * and hence be able to correctly determine if we need to run zeroing.
*/
+ spin_lock(&ip->i_flags_lock);
- if (*pos > i_size_read(inode)) {
+ if (iocb->ki_pos > i_size_read(inode)) {
bool zero = false;
+ spin_unlock(&ip->i_flags_lock);
if (*iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock);
+ iov_iter_reexpand(from, count);
+
+ /*
+ * We now have an IO submission barrier in place, but
+ * AIO can do EOF updates during IO completion and hence
+ * we now need to wait for all of them to drain. Non-AIO
+ * DIO will have drained before we are given the
+ * XFS_IOLOCK_EXCL, and so for most cases this wait is a
+ * no-op.
+ */
+ inode_dio_wait(inode);
goto restart;
}
- error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
+ error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
if (error)
return error;
- }
+ } else
+ spin_unlock(&ip->i_flags_lock);
/*
* Updating the timestamps will grab the ilock again from
@@@ -680,11 -702,11 +703,12 @@@ xfs_file_dio_aio_write
xfs_rw_ilock(ip, iolock);
}
- ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
+ ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
- iov_iter_truncate(from, count);
+ count = iov_iter_count(from);
+ pos = iocb->ki_pos;
+ end = pos + count - 1;
if (mapping->nrpages) {
ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
@@@ -1385,8 -1449,59 +1449,57 @@@ xfs_file_llseek
}
}
+ /*
+ * Locking for serialisation of IO during page faults. This results in a lock
+ * ordering of:
+ *
+ * mmap_sem (MM)
+ * i_mmap_lock (XFS - truncate serialisation)
+ * page_lock (MM)
+ * i_lock (XFS - extent map serialisation)
+ */
+ STATIC int
+ xfs_filemap_fault(
+ struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+ {
+ struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
+ int error;
+
+ trace_xfs_filemap_fault(ip);
+
+ xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+ error = filemap_fault(vma, vmf);
+ xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+ return error;
+ }
+
+ /*
+ * mmap()d file has taken write protection fault and is being made writable. We
+ * can set the page state up correctly for a writable page, which means we can
+ * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+ * mapping.
+ */
+ STATIC int
+ xfs_filemap_page_mkwrite(
+ struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+ {
+ struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
+ int error;
+
+ trace_xfs_filemap_page_mkwrite(ip);
+
+ xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+ error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+ xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+ return error;
+ }
+
const struct file_operations xfs_file_operations = {
.llseek = xfs_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = xfs_file_read_iter,
.write_iter = xfs_file_write_iter,
.splice_read = xfs_file_splice_read,
Content of type "application/pgp-signature" skipped
Powered by blists - more mailing lists