linux-kernel - linux-next: manual merge of the xfs tree with Linus' tree

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20150420122412.440d6e66@canb.auug.org.au>
Date:	Mon, 20 Apr 2015 12:24:12 +1000
From:	Stephen Rothwell <sfr@...b.auug.org.au>
To:	Ben Myers <bpm@....com>, David Chinner <david@...morbit.com>,
	xfs@....sgi.com
Cc:	linux-next@...r.kernel.org, linux-kernel@...r.kernel.org,
	Al Viro <viro@...iv.linux.org.uk>
Subject: linux-next: manual merge of the xfs tree with Linus' tree

Hi all,

Today's linux-next merge of the xfs tree got a conflict in
fs/xfs/xfs_file.c between commits 99733fa372ea
("xfs_file_aio_write_checks: switch to iocb/iov_iter") and 3309dd04cbcd
("switch generic_write_checks() to iocb and iter") from Linus' tree and
commits b9d59846f737 ("xfs: DIO write completion size updates race"),
40c63fbc55a9 ("xfs: direct IO EOF zeroing needs to drain AIO") and
0cefb29e6a63 ("xfs: using generic_file_direct_write() is unnecessary")
from the xfs tree.

I fixed it up (I have no idea if this is right - see below) and can
carry the fix as necessary (no action is required).

-- 
Cheers,
Stephen Rothwell                    sfr@...b.auug.org.au

diff --cc fs/xfs/xfs_file.c
index 1f12ad0a8585,3a5d305e60c9..000000000000
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@@ -544,22 -545,21 +544,22 @@@ xfs_zero_eof
   */
  STATIC ssize_t
  xfs_file_aio_write_checks(
 -	struct file		*file,
 -	loff_t			*pos,
 -	size_t			*count,
 +	struct kiocb		*iocb,
 +	struct iov_iter		*from,
  	int			*iolock)
  {
 +	struct file		*file = iocb->ki_filp;
  	struct inode		*inode = file->f_mapping->host;
  	struct xfs_inode	*ip = XFS_I(inode);
 -	int			error = 0;
 +	ssize_t			error = 0;
 +	size_t			count = iov_iter_count(from);
  
  restart:
 -	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
 -	if (error)
 +	error = generic_write_checks(iocb, from);
 +	if (error <= 0)
  		return error;
  
- 	error = xfs_break_layouts(inode, iolock);
+ 	error = xfs_break_layouts(inode, iolock, true);
  	if (error)
  		return error;
  
@@@ -569,21 -569,41 +569,42 @@@
  	 * write.  If zeroing is needed and we are currently holding the
  	 * iolock shared, we need to update it to exclusive which implies
  	 * having to redo all checks before.
+ 	 *
+ 	 * We need to serialise against EOF updates that occur in IO
+ 	 * completions here. We want to make sure that nobody is changing the
+ 	 * size while we do this check until we have placed an IO barrier (i.e.
+ 	 * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
+ 	 * The spinlock effectively forms a memory barrier once we have the
+ 	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
+ 	 * and hence be able to correctly determine if we need to run zeroing.
  	 */
+ 	spin_lock(&ip->i_flags_lock);
 -	if (*pos > i_size_read(inode)) {
 +	if (iocb->ki_pos > i_size_read(inode)) {
  		bool	zero = false;
  
+ 		spin_unlock(&ip->i_flags_lock);
  		if (*iolock == XFS_IOLOCK_SHARED) {
  			xfs_rw_iunlock(ip, *iolock);
  			*iolock = XFS_IOLOCK_EXCL;
  			xfs_rw_ilock(ip, *iolock);
 +			iov_iter_reexpand(from, count);
+ 
+ 			/*
+ 			 * We now have an IO submission barrier in place, but
+ 			 * AIO can do EOF updates during IO completion and hence
+ 			 * we now need to wait for all of them to drain. Non-AIO
+ 			 * DIO will have drained before we are given the
+ 			 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
+ 			 * no-op.
+ 			 */
+ 			inode_dio_wait(inode);
  			goto restart;
  		}
 -		error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
 +		error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
  		if (error)
  			return error;
- 	}
+ 	} else
+ 		spin_unlock(&ip->i_flags_lock);
  
  	/*
  	 * Updating the timestamps will grab the ilock again from
@@@ -680,11 -702,11 +703,12 @@@ xfs_file_dio_aio_write
  		xfs_rw_ilock(ip, iolock);
  	}
  
 -	ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
 +	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
  	if (ret)
  		goto out;
 -	iov_iter_truncate(from, count);
 +	count = iov_iter_count(from);
 +	pos = iocb->ki_pos;
+ 	end = pos + count - 1;
  
  	if (mapping->nrpages) {
  		ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
@@@ -1385,8 -1449,59 +1449,57 @@@ xfs_file_llseek
  	}
  }
  
+ /*
+  * Locking for serialisation of IO during page faults. This results in a lock
+  * ordering of:
+  *
+  * mmap_sem (MM)
+  *   i_mmap_lock (XFS - truncate serialisation)
+  *     page_lock (MM)
+  *       i_lock (XFS - extent map serialisation)
+  */
+ STATIC int
+ xfs_filemap_fault(
+ 	struct vm_area_struct	*vma,
+ 	struct vm_fault		*vmf)
+ {
+ 	struct xfs_inode	*ip = XFS_I(vma->vm_file->f_mapping->host);
+ 	int			error;
+ 
+ 	trace_xfs_filemap_fault(ip);
+ 
+ 	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+ 	error = filemap_fault(vma, vmf);
+ 	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ 
+ 	return error;
+ }
+ 
+ /*
+  * An mmap()d file has taken a write protection fault and is being made
+  * writable. We can set the page state up correctly for a writable page, which
+  * means we can do correct delalloc accounting (ENOSPC checking!) and unwritten
+  * extent mapping.
+  */
+ STATIC int
+ xfs_filemap_page_mkwrite(
+ 	struct vm_area_struct	*vma,
+ 	struct vm_fault		*vmf)
+ {
+ 	struct xfs_inode	*ip = XFS_I(vma->vm_file->f_mapping->host);
+ 	int			error;
+ 
+ 	trace_xfs_filemap_page_mkwrite(ip);
+ 
+ 	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+ 	error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+ 	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ 
+ 	return error;
+ }
+ 
  const struct file_operations xfs_file_operations = {
  	.llseek		= xfs_file_llseek,
 -	.read		= new_sync_read,
 -	.write		= new_sync_write,
  	.read_iter	= xfs_file_read_iter,
  	.write_iter	= xfs_file_write_iter,
  	.splice_read	= xfs_file_splice_read,

Content of type "application/pgp-signature" skipped