linux-kernel - [PATCH 3.16 097/125] ext4: fix transaction issues for ext4_fallocate and ext_zero

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20140903220626.596026093@linuxfoundation.org>
Date:	Wed,  3 Sep 2014 15:07:34 -0700
From:	Greg Kroah-Hartman <gregkh@...uxfoundation.org>
To:	linux-kernel@...r.kernel.org
Cc:	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	stable@...r.kernel.org, Dmitry Monakhov <dmonakhov@...nvz.org>,
	Theodore Tso <tytso@....edu>
Subject: [PATCH 3.16 097/125] ext4: fix transaction issues for ext4_fallocate and ext_zero_range

3.16-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Dmitry Monakhov <dmonakhov@...nvz.org>

commit c174e6d6979a04b7b77b93f244396be4b81f8bfb upstream.

After commit f282ac19d86f we use different transactions for
preallocation and i_disksize update which result in complain from fsck
after power-failure.  spotted by generic/019. IMHO this is regression
because fs becomes inconsistent, even more 'e2fsck -p' will no longer
works (which drives admins go crazy) Same transaction requirement
applies ctime,mtime updates

testcase: xfstest generic/019

Signed-off-by: Dmitry Monakhov <dmonakhov@...nvz.org>
Signed-off-by: Theodore Ts'o <tytso@....edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@...uxfoundation.org>

---
 fs/ext4/extents.c |   68 +++++++++++++++++++++++++++---------------------------
 1 file changed, 35 insertions(+), 33 deletions(-)

--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4664,7 +4664,8 @@ retry:
 }
 
 static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
-				  ext4_lblk_t len, int flags, int mode)
+				  ext4_lblk_t len, loff_t new_size,
+				  int flags, int mode)
 {
 	struct inode *inode = file_inode(file);
 	handle_t *handle;
@@ -4673,8 +4674,10 @@ static int ext4_alloc_file_blocks(struct
 	int retries = 0;
 	struct ext4_map_blocks map;
 	unsigned int credits;
+	loff_t epos;
 
 	map.m_lblk = offset;
+	map.m_len = len;
 	/*
 	 * Don't normalize the request if it can fit in one extent so
 	 * that it doesn't get unnecessarily split into multiple
@@ -4689,9 +4692,7 @@ static int ext4_alloc_file_blocks(struct
 	credits = ext4_chunk_trans_blocks(inode, len);
 
 retry:
-	while (ret >= 0 && ret < len) {
-		map.m_lblk = map.m_lblk + ret;
-		map.m_len = len = len - ret;
+	while (ret >= 0 && len) {
 		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
 					    credits);
 		if (IS_ERR(handle)) {
@@ -4708,6 +4709,21 @@ retry:
 			ret2 = ext4_journal_stop(handle);
 			break;
 		}
+		map.m_lblk += ret;
+		map.m_len = len = len - ret;
+		epos = (loff_t)map.m_lblk << inode->i_blkbits;
+		inode->i_ctime = ext4_current_time(inode);
+		if (new_size) {
+			if (epos > new_size)
+				epos = new_size;
+			if (ext4_update_inode_size(inode, epos) & 0x1)
+				inode->i_mtime = inode->i_ctime;
+		} else {
+			if (epos > inode->i_size)
+				ext4_set_inode_flag(inode,
+						    EXT4_INODE_EOFBLOCKS);
+		}
+		ext4_mark_inode_dirty(handle, inode);
 		ret2 = ext4_journal_stop(handle);
 		if (ret2)
 			break;
@@ -4731,7 +4747,7 @@ static long ext4_zero_range(struct file
 	int ret = 0;
 	int flags;
 	int credits;
-	int partial;
+	int partial_begin, partial_end;
 	loff_t start, end;
 	ext4_lblk_t lblk;
 	struct address_space *mapping = inode->i_mapping;
@@ -4771,7 +4787,8 @@ static long ext4_zero_range(struct file
 
 	if (start < offset || end > offset + len)
 		return -EINVAL;
-	partial = (offset + len) & ((1 << blkbits) - 1);
+	partial_begin = offset & ((1 << blkbits) - 1);
+	partial_end = (offset + len) & ((1 << blkbits) - 1);
 
 	lblk = start >> blkbits;
 	max_blocks = (end >> blkbits);
@@ -4805,7 +4822,7 @@ static long ext4_zero_range(struct file
 		 * If we have a partial block after EOF we have to allocate
 		 * the entire block.
 		 */
-		if (partial)
+		if (partial_end)
 			max_blocks += 1;
 	}
 
@@ -4813,6 +4830,7 @@ static long ext4_zero_range(struct file
 
 		/* Now release the pages and zero block aligned part of pages*/
 		truncate_pagecache_range(inode, start, end - 1);
+		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 
 		/* Wait all existing dio workers, newcomers will block on i_mutex */
 		ext4_inode_block_unlocked_dio(inode);
@@ -4825,11 +4843,14 @@ static long ext4_zero_range(struct file
 		if (ret)
 			goto out_dio;
 
-		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
-					     mode);
+		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+					     flags, mode);
 		if (ret)
 			goto out_dio;
 	}
+	if (!partial_begin && !partial_end)
+		goto out_dio;
+
 	/*
 	 * In worst case we have to writeout two nonadjacent unwritten
 	 * blocks and update the inode
@@ -4855,7 +4876,6 @@ static long ext4_zero_range(struct file
 		if ((offset + len) > i_size_read(inode))
 			ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
 	}
-
 	ext4_mark_inode_dirty(handle, inode);
 
 	/* Zero out partial block at the edges of the range */
@@ -4882,7 +4902,6 @@ out_mutex:
 long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
 	struct inode *inode = file_inode(file);
-	handle_t *handle;
 	loff_t new_size = 0;
 	unsigned int max_blocks;
 	int ret = 0;
@@ -4938,32 +4957,15 @@ long ext4_fallocate(struct file *file, i
 			goto out;
 	}
 
-	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode);
+	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+				     flags, mode);
 	if (ret)
 		goto out;
 
-	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
-	if (IS_ERR(handle))
-		goto out;
-
-	inode->i_ctime = ext4_current_time(inode);
-
-	if (new_size) {
-		if (ext4_update_inode_size(inode, new_size) & 0x1)
-			inode->i_mtime = inode->i_ctime;
-	} else {
-		/*
-		* Mark that we allocate beyond EOF so the subsequent truncate
-		* can proceed even if the new size is the same as i_size.
-		*/
-		if ((offset + len) > i_size_read(inode))
-			ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
+	if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
+		ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
+						EXT4_I(inode)->i_sync_tid);
 	}
-	ext4_mark_inode_dirty(handle, inode);
-	if (file->f_flags & O_SYNC)
-		ext4_handle_sync(handle);
-
-	ext4_journal_stop(handle);
 out:
 	mutex_unlock(&inode->i_mutex);
 	trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/