[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date: Fri, 12 Feb 2016 13:25:06 -0500
From: Eric Whitney <enwlinux@...il.com>
To: linux-ext4@...r.kernel.org
Cc: tytso@....edu, jack@...e.cz
Subject: [PATCH] ext4: revert i_data_sum locking cleanups for dioread_nolock
Commit 2bcba4781fa3 ("ext4: get rid of EXT4_GET_BLOCKS_NO_LOCK flag")
can cause a kernel panic when xfstest generic/300 is run on a file
system mounted with dioread_nolock. The panic is typically triggered
from check_irqs_on() (fs/buffer.c: 1272), and happens because
ext4_end_io_dio() is being called in an interrupt context rather than
from a workqueue for AIO. This suggests that buffer_defer_completion
may not be set properly when creating an unwritten extent for async
direct I/O.
Revert the locking changes until this problem can be resolved. Patch
applied to 4.5-rc3 and tested with a full xfstest-bld auto group run.
Signed-off-by: Eric Whitney <enwlinux@...il.com>
---
fs/ext4/ext4.h | 6 ++++--
fs/ext4/inode.c | 43 +++++++++++++++++++++++--------------------
include/trace/events/ext4.h | 1 +
3 files changed, 28 insertions(+), 22 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0662b28..36fcf2a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -563,10 +563,12 @@ enum {
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
/* Request will not result in inode size update (user for fallocate) */
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
+ /* Do not take i_data_sem locking in ext4_map_blocks */
+#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
/* Convert written extents to unwritten */
-#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200
/* Write zeros to newly created written extents */
-#define EXT4_GET_BLOCKS_ZERO 0x0200
+#define EXT4_GET_BLOCKS_ZERO 0x0300
#define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\
EXT4_GET_BLOCKS_ZERO)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 83bc8bf..f72dc04 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -418,7 +418,8 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
* out taking i_data_sem. So at the time the unwritten extent
* could be converted.
*/
- down_read(&EXT4_I(inode)->i_data_sem);
+ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+ down_read(&EXT4_I(inode)->i_data_sem);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
retval = ext4_ext_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -426,7 +427,8 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
retval = ext4_ind_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
}
- up_read((&EXT4_I(inode)->i_data_sem));
+ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+ up_read((&EXT4_I(inode)->i_data_sem));
/*
* We don't check m_len because extent will be collpased in status
@@ -522,7 +524,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* Try to see if we can get the block without requesting a new
* file system block.
*/
- down_read(&EXT4_I(inode)->i_data_sem);
+ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+ down_read(&EXT4_I(inode)->i_data_sem);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
retval = ext4_ext_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -553,7 +556,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
if (ret < 0)
retval = ret;
}
- up_read((&EXT4_I(inode)->i_data_sem));
+ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+ up_read((&EXT4_I(inode)->i_data_sem));
found:
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@ -703,7 +707,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
map.m_lblk = iblock;
map.m_len = bh->b_size >> inode->i_blkbits;
- if (flags && !handle) {
+ if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
/* Direct IO write... */
if (map.m_len > DIO_MAX_BLOCKS)
map.m_len = DIO_MAX_BLOCKS;
@@ -898,6 +902,9 @@ int do_journal_get_write_access(handle_t *handle,
return ret;
}
+static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
+
#ifdef CONFIG_EXT4_FS_ENCRYPTION
static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block)
@@ -3070,21 +3077,13 @@ int ext4_get_block_write(struct inode *inode, sector_t iblock,
EXT4_GET_BLOCKS_IO_CREATE_EXT);
}
-static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
+static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
- int ret;
-
- ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
+ ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
inode->i_ino, create);
- ret = _ext4_get_block(inode, iblock, bh_result, 0);
- /*
- * Blocks should have been preallocated! ext4_file_write_iter() checks
- * that.
- */
- WARN_ON_ONCE(!buffer_mapped(bh_result));
-
- return ret;
+ return _ext4_get_block(inode, iblock, bh_result,
+ EXT4_GET_BLOCKS_NO_LOCK);
}
#ifdef CONFIG_FS_DAX
@@ -3230,8 +3229,10 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
/* If we do a overwrite dio, i_mutex locking can be released */
overwrite = *((int *)iocb->private);
- if (overwrite)
+ if (overwrite) {
+ down_read(&EXT4_I(inode)->i_data_sem);
inode_unlock(inode);
+ }
/*
* We could direct write to holes and fallocate.
@@ -3274,7 +3275,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
}
if (overwrite) {
- get_block_func = ext4_get_block_overwrite;
+ get_block_func = ext4_get_block_write_nolock;
} else {
get_block_func = ext4_get_block_write;
dio_flags = DIO_LOCKING;
@@ -3330,8 +3331,10 @@ retake_lock:
if (iov_iter_rw(iter) == WRITE)
inode_dio_end(inode);
/* take i_mutex locking again if we do a ovewrite dio */
- if (overwrite)
+ if (overwrite) {
+ up_read(&EXT4_I(inode)->i_data_sem);
inode_lock(inode);
+ }
return ret;
}
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 4e4b2fa..6599e75 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -43,6 +43,7 @@ struct extent_status;
{ EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \
{ EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \
{ EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \
+ { EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" }, \
{ EXT4_GET_BLOCKS_ZERO, "ZERO" })
#define show_mflags(flags) __print_flags(flags, "", \
--
2.1.4
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists