[<prev] [next>] [day] [month] [year] [list]
Message-id: <001301cf6e3f$9eee0df0$dcca29d0$@samsung.com>
Date: Tue, 13 May 2014 09:09:34 +0900
From: Namjae Jeon <namjae.jeon@...sung.com>
To: Theodore Ts'o <tytso@....edu>
Cc: linux-ext4 <linux-ext4@...r.kernel.org>,
Lukáš Czerner <lczerner@...hat.com>,
Ashish Sangwan <a.sangwan@...sung.com>
Subject: [PATCH] ext4: introduce new i_write_mutex to protect fallocate
Introduce a new per-inode i_write_mutex to keep new writes from starting
while a fallocate operation is in progress. Also, get rid of aio_mutex,
as the serialization it provided is now covered by i_write_mutex.
Signed-off-by: Namjae Jeon <namjae.jeon@...sung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@...sung.com>
---
fs/ext4/ext4.h | 6 +++---
fs/ext4/extents.c | 18 +++++++++++++++---
fs/ext4/file.c | 18 +++++++++++-------
fs/ext4/inode.c | 7 ++++++-
fs/ext4/super.c | 3 +--
5 files changed, 36 insertions(+), 16 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6b45afa..77e5705 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -943,6 +943,9 @@ struct ext4_inode_info {
/* Precomputed uuid+inum+igen checksum for seeding inode checksums */
__u32 i_csum_seed;
+
+ /* protects fallocate operations racing with new writes */
+ struct mutex i_write_mutex;
};
/*
@@ -2827,10 +2830,7 @@ static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
#define EXT4_WQ_HASH_SZ 37
#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
EXT4_WQ_HASH_SZ])
-#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
- EXT4_WQ_HASH_SZ])
extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
#define EXT4_RESIZING 0
extern int ext4_resize_begin(struct super_block *sb);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 086baa9..5262750 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4741,6 +4741,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (!S_ISREG(inode->i_mode))
return -EINVAL;
+ mutex_lock(&EXT4_I(inode)->i_write_mutex);
+
/*
* Write out all dirty pages to avoid race conditions
* Then release them.
@@ -4748,8 +4750,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
ret = filemap_write_and_wait_range(mapping, offset,
offset + len - 1);
- if (ret)
+ if (ret) {
+ mutex_unlock(&EXT4_I(inode)->i_write_mutex);
return ret;
+ }
}
/*
@@ -4761,8 +4765,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
start = round_up(offset, 1 << blkbits);
end = round_down((offset + len), 1 << blkbits);
- if (start < offset || end > offset + len)
+ if (start < offset || end > offset + len) {
+ mutex_unlock(&EXT4_I(inode)->i_write_mutex);
return -EINVAL;
+ }
partial = (offset + len) & ((1 << blkbits) - 1);
lblk = start >> blkbits;
@@ -4859,6 +4865,7 @@ out_dio:
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
mutex_unlock(&inode->i_mutex);
+ mutex_unlock(&EXT4_I(inode)->i_write_mutex);
return ret;
}
@@ -5428,11 +5435,15 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
+ mutex_lock(&EXT4_I(inode)->i_write_mutex);
+
/* Call ext4_force_commit to flush all data in case of data=journal. */
if (ext4_should_journal_data(inode)) {
ret = ext4_force_commit(inode->i_sb);
- if (ret)
+ if (ret) {
+ mutex_unlock(&EXT4_I(inode)->i_write_mutex);
return ret;
+ }
}
/*
@@ -5518,5 +5529,6 @@ out_dio:
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
mutex_unlock(&inode->i_mutex);
+ mutex_unlock(&EXT4_I(inode)->i_write_mutex);
return ret;
}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 708aad7..557b4ac 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -93,7 +93,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(iocb->ki_filp);
- struct mutex *aio_mutex = NULL;
+ bool unaligned_direct_aio = false;
struct blk_plug plug;
int o_direct = file->f_flags & O_DIRECT;
int overwrite = 0;
@@ -101,6 +101,8 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t ret;
loff_t pos = iocb->ki_pos;
+ mutex_lock(&EXT4_I(inode)->i_write_mutex);
+
/*
* Unaligned direct AIO must be serialized; see comment above
* In the case of O_APPEND, assume that we must always serialize
@@ -110,8 +112,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
!is_sync_kiocb(iocb) &&
(file->f_flags & O_APPEND ||
ext4_unaligned_aio(inode, from, pos))) {
- aio_mutex = ext4_aio_mutex(inode);
- mutex_lock(aio_mutex);
+ unaligned_direct_aio = true;
ext4_unwritten_wait(inode);
}
@@ -143,8 +144,9 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->private = &overwrite;
/* check whether we do a DIO overwrite or not */
- if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
- !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
+ if (ext4_should_dioread_nolock(inode) &&
+ !unaligned_direct_aio && !file->f_mapping->nrpages &&
+ pos + length <= i_size_read(inode)) {
struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits;
int err, len;
@@ -174,6 +176,8 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ret = __generic_file_write_iter(iocb, from);
mutex_unlock(&inode->i_mutex);
+ if (!unaligned_direct_aio)
+ mutex_unlock(&EXT4_I(inode)->i_write_mutex);
if (ret > 0) {
ssize_t err;
@@ -186,8 +190,8 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
blk_finish_plug(&plug);
errout:
- if (aio_mutex)
- mutex_unlock(aio_mutex);
+ if (unaligned_direct_aio)
+ mutex_unlock(&EXT4_I(inode)->i_write_mutex);
return ret;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b1dc334..d804120 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3528,6 +3528,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
trace_ext4_punch_hole(inode, offset, length, 0);
+ mutex_lock(&EXT4_I(inode)->i_write_mutex);
+
/*
* Write out all dirty pages to avoid race conditions
* Then release them.
@@ -3535,8 +3537,10 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
ret = filemap_write_and_wait_range(mapping, offset,
offset + length - 1);
- if (ret)
+ if (ret) {
+ mutex_unlock(&EXT4_I(inode)->i_write_mutex);
return ret;
+ }
}
mutex_lock(&inode->i_mutex);
@@ -3637,6 +3641,7 @@ out_dio:
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
mutex_unlock(&inode->i_mutex);
+ mutex_unlock(&EXT4_I(inode)->i_write_mutex);
return ret;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1f8cb18..e236c85 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -904,6 +904,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
atomic_set(&ei->i_ioend_count, 0);
atomic_set(&ei->i_unwritten, 0);
INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
+ mutex_init(&ei->i_write_mutex);
return &ei->vfs_inode;
}
@@ -5505,7 +5506,6 @@ static void ext4_exit_feat_adverts(void)
/* Shared across all ext4 file systems */
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
static int __init ext4_init_fs(void)
{
@@ -5518,7 +5518,6 @@ static int __init ext4_init_fs(void)
ext4_check_flag_values();
for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
- mutex_init(&ext4__aio_mutex[i]);
init_waitqueue_head(&ext4__ioend_wq[i]);
}
--
1.7.11-rc0
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists