[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1321893597-11024-5-git-send-email-tm@tao.ma>
Date: Tue, 22 Nov 2011 00:39:43 +0800
From: Tao Ma <tm@....ma>
To: linux-ext4@...r.kernel.org
Cc: tytso@....edu, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, adilger@...ger.ca
Subject: [PATCH V2 05/19] ext4: Add normal write support for inline data.
From: Tao Ma <boyu.mt@...bao.com>
For a normal write case(not journalled write, not delayed allocation),
we write to the inline if the file is small and convert it to an extent
based file when the write is larger than the max inline size.
Signed-off-by: Tao Ma <boyu.mt@...bao.com>
---
fs/ext4/ext4.h | 11 +++
fs/ext4/extents.c | 9 ++-
fs/ext4/inline.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/ext4/inode.c | 40 +++++++---
fs/ext4/xattr.h | 26 ++++++
5 files changed, 297 insertions(+), 12 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3e560ba..d9603c1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1861,8 +1861,19 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
struct buffer_head *ext4_bread(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
+int ext4_get_block_write(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
+int walk_page_buffers(handle_t *handle,
+ struct buffer_head *head,
+ unsigned from,
+ unsigned to,
+ int *partial,
+ int (*fn)(handle_t *handle,
+ struct buffer_head *bh));
+int do_journal_get_write_access(handle_t *handle,
+ struct buffer_head *bh);
extern struct inode *ext4_iget(struct super_block *, unsigned long);
extern int ext4_write_inode(struct inode *, struct writeback_control *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 61fa9e1..a0063b7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -42,6 +42,7 @@
#include <asm/uaccess.h>
#include <linux/fiemap.h>
#include "ext4_jbd2.h"
+#include "xattr.h"
#include <trace/events/ext4.h>
@@ -2178,7 +2179,13 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
int index;
- int depth = ext_depth(inode);
+ int depth;
+
+ /* If we are converting the inline data, only one is needed here. */
+ if (ext4_has_inline_data(inode))
+ return 1;
+
+ depth = ext_depth(inode);
if (chunk)
index = depth * 2;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index fa8f554..1d1572b 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -14,6 +14,7 @@
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
+#include "truncate.h"
#define EXT4_XATTR_SYSTEM_DATA_NAME "data"
#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
@@ -486,6 +487,228 @@ int ext4_readpage_inline(struct inode *inode, struct page *page)
return ret;
}
+static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
+ struct inode *inode,
+ unsigned flags)
+{
+ int ret, needed_blocks;
+ handle_t *handle = NULL;
+ int retries = 0;
+ struct page *page = NULL;
+ unsigned from, to;
+ struct ext4_iloc iloc;
+
+ if (!ext4_has_inline_data(inode)) {
+ /*
+ * clear the flag so that no new write
+ * will trap here again.
+ */
+ ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+ return 0;
+ }
+
+ needed_blocks = ext4_writepage_trans_blocks(inode);
+ from = 0;
+ to = ext4_get_inline_size(inode);
+
+ ret = ext4_get_inode_loc(inode, &iloc);
+ if (ret)
+ return ret;
+
+retry:
+ handle = ext4_journal_start(inode, needed_blocks);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ handle = NULL;
+ goto out;
+ }
+
+ down_write(&EXT4_I(inode)->xattr_sem);
+ /* If some one has already done this for us, just exit. */
+ if (!ext4_has_inline_data(inode))
+ goto out;
+
+ /* We cannot recurse into the filesystem as the transaction is already
+ * started */
+ flags |= AOP_FLAG_NOFS;
+
+ page = grab_cache_page_write_begin(mapping, 0, flags);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (!PageUptodate(page)) {
+ void *kaddr = kmap_atomic(page, KM_USER0);
+ ret = ext4_read_inline_data(inode, kaddr,
+ i_size_read(inode));
+ flush_dcache_page(page);
+ kunmap_atomic(kaddr, KM_USER0);
+ SetPageUptodate(page);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = ext4_destroy_inline_data_nolock(handle, inode);
+ if (ret)
+ goto out;
+
+ if (ext4_should_dioread_nolock(inode))
+ ret = __block_write_begin(page, from, to, ext4_get_block_write);
+ else
+ ret = __block_write_begin(page, from, to, ext4_get_block);
+
+ if (!ret && ext4_should_journal_data(inode)) {
+ ret = walk_page_buffers(handle, page_buffers(page),
+ from, to, NULL, do_journal_get_write_access);
+ }
+
+ if (ret) {
+ unlock_page(page);
+ page_cache_release(page);
+ ext4_orphan_add(handle, inode);
+ up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_journal_stop(handle);
+ handle = NULL;
+ ext4_truncate_failed_write(inode);
+ /*
+ * If truncate failed early the inode might
+ * still be on the orphan list; we need to
+ * make sure the inode is removed from the
+ * orphan list in that case.
+ */
+ if (inode->i_nlink)
+ ext4_orphan_del(NULL, inode);
+ }
+
+ if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
+
+ block_commit_write(page, from, to);
+out:
+ if (page) {
+ unlock_page(page);
+ page_cache_release(page);
+ }
+ up_write(&EXT4_I(inode)->xattr_sem);
+ if (handle)
+ ext4_journal_stop(handle);
+ brelse(iloc.bh);
+ return ret;
+}
+
+/*
+ * Try to write data in the inode.
+ * If the inode has inline data, check whether the new write can be
+ * in the inode also. If not, create the page the handle, move the data
+ * to the page make it update and let the later codes create extent for it.
+ */
+int ext4_try_to_write_inline_data(struct address_space *mapping,
+ struct inode *inode,
+ loff_t pos, unsigned len,
+ unsigned flags,
+ struct page **pagep)
+{
+ int ret;
+ handle_t *handle;
+ struct page *page;
+ struct ext4_iloc iloc;
+
+ if (pos + len > ext4_get_max_inline_size(inode))
+ goto convert;
+
+ ret = ext4_get_inode_loc(inode, &iloc);
+ if (ret)
+ return ret;
+
+ /*
+ * The possible write could happen in the inode,
+ * so try to reserve the space in inode first.
+ */
+ handle = ext4_journal_start(inode, 1);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ handle = NULL;
+ goto out;
+ }
+
+ ret = ext4_prepare_inline_data(handle, inode, pos + len);
+ if (ret && ret != -ENOSPC)
+ goto out;
+
+ /* We don't have space in inline inode, so convert it to extent. */
+ if (ret == -ENOSPC) {
+ ext4_journal_stop(handle);
+ brelse(iloc.bh);
+ goto convert;
+ }
+
+ flags |= AOP_FLAG_NOFS;
+
+ page = grab_cache_page_write_begin(mapping, 0, flags);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ *pagep = page;
+
+ if (!PageUptodate(page)) {
+ ret = ext4_read_inline_page(inode, page);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = 1;
+ handle = NULL;
+out:
+ if (handle)
+ ext4_journal_stop(handle);
+ brelse(iloc.bh);
+ return ret;
+convert:
+ return ext4_convert_inline_data_to_extent(mapping,
+ inode, flags);
+}
+
+int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
+ unsigned copied, struct page *page)
+{
+ int ret;
+ void *kaddr;
+ struct ext4_iloc iloc;
+
+ if (unlikely(copied < len)) {
+ if (!PageUptodate(page)) {
+ copied = 0;
+ goto out;
+ }
+ }
+
+ ret = ext4_get_inode_loc(inode, &iloc);
+ if (ret) {
+ ext4_std_error(inode->i_sb, ret);
+ copied = 0;
+ goto out;
+ }
+
+ down_write(&EXT4_I(inode)->xattr_sem);
+ BUG_ON(!ext4_has_inline_data(inode));
+
+ kaddr = kmap_atomic(page, KM_USER0);
+ ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
+ kunmap_atomic(kaddr, KM_USER0);
+ SetPageUptodate(page);
+ /* clear page dirty so that writepages wouldn't work for us. */
+ ClearPageDirty(page);
+
+ up_write(&EXT4_I(inode)->xattr_sem);
+ brelse(iloc.bh);
+out:
+ return copied;
+}
+
+
int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
{
int ret;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4f764b7..26ca9a7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -711,7 +711,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
return NULL;
}
-static int walk_page_buffers(handle_t *handle,
+int walk_page_buffers(handle_t *handle,
struct buffer_head *head,
unsigned from,
unsigned to,
@@ -767,8 +767,8 @@ static int walk_page_buffers(handle_t *handle,
* is elevated. We'll still have enough credits for the tiny quotafile
* write.
*/
-static int do_journal_get_write_access(handle_t *handle,
- struct buffer_head *bh)
+int do_journal_get_write_access(handle_t *handle,
+ struct buffer_head *bh)
{
int dirty = buffer_dirty(bh);
int ret;
@@ -791,8 +791,6 @@ static int do_journal_get_write_access(handle_t *handle,
return ret;
}
-static int ext4_get_block_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
static int ext4_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -815,6 +813,17 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
+ if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
+ ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
+ flags, pagep);
+ if (ret < 0)
+ goto out;
+ if (ret == 1) {
+ ret = 0;
+ goto out;
+ }
+ }
+
retry:
handle = ext4_journal_start(inode, needed_blocks);
if (IS_ERR(handle)) {
@@ -832,6 +841,7 @@ retry:
ret = -ENOMEM;
goto out;
}
+
*pagep = page;
if (ext4_should_dioread_nolock(inode))
@@ -896,7 +906,12 @@ static int ext4_generic_write_end(struct file *file,
struct inode *inode = mapping->host;
handle_t *handle = ext4_journal_current_handle();
- copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+ if (ext4_has_inline_data(inode))
+ copied = ext4_write_inline_data_end(inode, pos, len,
+ copied, page);
+ else
+ copied = block_write_end(file, mapping, pos,
+ len, copied, page, fsdata);
/*
* No need to use i_size_read() here, the i_size
@@ -2773,7 +2788,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
* We allocate an uinitialized extent if blocks haven't been allocated.
* The extent will be converted to initialized after the IO is complete.
*/
-static int ext4_get_block_write(struct inode *inode, sector_t iblock,
+int ext4_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
@@ -3795,7 +3810,8 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
ext4_set_inode_state(inode, EXT4_STATE_XATTR);
ext4_find_inline_data(inode);
- }
+ } else
+ EXT4_I(inode)->i_inline_off = 0;
}
struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
@@ -3933,9 +3949,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ret = -EIO;
goto bad_inode;
} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
- if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ if (!EXT4_I(inode)->i_inline_off &&
+ (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
(S_ISLNK(inode->i_mode) &&
- !ext4_inode_is_fast_symlink(inode)))
+ !ext4_inode_is_fast_symlink(inode))))
/* Validate extent which is part of inode */
ret = ext4_ext_check_inode(inode);
} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
@@ -4125,9 +4142,10 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le32(new_encode_dev(inode->i_rdev));
raw_inode->i_block[2] = 0;
}
- } else
+ } else if (!ext4_has_inline_data(inode)) {
for (block = 0; block < EXT4_N_BLOCKS; block++)
raw_inode->i_block[block] = ei->i_data[block];
+ }
raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
if (ei->i_extra_isize) {
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 129ead1..f2d9c53 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -141,6 +141,15 @@ extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
extern int ext4_readpage_inline(struct inode *inode, struct page *page);
+extern int ext4_try_to_write_inline_data(struct address_space *mapping,
+ struct inode *inode,
+ loff_t pos, unsigned len,
+ unsigned flags,
+ struct page **pagep);
+extern int ext4_write_inline_data_end(struct inode *inode,
+ loff_t pos, unsigned len,
+ unsigned copied,
+ struct page *page);
# else /* CONFIG_EXT4_FS_XATTR */
static inline int
@@ -267,6 +276,23 @@ static inline int ext4_readpage_inline(struct inode *inode, struct page *page)
{
return 0;
}
+
+static inline int ext4_try_to_write_inline_data(struct address_space *mapping,
+ struct inode *inode,
+ loff_t pos, unsigned len,
+ unsigned flags,
+ struct page **pagep)
+{
+ return 0;
+}
+
+static inline int ext4_write_inline_data_end(struct inode *inode,
+ loff_t pos, unsigned len,
+ unsigned copied,
+ struct page *page)
+{
+ return 0;
+}
# endif /* CONFIG_EXT4_FS_XATTR */
#ifdef CONFIG_EXT4_FS_SECURITY
--
1.7.0.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists