[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20080604155955.GJ16572@duck.suse.cz>
Date: Wed, 4 Jun 2008 17:59:55 +0200
From: Jan Kara <jack@...e.cz>
To: linux-ext4@...r.kernel.org
Subject: [PATCH 4/5] ext4: Use new framework for data=ordered mode in JBD2
From: Jan Kara <jack@...e.cz>
Date: Thu, 22 May 2008 00:51:17 +0200
Subject: [PATCH] ext4: Use new framework for data=ordered mode in JBD2
Signed-off-by: Jan Kara <jack@...e.cz>
---
fs/ext4/ext4_i.h | 1 +
fs/ext4/ext4_jbd2.h | 7 ++-
fs/ext4/ialloc.c | 1 +
fs/ext4/inode.c | 160 ++++++++++++++++++---------------------------------
fs/ext4/super.c | 4 +-
5 files changed, 67 insertions(+), 106 deletions(-)
Index: linux-2.6-linus/fs/ext4/ext4_i.h
===================================================================
--- linux-2.6-linus.orig/fs/ext4/ext4_i.h
+++ linux-2.6-linus/fs/ext4/ext4_i.h
@@ -150,6 +150,7 @@ struct ext4_inode_info {
*/
struct rw_semaphore i_data_sem;
struct inode vfs_inode;
+ struct jbd2_inode jinode;
unsigned long i_ext_generation;
struct ext4_ext_cache i_cached_extent;
Index: linux-2.6-linus/fs/ext4/ext4_jbd2.h
===================================================================
--- linux-2.6-linus.orig/fs/ext4/ext4_jbd2.h
+++ linux-2.6-linus/fs/ext4/ext4_jbd2.h
@@ -154,8 +154,6 @@ int __ext4_journal_dirty_metadata(const
#define ext4_journal_forget(handle, bh) \
__ext4_journal_forget(__FUNCTION__, (handle), (bh))
-int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
-
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
int __ext4_journal_stop(const char *where, handle_t *handle);
@@ -192,6 +190,11 @@ static inline int ext4_journal_force_com
return jbd2_journal_force_commit(journal);
}
+static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
+{
+ return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+}
+
/* super.c */
int ext4_force_commit(struct super_block *sb);
Index: linux-2.6-linus/fs/ext4/ialloc.c
===================================================================
--- linux-2.6-linus.orig/fs/ext4/ialloc.c
+++ linux-2.6-linus/fs/ext4/ialloc.c
@@ -820,6 +820,7 @@ got:
ei->i_state = EXT4_STATE_NEW;
ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
+ jbd2_journal_init_jbd_inode(&ei->jinode, inode);
ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
Index: linux-2.6-linus/fs/ext4/inode.c
===================================================================
--- linux-2.6-linus.orig/fs/ext4/inode.c
+++ linux-2.6-linus/fs/ext4/inode.c
@@ -39,6 +39,13 @@
#include "xattr.h"
#include "acl.h"
+static inline int ext4_begin_ordered_truncate(struct inode *inode,
+ loff_t new_size)
+{
+ return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
+ new_size);
+}
+
/*
* Test whether an inode is a fast symlink.
*/
@@ -181,6 +188,8 @@ void ext4_delete_inode (struct inode * i
{
handle_t *handle;
+ if (ext4_should_order_data(inode))
+ ext4_begin_ordered_truncate(inode, 0);
truncate_inode_pages(&inode->i_data, 0);
if (is_bad_inode(inode))
@@ -1273,15 +1282,6 @@ out:
return ret;
}
-int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
-{
- int err = jbd2_journal_dirty_data(handle, bh);
- if (err)
- ext4_journal_abort_handle(__func__, __func__,
- bh, handle, err);
- return err;
-}
-
/* For write_end() in data=journal mode */
static int write_end_fn(handle_t *handle, struct buffer_head *bh)
{
@@ -1311,8 +1311,7 @@ static int ext4_ordered_write_end(struct
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
- ret = walk_page_buffers(handle, page_buffers(page),
- from, to, NULL, ext4_journal_dirty_data);
+ ret = ext4_jbd2_file_inode(handle, inode);
if (ret == 0) {
/*
@@ -1472,25 +1471,22 @@ static int bput_one(handle_t *handle, st
return 0;
}
-static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
-{
- if (buffer_mapped(bh))
- return ext4_journal_dirty_data(handle, bh);
- return 0;
-}
-
static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
{
return !buffer_mapped(bh) || buffer_delay(bh);
}
/*
- * Note that we don't need to start a transaction unless we're journaling
- * data because we should have holes filled from ext4_page_mkwrite(). If
- * we are journaling data, we cannot start transaction directly because
- * transaction start ranks above page lock so we have to do some magic...
+ * Note that we don't need to start a transaction unless we're journaling data
+ * because we should have holes filled from ext4_page_mkwrite(). We don't even
+ * need to file the inode to the transaction's list in ordered mode because if
+ * we are writing back data added by write(), the inode is already there and if
+ * we are writing back data modified via mmap(), no one guarantees in which
+ * transaction the data will hit the disk. In case we are journaling data, we
+ * cannot start transaction directly because transaction start ranks above page
+ * lock so we have to do some magic.
*
- * In all journalling modes block_write_full_page() will start the I/O.
+ * In all journaling modes block_write_full_page() will start the I/O.
*
* Problem:
*
@@ -1533,86 +1529,7 @@ static int ext4_bh_unmapped_or_delay(han
* us.
*
*/
-static int __ext4_ordered_writepage(struct page *page,
- struct writeback_control *wbc)
-{
- struct inode *inode = page->mapping->host;
- struct buffer_head *page_bufs;
- handle_t *handle = NULL;
- int ret = 0;
- int err;
-
- if (!page_has_buffers(page)) {
- create_empty_buffers(page, inode->i_sb->s_blocksize,
- (1 << BH_Dirty)|(1 << BH_Uptodate));
- }
- page_bufs = page_buffers(page);
- walk_page_buffers(handle, page_bufs, 0,
- PAGE_CACHE_SIZE, NULL, bget_one);
-
- ret = block_write_full_page(page, ext4_get_block, wbc);
-
- /*
- * The page can become unlocked at any point now, and
- * truncate can then come in and change things. So we
- * can't touch *page from now on. But *page_bufs is
- * safe due to elevated refcount.
- */
-
- /*
- * And attach them to the current transaction. But only if
- * block_write_full_page() succeeded. Otherwise they are unmapped,
- * and generally junk.
- */
- if (ret == 0) {
- handle = ext4_journal_start(inode,
- ext4_writepage_trans_blocks(inode));
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- goto out_put;
- }
-
- ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
- NULL, jbd2_journal_dirty_data_fn);
- err = ext4_journal_stop(handle);
- if (!ret)
- ret = err;
- }
-out_put:
- walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL,
- bput_one);
- return ret;
-}
-
-static int ext4_ordered_writepage(struct page *page,
- struct writeback_control *wbc)
-{
- struct inode *inode = page->mapping->host;
- loff_t size = i_size_read(inode);
- loff_t len;
-
- J_ASSERT(PageLocked(page));
- J_ASSERT(page_has_buffers(page));
- if (page->index == size >> PAGE_CACHE_SHIFT)
- len = size & ~PAGE_CACHE_MASK;
- else
- len = PAGE_CACHE_SIZE;
- BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
- ext4_bh_unmapped_or_delay));
-
- /*
- * We give up here if we're reentered, because it might be for a
- * different filesystem.
- */
- if (!ext4_journal_current_handle())
- return __ext4_ordered_writepage(page, wbc);
-
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
-}
-
-static int __ext4_writeback_writepage(struct page *page,
+static int __ext4_normal_writepage(struct page *page,
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
@@ -1624,7 +1541,7 @@ static int __ext4_writeback_writepage(st
}
-static int ext4_writeback_writepage(struct page *page,
+static int ext4_normal_writepage(struct page *page,
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
@@ -1641,7 +1558,7 @@ static int ext4_writeback_writepage(stru
ext4_bh_unmapped_or_delay));
if (!ext4_journal_current_handle())
- return __ext4_writeback_writepage(page, wbc);
+ return __ext4_normal_writepage(page, wbc);
redirty_page_for_writepage(wbc, page);
unlock_page(page);
@@ -1877,7 +1794,7 @@ static int ext4_journalled_set_page_dirt
static const struct address_space_operations ext4_ordered_aops = {
.readpage = ext4_readpage,
.readpages = ext4_readpages,
- .writepage = ext4_ordered_writepage,
+ .writepage = ext4_normal_writepage,
.sync_page = block_sync_page,
.write_begin = ext4_write_begin,
.write_end = ext4_ordered_write_end,
@@ -1891,7 +1808,7 @@ static const struct address_space_operat
static const struct address_space_operations ext4_writeback_aops = {
.readpage = ext4_readpage,
.readpages = ext4_readpages,
- .writepage = ext4_writeback_writepage,
+ .writepage = ext4_normal_writepage,
.sync_page = block_sync_page,
.write_begin = ext4_write_begin,
.write_end = ext4_writeback_write_end,
@@ -2019,7 +1936,7 @@ int ext4_block_truncate_page(handle_t *h
err = ext4_journal_dirty_metadata(handle, bh);
} else {
if (ext4_should_order_data(inode))
- err = ext4_journal_dirty_data(handle, bh);
+ err = ext4_jbd2_file_inode(handle, inode);
mark_buffer_dirty(bh);
}
@@ -2787,6 +2704,7 @@ struct inode *ext4_iget(struct super_blo
ei->i_default_acl = EXT4_ACL_NOT_CACHED;
#endif
ei->i_block_alloc_info = NULL;
+ jbd2_journal_init_jbd_inode(&ei->jinode, inode);
ret = __ext4_get_inode_loc(inode, &iloc, 0);
if (ret < 0)
@@ -3149,7 +3067,14 @@ int ext4_write_inode(struct inode *inode
* be freed, so we have a strong guarantee that no future commit will
* leave these blocks visible to the user.)
*
- * Called with inode->sem down.
+ * Another thing we have to assure is that if we are in ordered mode
+ * and inode is still attached to the committing transaction, we must
+ * start writeout of all the dirty pages which are being truncated.
+ * This way we are sure that all the data written in the previous
+ * transaction are already on disk (truncate waits for pages under
+ * writeback).
+ *
+ * Called with inode->i_mutex down.
*/
int ext4_setattr(struct dentry *dentry, struct iattr *attr)
{
@@ -3215,6 +3140,22 @@ int ext4_setattr(struct dentry *dentry,
if (!error)
error = rc;
ext4_journal_stop(handle);
+
+ if (ext4_should_order_data(inode)) {
+ error = ext4_begin_ordered_truncate(inode,
+ attr->ia_size);
+ if (error) {
+ /* Do as much error cleanup as possible */
+ handle = ext4_journal_start(inode, 3);
+ if (IS_ERR(handle)) {
+ ext4_orphan_del(NULL, inode);
+ goto err_out;
+ }
+ ext4_orphan_del(handle, inode);
+ ext4_journal_stop(handle);
+ goto err_out;
+ }
+ }
}
rc = inode_setattr(inode, attr);
@@ -3624,12 +3565,13 @@ int ext4_page_mkwrite(struct vm_area_str
lock_page(page);
wbc.range_start = page_offset(page);
wbc.range_end = page_offset(page) + len;
- if (ext4_should_writeback_data(inode))
- ret = __ext4_writeback_writepage(page, &wbc);
- else if (ext4_should_order_data(inode))
- ret = __ext4_ordered_writepage(page, &wbc);
- else
+ if (!ext4_should_journal_data(inode)) {
+ ret = __ext4_normal_writepage(page, &wbc);
+ if (!ret && ext4_should_order_data(inode))
+ ret = ext4_jbd2_file_inode(handle, inode);
+ } else {
ret = __ext4_journalled_writepage(page, &wbc);
+ }
/* Page got unlocked in writepage */
err = ext4_journal_stop(handle);
if (!ret)
Index: linux-2.6-linus/fs/ext4/super.c
===================================================================
--- linux-2.6-linus.orig/fs/ext4/super.c
+++ linux-2.6-linus/fs/ext4/super.c
@@ -637,6 +637,8 @@ static void ext4_clear_inode(struct inod
EXT4_I(inode)->i_block_alloc_info = NULL;
if (unlikely(rsv))
kfree(rsv);
+ jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
+ &EXT4_I(inode)->jinode);
}
static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb)
@@ -3353,7 +3355,7 @@ static ssize_t ext4_quota_write(struct s
err = ext4_journal_dirty_metadata(handle, bh);
else {
/* Always do at least ordered writes for quotas */
- err = ext4_journal_dirty_data(handle, bh);
+ err = ext4_jbd2_file_inode(handle, inode);
mark_buffer_dirty(bh);
}
brelse(bh);
Index: linux-2.6-linus/fs/ext4/mballoc.c
===================================================================
--- linux-2.6-linus.orig/fs/ext4/mballoc.c
+++ linux-2.6-linus/fs/ext4/mballoc.c
@@ -2255,6 +2255,8 @@ static int ext4_mb_init_backend(struct s
printk(KERN_ERR "EXT4-fs: can't get new inode\n");
goto err_freesgi;
}
+ jbd2_journal_init_jbd_inode(&EXT4_I(sbi->s_buddy_cache)->jinode,
+ sbi->s_buddy_cache);
EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb);
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists