[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20140507135556.GA12433@quack.suse.cz>
Date: Wed, 7 May 2014 15:55:56 +0200
From: Jan Kara <jack@...e.cz>
To: Namjae Jeon <namjae.jeon@...sung.com>
Cc: Theodore Ts'o <tytso@....edu>, Jan Kara <jack@...e.cz>,
linux-ext4 <linux-ext4@...r.kernel.org>
Subject: Re: [PATCH v3] ext4: fix data integrity sync in ordered mode
On Wed 07-05-14 14:15:24, Namjae Jeon wrote:
> When we perform a data integrity sync we tag all the dirty pages with
> PAGECACHE_TAG_TOWRITE at start of ext4_da_writepages.
> Later we check for this tag in write_cache_pages_da and create a
> struct mpage_da_data containing contiguously indexed pages tagged with this
> tag and sync these pages with a call to mpage_da_map_and_submit.
> This process is done in a while loop until all the PAGECACHE_TAG_TOWRITE pages
> are synced. We also do journal start and stop in each iteration.
> journal_stop could initiate journal commit which would call ext4_writepage
> which in turn will call ext4_bio_write_page even for delayed OR unwritten
> buffers. When ext4_bio_write_page is called for such buffers, it does not
> sync them, yet it still clears the PAGECACHE_TAG_TOWRITE tag of the
> corresponding page, and hence these pages are not synced by the currently
> running data integrity sync either. We will end up with dirty pages although
> the sync has completed.
>
> This could cause a potential data loss when the sync call is followed by a
> truncate_pagecache call, which is exactly the case in collapse_range.
> (It will cause generic/127 failure in xfstests)
>
> To avoid this issue, we can use set_page_writeback_keepwrite, which doesn't
> clear the TOWRITE tag, instead of set_page_writeback.
>
> Cc: Jan Kara <jack@...e.cz>
> Signed-off-by: Namjae Jeon <namjae.jeon@...sung.com>
> Signed-off-by: Ashish Sangwan <a.sangwan@...sung.com>
The patch looks good. You can add:
Reviewed-by: Jan Kara <jack@...e.cz>
BTW you should also cc linux-mm@...ck.org with this patch since it touches
the mm code.
Honza
> ---
>
> Changelog
> v3:
> - define test_set_page_writeback() and test_set_page_writeback_keepwrite()
> as calls to __test_set_page_writeback(page, keep_write)
>
> v2:
> - As Jan Kara suggested, create set_page_writeback_keepwrite, which doesn't
> clear the TOWRITE tag.
>
> fs/ext4/ext4.h | 3 ++-
> fs/ext4/inode.c | 6 ++++--
> fs/ext4/page-io.c | 8 ++++++--
> include/linux/page-flags.h | 12 +++++++++++-
> mm/page-writeback.c | 11 ++++++-----
> 5 files changed, 29 insertions(+), 11 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 6b45afa..78fed7b 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2771,7 +2771,8 @@ extern void ext4_io_submit(struct ext4_io_submit *io);
> extern int ext4_bio_write_page(struct ext4_io_submit *io,
> struct page *page,
> int len,
> - struct writeback_control *wbc);
> + struct writeback_control *wbc,
> + bool keep_towrite);
>
> /* mmp.c */
> extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index b1dc334..8d9cb26 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1846,6 +1846,7 @@ static int ext4_writepage(struct page *page,
> struct buffer_head *page_bufs = NULL;
> struct inode *inode = page->mapping->host;
> struct ext4_io_submit io_submit;
> + bool keep_towrite = false;
>
> trace_ext4_writepage(page);
> size = i_size_read(inode);
> @@ -1876,6 +1877,7 @@ static int ext4_writepage(struct page *page,
> unlock_page(page);
> return 0;
> }
> + keep_towrite = true;
> }
>
> if (PageChecked(page) && ext4_should_journal_data(inode))
> @@ -1892,7 +1894,7 @@ static int ext4_writepage(struct page *page,
> unlock_page(page);
> return -ENOMEM;
> }
> - ret = ext4_bio_write_page(&io_submit, page, len, wbc);
> + ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
> ext4_io_submit(&io_submit);
> /* Drop io_end reference we got from init */
> ext4_put_io_end_defer(io_submit.io_end);
> @@ -1911,7 +1913,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
> else
> len = PAGE_CACHE_SIZE;
> clear_page_dirty_for_io(page);
> - err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
> + err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
> if (!err)
> mpd->wbc->nr_to_write--;
> mpd->first_page++;
> diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
> index 1a64e7a..4a2dd07 100644
> --- a/fs/ext4/page-io.c
> +++ b/fs/ext4/page-io.c
> @@ -401,7 +401,8 @@ submit_and_retry:
> int ext4_bio_write_page(struct ext4_io_submit *io,
> struct page *page,
> int len,
> - struct writeback_control *wbc)
> + struct writeback_control *wbc,
> + bool keep_towrite)
> {
> struct inode *inode = page->mapping->host;
> unsigned block_start, blocksize;
> @@ -414,7 +415,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
> BUG_ON(!PageLocked(page));
> BUG_ON(PageWriteback(page));
>
> - set_page_writeback(page);
> + if (keep_towrite)
> + set_page_writeback_keepwrite(page);
> + else
> + set_page_writeback(page);
> ClearPageError(page);
>
> /*
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index bc2007e..9b68718 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -317,13 +317,23 @@ CLEARPAGEFLAG(Uptodate, uptodate)
> extern void cancel_dirty_page(struct page *page, unsigned int account_size);
>
> int test_clear_page_writeback(struct page *page);
> -int test_set_page_writeback(struct page *page);
> +int __test_set_page_writeback(struct page *page, bool keep_write);
> +
> +#define test_set_page_writeback(page) \
> + __test_set_page_writeback(page, false)
> +#define test_set_page_writeback_keepwrite(page) \
> + __test_set_page_writeback(page, true)
>
> static inline void set_page_writeback(struct page *page)
> {
> test_set_page_writeback(page);
> }
>
> +static inline void set_page_writeback_keepwrite(struct page *page)
> +{
> + test_set_page_writeback_keepwrite(page);
> +}
> +
> #ifdef CONFIG_PAGEFLAGS_EXTENDED
> /*
> * System with lots of page flags available. This allows separate
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 310d329..ba037e3 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -2398,7 +2398,7 @@ int test_clear_page_writeback(struct page *page)
> return ret;
> }
>
> -int test_set_page_writeback(struct page *page)
> +int __test_set_page_writeback(struct page *page, bool keep_write)
> {
> struct address_space *mapping = page_mapping(page);
> int ret;
> @@ -2423,9 +2423,10 @@ int test_set_page_writeback(struct page *page)
> radix_tree_tag_clear(&mapping->page_tree,
> page_index(page),
> PAGECACHE_TAG_DIRTY);
> - radix_tree_tag_clear(&mapping->page_tree,
> - page_index(page),
> - PAGECACHE_TAG_TOWRITE);
> + if (!keep_write)
> + radix_tree_tag_clear(&mapping->page_tree,
> + page_index(page),
> + PAGECACHE_TAG_TOWRITE);
> spin_unlock_irqrestore(&mapping->tree_lock, flags);
> } else {
> ret = TestSetPageWriteback(page);
> @@ -2436,7 +2437,7 @@ int test_set_page_writeback(struct page *page)
> return ret;
>
> }
> -EXPORT_SYMBOL(test_set_page_writeback);
> +EXPORT_SYMBOL(__test_set_page_writeback);
>
> /*
> * Return true if any of the pages in the mapping are marked with the
> --
> 1.7.11-rc0
>
--
Jan Kara <jack@...e.cz>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists