[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20130308124940.GB18949@gmail.com>
Date: Fri, 8 Mar 2013 20:49:40 +0800
From: Zheng Liu <gnehzuil.liu@...il.com>
To: Lukáš Czerner <lczerner@...hat.com>
Cc: linux-ext4@...r.kernel.org
Subject: Re: [BUG][Bigalloc] applictions will be blocked for more than 120s
when we run xfstests #083
On Thu, Mar 07, 2013 at 03:07:25PM +0100, Lukáš Czerner wrote:
[snip]
>
> Yes, I can confirm that. The problem is that when we have delayed
> write into unwritten extent we do not reserve any space, which is ok
> because the data has already been allocated, however we might need
> metadata blocks to cover unwritten extent conversion which we do not
> have reserved.
>
> Then in writeback time when the extent splic actually happen we
> might not have enough space to allocate metadata blocks hence
> ext4_map_blocks() in mpage_da_map_and_submit() will return -ENOSPC
> to the ext4_da_writepages() caller.
>
> However we're in writeback and we do not expect allocation to fail
> because of ENOSPC at all because we should have reserved everything
> we need to complete successfully so in the loop we'll force the
> journal commit hoping that some blocks will be released and retry
> the allocation again...and we'll be stuck in this loop forever.
>
> Now here is patch which fixes the problem for me, however it still
> needs some testing. Also we should probably do something about the
> infinite loop in the ext4_da_writepages() - at least warn the user
> if we try too many times so we at least know what's happening
> because it was not easy to find this out.
>
> Hopefully I'll send the proper patch soon, but feel free to test the
> fix yourself.
I have seen that you have sent the patch series to the mailing list, and
I will take a close look at them.
For this patch, I can confirm that xfstests #083 never hang, and I only
see the warning from ext4_da_update_reserve_space() in #269. I guess
that has been fixed by your patch series. Thanks for fixing it.
Tested-by: Zheng Liu <wenqing.lz@...bao.com>
Regards,
- Zheng
>
> Thanks!
> -Lukas
>
> ---
> fs/ext4/ext4.h | 1 +
> fs/ext4/inode.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++-----
> 2 files changed, 70 insertions(+), 7 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 6e16c18..c20efe2 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -581,6 +581,7 @@ enum {
> #define EXT4_GET_BLOCKS_NO_LOCK 0x0100
> /* Do not put hole in extent cache */
> #define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
> +#define EXT4_GET_BLOCKS_METADATA_RESERVED 0x0400
>
> /*
> * Flags used by ext4_free_blocks
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 9ea0cde..46cc739 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -606,7 +606,8 @@ found:
> * let the underlying get_block() function know to
> * avoid double accounting
> */
> - if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
> + if ((flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ||
> + (flags & EXT4_GET_BLOCKS_METADATA_RESERVED))
> ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
> /*
> * We need to check for EXT4 here because migrate
> @@ -636,7 +637,8 @@ found:
> (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
> ext4_da_update_reserve_space(inode, retval, 1);
> }
> - if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
> + if ((flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ||
> + (flags & EXT4_GET_BLOCKS_METADATA_RESERVED))
> ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
>
> if (retval > 0) {
> @@ -1215,6 +1217,56 @@ static int ext4_journalled_write_end(struct file *file,
> return ret ? ret : copied;
> }
>
> +
> +/*
> + * Reserve a metadata for a single block located at lblock
> + */
> +static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
> +{
> + int retries = 0;
> + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> + struct ext4_inode_info *ei = EXT4_I(inode);
> + unsigned int md_needed;
> + ext4_lblk_t save_last_lblock;
> + int save_len;
> +
> + /*
> + * recalculate the amount of metadata blocks to reserve
> + * in order to allocate nrblocks
> + * worse case is one extent per block
> + */
> +repeat:
> + spin_lock(&ei->i_block_reservation_lock);
> + /*
> + * ext4_calc_metadata_amount() has side effects, which we have
> + * to be prepared undo if we fail to claim space.
> + */
> + save_len = ei->i_da_metadata_calc_len;
> + save_last_lblock = ei->i_da_metadata_calc_last_lblock;
> + md_needed = EXT4_NUM_B2C(sbi,
> + ext4_calc_metadata_amount(inode, lblock));
> + trace_ext4_da_reserve_space(inode, md_needed);
> +
> + /*
> + * We do still charge estimated metadata to the sb though;
> + * we cannot afford to run out of free blocks.
> + */
> + if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
> + ei->i_da_metadata_calc_len = save_len;
> + ei->i_da_metadata_calc_last_lblock = save_last_lblock;
> + spin_unlock(&ei->i_block_reservation_lock);
> + if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
> + yield();
> + goto repeat;
> + }
> + return -ENOSPC;
> + }
> + ei->i_reserved_meta_blocks += md_needed;
> + spin_unlock(&ei->i_block_reservation_lock);
> +
> + return 0; /* success */
> +}
> +
> /*
> * Reserve a single cluster located at lblock
> */
> @@ -1601,7 +1653,8 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
> */
> map.m_lblk = next;
> map.m_len = max_blocks;
> - get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
> + get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
> + EXT4_GET_BLOCKS_METADATA_RESERVED;
> if (ext4_should_dioread_nolock(mpd->inode))
> get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
> if (mpd->b_state & (1 << BH_Delay))
> @@ -1766,7 +1819,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
> struct buffer_head *bh)
> {
> struct extent_status es;
> - int retval;
> + int retval, ret;
> sector_t invalid_block = ~((sector_t) 0xffff);
>
> if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
> @@ -1804,9 +1857,19 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
> map->m_len = retval;
> if (ext4_es_is_written(&es))
> map->m_flags |= EXT4_MAP_MAPPED;
> - else if (ext4_es_is_unwritten(&es))
> + else if (ext4_es_is_unwritten(&es)) {
> + /*
> + * We have delalloc write into the unwritten extent
> + * which means that we have to reserve metadata
> + * potentially required for converting unwritten
> + * extent.
> + */
> + ret = ext4_da_reserve_metadata(inode, iblock);
> + if (ret)
> + /* not enough space to reserve */
> + retval = ret;
> map->m_flags |= EXT4_MAP_UNWRITTEN;
> - else
> + } else
> BUG_ON(1);
>
> return retval;
> @@ -1838,7 +1901,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
>
> add_delayed:
> if (retval == 0) {
> - int ret;
> /*
> * XXX: __block_prepare_write() unmaps passed block,
> * is it OK?
> --
> 1.7.7.6
>
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists