[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87zl0pbc4l.fsf@openvz.org>
Date: Tue, 27 Apr 2010 11:14:34 +0400
From: Dmitry Monakhov <dmonakhov@...nvz.org>
To: linux-ext4@...r.kernel.org
Subject: Re: [PATCH] ext4: Do not dec quota for reserved blocks on error paths
Dmitry Monakhov <dmonakhov@...nvz.org> writes:
> If we have failed some where inside ext4_get_blocks() internals we may
> have allocated some new blocks, which was not yet claimed to quota.
> We have to free such blocks, but without touching quota. Quota will
> be updated later on exit from ext4_get_blocks().
> There are two possible ways to understand what we have to skip quota update:
> 1) Caller pass corresponding flag to ext4_free_blocks()
> 2) check that free_blocks() was indirectly called by get_blocks()
> (i.e EXT4_I(inode)->i_delalloc_reserved_flag is set)
> Second is simpler, but may result in unpredictable consequences later.
> So i've chosen the first one, because caller must know which blocks it
> is freeing.
Hm... my test failed one more time, seems that single NOQUOTA flag is
not enough. Because if metadata block was allocated it was charged to
ei->i_allocated_meta_blocks so this counter must being decremented
accordingly.
>
> The bug happens on heavily loaded node, or with 227'th xfstestcase and
> result in incorrect i_blocks (less than expected). So truncation for
> that file result in i_blocks overflow.
> Seems this was the last bug which was easily triggered by 227'th testcase.
>
> Test hosts survived for 24hrs without complain. Tested configuration:
> * ext3 over ext4driver with -odelalloc mount option
> * ext4 with default mount opt
>
> From ff4594462fe9101042df9c47a7c35cddd809db4f Mon Sep 17 00:00:00 2001
> From: Dmitry Monakhov <dmonakhov@...nvz.org>
> Date: Tue, 27 Apr 2010 06:49:28 +0400
> Subject: [PATCH] ext4: Do not drop quota for reserved blocks on error paths
>
> If we have failed some where inside ext4_get_blocks() internals we may
> have allocated some new blocks, which was not yet claimed to quota.
> We have to free such blocks, but without touching quota. Quota will
> be updated later on exit from ext4_get_blocks().
> The bug happens on heavily loaded node.
>
> Signed-off-by: Dmitry Monakhov <dmonakhov@...nvz.org>
> ---
> fs/ext4/ext4.h | 1 +
> fs/ext4/extents.c | 18 +++++++++++++-----
> fs/ext4/inode.c | 26 +++++++++++++++++---------
> fs/ext4/mballoc.c | 2 +-
> 4 files changed, 32 insertions(+), 15 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index c69efb2..c009805 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -401,6 +401,7 @@ struct ext4_new_group_data {
> #define EXT4_FREE_BLOCKS_METADATA 0x0001
> #define EXT4_FREE_BLOCKS_FORGET 0x0002
> #define EXT4_FREE_BLOCKS_VALIDATED 0x0004
> +#define EXT4_FREE_BLOCKS_NOQUOTA 0x0008
>
> /*
> * ioctl commands
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 6856272..815fd97 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -1057,11 +1057,15 @@ cleanup:
>
> if (err) {
> /* free all allocated blocks in error case */
> + int fb_flags = EXT4_FREE_BLOCKS_METADATA;
> + if (EXT4_I(inode)->i_delalloc_reserved_flag)
> + fb_flags |= EXT4_FREE_BLOCKS_NOQUOTA;
> +
> for (i = 0; i < depth; i++) {
> if (!ablocks[i])
> continue;
> ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
> - EXT4_FREE_BLOCKS_METADATA);
> + fb_flags);
> }
> }
> kfree(ablocks);
> @@ -3553,12 +3557,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
> }
> err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
> if (err) {
> - /* free data blocks we just allocated */
> - /* not a good idea to call discard here directly,
> - * but otherwise we'd need to call it every free() */
> + int fb_flags = 0;
> + /* free data blocks we just allocated
> + * Not a good idea to call discard here directly,
> + * but otherwise we'd need to call it every free().
> + * On delalloc blocks are not yet accounted to quota */
> + if (EXT4_I(inode)->i_delalloc_reserved_flag)
> + fb_flags = EXT4_FREE_BLOCKS_NOQUOTA;
> ext4_discard_preallocations(inode);
> ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
> - ext4_ext_get_actual_len(&newex), 0);
> + ext4_ext_get_actual_len(&newex), fb_flags);
> goto out2;
> }
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index e4e0a7d..58a3787 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -591,7 +591,9 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
> int index = 0;
> ext4_fsblk_t current_block = 0;
> int ret = 0;
> -
> + int fb_flags = 0;
> + if (EXT4_I(inode)->i_delalloc_reserved_flag)
> + fb_flags |= EXT4_FREE_BLOCKS_NOQUOTA;
> /*
> * Here we try to allocate the requested multiple blocks at once,
> * on a best-effort basis.
> @@ -686,7 +688,7 @@ allocated:
> return ret;
> failed_out:
> for (i = 0; i < index; i++)
> - ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
> + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, fb_flags);
> return ret;
> }
>
> @@ -727,6 +729,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
> int num;
> ext4_fsblk_t new_blocks[4];
> ext4_fsblk_t current_block;
> + int fb_flags = 0;
> + if (EXT4_I(inode)->i_delalloc_reserved_flag)
> + fb_flags |= EXT4_FREE_BLOCKS_NOQUOTA;
>
> num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
> *blks, new_blocks, &err);
> @@ -782,20 +787,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
> return err;
> failed:
> /* Allocation failed, free what we already allocated */
> - ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
> + ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, fb_flags);
> for (i = 1; i <= n ; i++) {
> - /*
> + /*
> * branch[i].bh is newly allocated, so there is no
> * need to revoke the block, which is why we don't
> * need to set EXT4_FREE_BLOCKS_METADATA.
> */
> ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
> - EXT4_FREE_BLOCKS_FORGET);
> + fb_flags | EXT4_FREE_BLOCKS_FORGET);
> }
> for (i = n+1; i < indirect_blks; i++)
> - ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
> + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, fb_flags);
>
> - ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
> + ext4_free_blocks(handle, inode, 0, new_blocks[i], num, fb_flags);
>
> return err;
> }
> @@ -821,6 +826,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
> int i;
> int err = 0;
> ext4_fsblk_t current_block;
> + int fb_flags = 0;
> + if (EXT4_I(inode)->i_delalloc_reserved_flag)
> + fb_flags |= EXT4_FREE_BLOCKS_NOQUOTA;
>
> /*
> * If we're splicing into a [td]indirect block (as opposed to the
> @@ -880,10 +888,10 @@ err_out:
> * need to set EXT4_FREE_BLOCKS_METADATA.
> */
> ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
> - EXT4_FREE_BLOCKS_FORGET);
> + fb_flags | EXT4_FREE_BLOCKS_FORGET);
> }
> ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
> - blks, 0);
> + blks, fb_flags);
>
> return err;
> }
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 3c27377..fa2fcf2 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -4635,7 +4635,7 @@ do_more:
> }
> sb->s_dirt = 1;
> error_return:
> - if (freed)
> + if (!(flags & EXT4_FREE_BLOCKS_NOQUOTA) && freed)
> dquot_free_block(inode, freed);
> brelse(bitmap_bh);
> ext4_std_error(sb, err);
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists