[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5humv51rdjr8vl3c7qlroekj2k004gl665@4ax.com>
Date: Tue, 25 May 2010 17:21:43 +1000
From: Grant Coady <gcoady.lk@...il.com>
To: Greg KH <gregkh@...e.de>
Cc: linux-kernel@...r.kernel.org, stable@...nel.org,
stable-review@...nel.org, torvalds@...ux-foundation.org,
akpm@...ux-foundation.org, alan@...rguk.ukuu.org.uk,
Ext4 Developers List <linux-ext4@...r.kernel.org>,
"Theodore Tso" <tytso@....edu>,
"Jayson R. King" <dev@...sonking.com>,
"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
Subject: Re: [04/24] ext4: Make sure all the block allocation paths reserve blocks
On Mon, 24 May 2010 15:28:00 -0700, you wrote:
>2.6.27-stable review patch. If anyone has any objections, please let us know.
>
>------------------
>
>
>From: Aneesh Kumar K.V <aneesh.kumar@...ux.vnet.ibm.com>
>
>commit a30d542a0035b886ffaafd0057ced0a2b28c3a4f upstream.
>
>With delayed allocation we need to make sure block are reserved before
>we attempt to allocate them. Otherwise we get block allocation failure
>(ENOSPC) during writepages which cannot be handled. This would mean
>silent data loss (We do a printk stating data will be lost). This patch
>updates the DIO and fallocate code path to do block reservation before
>block allocation. This is needed to make sure parallel DIO and fallocate
>request doesn't take block out of delayed reserve space.
>
>When free blocks count go below a threshold we switch to a slow patch
s/patch/path/ ??
Or, are these patch comments locked in stone for -stable?
Grant.
>which looks at other CPU's accumulated percpu counter values.
>
>Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@...ux.vnet.ibm.com>
>Signed-off-by: "Theodore Ts'o" <tytso@....edu>
>Signed-off-by: Jayson R. King <dev@...sonking.com>
>Signed-off-by: Theodore Ts'o <tytso@....edu>
>Signed-off-by: Greg Kroah-Hartman <gregkh@...e.de>
>
>---
> fs/ext4/balloc.c | 58 +++++++++++++++++++++++++++++++++++++++---------------
> fs/ext4/ext4.h | 13 ++++++++++++
> fs/ext4/inode.c | 5 ----
> fs/ext4/mballoc.c | 23 ++++++++++++---------
> 4 files changed, 69 insertions(+), 30 deletions(-)
>
>--- a/fs/ext4/balloc.c
>+++ b/fs/ext4/balloc.c
>@@ -1754,6 +1754,32 @@ out:
> return ret;
> }
>
>+int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
>+ ext4_fsblk_t nblocks)
>+{
>+ s64 free_blocks;
>+ ext4_fsblk_t root_blocks = 0;
>+ struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
>+
>+ free_blocks = percpu_counter_read(fbc);
>+
>+ if (!capable(CAP_SYS_RESOURCE) &&
>+ sbi->s_resuid != current->fsuid &&
>+ (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
>+ root_blocks = ext4_r_blocks_count(sbi->s_es);
>+
>+ if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
>+ free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter);
>+
>+ if (free_blocks < (root_blocks + nblocks))
>+ /* we don't have free space */
>+ return -ENOSPC;
>+
>+ /* reduce fs free blocks counter */
>+ percpu_counter_sub(fbc, nblocks);
>+ return 0;
>+}
>+
> /**
> * ext4_has_free_blocks()
> * @sbi: in-core super block structure.
>@@ -1775,18 +1801,17 @@ ext4_fsblk_t ext4_has_free_blocks(struct
> sbi->s_resuid != current->fsuid &&
> (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
> root_blocks = ext4_r_blocks_count(sbi->s_es);
>-#ifdef CONFIG_SMP
>- if (free_blocks - root_blocks < FBC_BATCH)
>- free_blocks =
>- percpu_counter_sum(&sbi->s_freeblocks_counter);
>-#endif
>+
>+ if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
>+ free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
>+
> if (free_blocks <= root_blocks)
> /* we don't have free space */
> return 0;
> if (free_blocks - root_blocks < nblocks)
> return free_blocks - root_blocks;
> return nblocks;
>- }
>+}
>
>
> /**
>@@ -1865,14 +1890,11 @@ ext4_fsblk_t ext4_old_new_blocks(handle_
> /*
> * With delalloc we already reserved the blocks
> */
>- *count = ext4_has_free_blocks(sbi, *count);
>- }
>- if (*count == 0) {
>- *errp = -ENOSPC;
>- return 0; /*return with ENOSPC error */
>+ if (ext4_claim_free_blocks(sbi, *count)) {
>+ *errp = -ENOSPC;
>+ return 0; /*return with ENOSPC error */
>+ }
> }
>- num = *count;
>-
> /*
> * Check quota for allocation of this block.
> */
>@@ -2067,9 +2089,13 @@ allocated:
> le16_add_cpu(&gdp->bg_free_blocks_count, -num);
> gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
> spin_unlock(sb_bgl_lock(sbi, group_no));
>- if (!EXT4_I(inode)->i_delalloc_reserved_flag)
>- percpu_counter_sub(&sbi->s_freeblocks_counter, num);
>-
>+ if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) {
>+ /*
>+ * we allocated less blocks than we
>+ * claimed. Add the difference back.
>+ */
>+ percpu_counter_add(&sbi->s_freeblocks_counter, *count - num);
>+ }
> if (sbi->s_log_groups_per_flex) {
> ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
> spin_lock(sb_bgl_lock(sbi, flex_group));
>--- a/fs/ext4/ext4.h
>+++ b/fs/ext4/ext4.h
>@@ -1015,6 +1015,8 @@ extern ext4_fsblk_t ext4_new_blocks(hand
> unsigned long *count, int *errp);
> extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
> ext4_fsblk_t goal, unsigned long *count, int *errp);
>+extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
>+ ext4_fsblk_t nblocks);
> extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
> ext4_fsblk_t nblocks);
> extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
>@@ -1245,6 +1247,17 @@ do { \
> __ext4_std_error((sb), __func__, (errno)); \
> } while (0)
>
>+#ifdef CONFIG_SMP
>+/* Each CPU can accumulate FBC_BATCH blocks in their local
>+ * counters. So we need to make sure we have free blocks more
>+ * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
>+ */
>+#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
>+#else
>+#define EXT4_FREEBLOCKS_WATERMARK 0
>+#endif
>+
>+
> /*
> * Inodes and files operations
> */
>--- a/fs/ext4/inode.c
>+++ b/fs/ext4/inode.c
>@@ -1564,13 +1564,10 @@ static int ext4_da_reserve_space(struct
> md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
> total = md_needed + nrblocks;
>
>- if (ext4_has_free_blocks(sbi, total) < total) {
>+ if (ext4_claim_free_blocks(sbi, total)) {
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> return -ENOSPC;
> }
>- /* reduce fs free blocks counter */
>- percpu_counter_sub(&sbi->s_freeblocks_counter, total);
>-
> EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
> EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
>
>--- a/fs/ext4/mballoc.c
>+++ b/fs/ext4/mballoc.c
>@@ -3194,9 +3194,15 @@ ext4_mb_mark_diskspace_used(struct ext4_
> * at write_begin() time for delayed allocation
> * do not double accounting
> */
>- if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
>- percpu_counter_sub(&sbi->s_freeblocks_counter,
>- ac->ac_b_ex.fe_len);
>+ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) &&
>+ ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) {
>+ /*
>+ * we allocated less blocks than we calimed
>+ * Add the difference back
>+ */
>+ percpu_counter_add(&sbi->s_freeblocks_counter,
>+ ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len);
>+ }
>
> if (sbi->s_log_groups_per_flex) {
> ext4_group_t flex_group = ext4_flex_group(sbi,
>@@ -4649,14 +4655,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
> /*
> * With delalloc we already reserved the blocks
> */
>- ar->len = ext4_has_free_blocks(sbi, ar->len);
>- }
>-
>- if (ar->len == 0) {
>- *errp = -ENOSPC;
>- return 0;
>+ if (ext4_claim_free_blocks(sbi, ar->len)) {
>+ *errp = -ENOSPC;
>+ return 0;
>+ }
> }
>-
> while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
> ar->flags |= EXT4_MB_HINT_NOPREALLOC;
> ar->len--;
--
http://bugs.id.au/
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists