[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1222435027.8189.24.camel@frecb007923.frec.bull.fr>
Date: Fri, 26 Sep 2008 15:17:07 +0200
From: Frédéric Bohé <frederic.bohe@...l.net>
To: "Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>,
Theodore Tso <tytso@....edu>
Cc: "linux-ext4@...r.kernel.org" <linux-ext4@...r.kernel.org>
Subject: Re: [PATCH v2] ext4: fix initialization of UNINIT bitmap blocks
Le mercredi 24 septembre 2008 à 14:38 +0200, Frédéric Bohé a écrit :
> Le lundi 22 septembre 2008 à 11:32 +0200, Frédéric Bohé a écrit :
> > Le lundi 22 septembre 2008 à 14:17 +0530, Aneesh Kumar K.V a écrit :
> > > What you can do is make ext4_group_info generic for both mballoc and
> > > oldalloc. We can then add bg_flag to the in memory ext4_group_info
> > > that would indicate whether the group is initialized or not. Here
> > > initialized for an UNINIT_GROUP indicate we have done
> > > ext4_init_block_bitmap on the buffer_head. Then
> > > instead of depending on the buffer_head uptodate flag we can check
> > > for the ext4_group_info bg_flags and decided whether the block/inode
> > > bitmap need to be initialized.
> > >
> >
> > That makes sense ! I agree with you, we need an additional in-memory
> > flag to know whether buffers are initialized or not. Anyway, making
> > ext4_group_info generic will lead to unneeded memory consumption for
> > oldalloc. Maybe a simple independent bits array could do the trick. Is
> > there any advantage to re-use ext4_group_info ?
> >
>
> This is an implementation of what I was talking about. Please let me know your comments.
>
> Index: linux-2.6.27-rc6+patch_queue/fs/ext4/balloc.c
> ===================================================================
> --- linux-2.6.27-rc6+patch_queue.orig/fs/ext4/balloc.c 2008-09-23 15:04:39.000000000 +0200
> +++ linux-2.6.27-rc6+patch_queue/fs/ext4/balloc.c 2008-09-23 15:39:38.000000000 +0200
> @@ -175,6 +175,8 @@ unsigned ext4_init_block_bitmap(struct s
> */
> mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
> }
> +
> + ext4_set_bit(block_group, sbi->s_block_bitmap_buffer_state);
> return free_blocks - ext4_group_used_meta_blocks(sb, block_group);
> }
>
> @@ -318,9 +320,13 @@ ext4_read_block_bitmap(struct super_bloc
> block_group, bitmap_blk);
> return NULL;
> }
> - if (bh_uptodate_or_lock(bh))
> +
> + if (buffer_uptodate(bh) && ext4_test_bit(block_group,
> + EXT4_SB(sb)->s_block_bitmap_buffer_state))
> return bh;
>
> + lock_buffer(bh);
> +
> spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
> if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
> ext4_init_block_bitmap(sb, bh, block_group, desc);
> @@ -328,7 +334,9 @@ ext4_read_block_bitmap(struct super_bloc
> unlock_buffer(bh);
> spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
> return bh;
> - }
> + } else
> + ext4_set_bit(block_group,
> + EXT4_SB(sb)->s_block_bitmap_buffer_state);
> spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
> if (bh_submit_read(bh) < 0) {
> put_bh(bh);
> Index: linux-2.6.27-rc6+patch_queue/fs/ext4/ext4_sb.h
> ===================================================================
> --- linux-2.6.27-rc6+patch_queue.orig/fs/ext4/ext4_sb.h 2008-09-23 15:07:28.000000000 +0200
> +++ linux-2.6.27-rc6+patch_queue/fs/ext4/ext4_sb.h 2008-09-23 15:40:57.000000000 +0200
> @@ -147,6 +147,14 @@ struct ext4_sb_info {
>
> unsigned int s_log_groups_per_flex;
> struct flex_groups *s_flex_groups;
> +
> + /*
> + * Flag for the state of the bitmaps buffers
> + * 0 = unknown or uninitialized
> + * 1 = initialized
> + */
> + char *s_block_bitmap_buffer_state;
> + char *s_inode_bitmap_buffer_state;
> };
>
> #endif /* _EXT4_SB */
> Index: linux-2.6.27-rc6+patch_queue/fs/ext4/ialloc.c
> ===================================================================
> --- linux-2.6.27-rc6+patch_queue.orig/fs/ext4/ialloc.c 2008-09-23 15:09:15.000000000 +0200
> +++ linux-2.6.27-rc6+patch_queue/fs/ext4/ialloc.c 2008-09-23 15:41:24.000000000 +0200
> @@ -86,6 +86,7 @@ unsigned ext4_init_inode_bitmap(struct s
> memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
> mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
> bh->b_data);
> + ext4_set_bit(block_group, sbi->s_inode_bitmap_buffer_state);
>
> return EXT4_INODES_PER_GROUP(sb);
> }
> @@ -115,9 +116,12 @@ ext4_read_inode_bitmap(struct super_bloc
> block_group, bitmap_blk);
> return NULL;
> }
> - if (bh_uptodate_or_lock(bh))
> +
> + if (buffer_uptodate(bh) && ext4_test_bit(block_group,
> + EXT4_SB(sb)->s_inode_bitmap_buffer_state))
> return bh;
>
> + lock_buffer(bh);
> spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
> if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
> ext4_init_inode_bitmap(sb, bh, block_group, desc);
> @@ -125,7 +129,9 @@ ext4_read_inode_bitmap(struct super_bloc
> unlock_buffer(bh);
> spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
> return bh;
> - }
> + } else
> + ext4_set_bit(block_group,
> + EXT4_SB(sb)->s_inode_bitmap_buffer_state);
> spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
> if (bh_submit_read(bh) < 0) {
> put_bh(bh);
> Index: linux-2.6.27-rc6+patch_queue/fs/ext4/mballoc.c
> ===================================================================
> --- linux-2.6.27-rc6+patch_queue.orig/fs/ext4/mballoc.c 2008-09-23 15:11:48.000000000 +0200
> +++ linux-2.6.27-rc6+patch_queue/fs/ext4/mballoc.c 2008-09-24 14:27:55.000000000 +0200
> @@ -785,9 +785,12 @@ static int ext4_mb_init_cache(struct pag
> if (bh[i] == NULL)
> goto out;
>
> - if (bh_uptodate_or_lock(bh[i]))
> + if (buffer_uptodate(bh[i]) && ext4_test_bit(first_group + i,
> + EXT4_SB(sb)->s_block_bitmap_buffer_state))
> continue;
>
> + lock_buffer(bh[i]);
> +
> spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
> if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
> ext4_init_block_bitmap(sb, bh[i],
> @@ -796,7 +799,9 @@ static int ext4_mb_init_cache(struct pag
> unlock_buffer(bh[i]);
> spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
> continue;
> - }
> + } else
> + ext4_set_bit(first_group + i,
> + EXT4_SB(sb)->s_block_bitmap_buffer_state);
> spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
> get_bh(bh[i]);
> bh[i]->b_end_io = end_buffer_read_sync;
> Index: linux-2.6.27-rc6+patch_queue/fs/ext4/super.c
> ===================================================================
> --- linux-2.6.27-rc6+patch_queue.orig/fs/ext4/super.c 2008-09-23 15:16:15.000000000 +0200
> +++ linux-2.6.27-rc6+patch_queue/fs/ext4/super.c 2008-09-24 14:28:42.000000000 +0200
> @@ -2219,6 +2219,20 @@ static int ext4_fill_super(struct super_
> printk(KERN_ERR "EXT4-fs: not enough memory\n");
> goto failed_mount;
> }
> + sbi->s_block_bitmap_buffer_state = kzalloc((sbi->s_groups_count +
> + le16_to_cpu(es->s_reserved_gdt_blocks) +
> + 7) / 8, GFP_KERNEL);
> + if (sbi->s_block_bitmap_buffer_state == NULL) {
> + printk(KERN_ERR "EXT4-fs: not enough memory\n");
> + goto failed_mount;
> + }
> + sbi->s_inode_bitmap_buffer_state = kzalloc((sbi->s_groups_count +
> + le16_to_cpu(es->s_reserved_gdt_blocks) +
> + 7) / 8, GFP_KERNEL);
> + if (sbi->s_inode_bitmap_buffer_state == NULL) {
> + printk(KERN_ERR "EXT4-fs: not enough memory\n");
> + goto failed_mount;
> + }
>
> bgl_lock_init(&sbi->s_blockgroup_lock);
>
After some testing of this implementation, I think that using a bit to
know whether we have done ext4_init_block_bitmap or not for the bitmaps
of a group is useless. In fact this method works as long as buffer head
are not freed. But consider we have already initialized the "in-memory
init" bit then the buffer is re-read from the disk after being freed :
we come back to the initial problem with on-disk garbage in the buffer
head !
At the moment, the only safe way I see of knowing whether we have to
initialize the buffer-head or not is to rely on the UNINIT flag in the
group descriptor (the way my initial patch does).
As Aneesh said, this will possibly lead to multiple call
ext4_init_block_bitmap instead of one. So there may be an impact on the
performance.
Frederic
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists