[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1224830067-22963-2-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Date: Fri, 24 Oct 2008 12:04:26 +0530
From: "Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
To: cmm@...ibm.com, tytso@....edu, frederic.bohe@...l.net
Cc: linux-ext4@...r.kernel.org,
"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
Subject: [RFC PATCH 2/3] ext4: Use EXT4_GROUP_INFO_NEED_INIT_BIT during resize
The new groups added during resize are flagged as
need_init group. Make sure we properly initialize these
groups. When we have block size < page size and we are adding
new groups the page may still be marked uptodate even though
we haven't initialized the group.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@...ux.vnet.ibm.com>
---
fs/ext4/ext4.h | 2 +-
fs/ext4/mballoc.c | 192 ++++++++++++++++++++++++++++++++++++++---------------
fs/ext4/mballoc.h | 3 +
fs/ext4/resize.c | 41 +-----------
4 files changed, 143 insertions(+), 95 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ca915d5..157d11e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1073,7 +1073,7 @@ extern int __init init_ext4_mballoc(void);
extern void exit_ext4_mballoc(void);
extern void ext4_mb_free_blocks(handle_t *, struct inode *,
unsigned long, unsigned long, int, unsigned long *);
-extern int ext4_mb_add_more_groupinfo(struct super_block *sb,
+extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
ext4_grpblk_t add);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c08df26..a614e36 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -886,18 +886,20 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
struct ext4_buddy *e4b)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct inode *inode = sbi->s_buddy_cache;
int blocks_per_page;
int block;
int pnum;
int poff;
struct page *page;
int ret;
+ struct ext4_group_info *grp;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct inode *inode = sbi->s_buddy_cache;
mb_debug("load group %lu\n", group);
blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+ grp = ext4_get_group_info(sb, group);
e4b->bd_blkbits = sb->s_blocksize_bits;
e4b->bd_info = ext4_get_group_info(sb, group);
@@ -905,6 +907,15 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
e4b->bd_group = group;
e4b->bd_buddy_page = NULL;
e4b->bd_bitmap_page = NULL;
+ e4b->alloc_semp = &grp->alloc_sem;
+
+ /* Take the read lock on the group alloc
+ * sem. This would make sure a parallel
+ * ext4_mb_init_group happening on other
+ * groups mapped by the page is blocked
+ * till we are done with allocation
+ */
+ down_read(e4b->alloc_semp);
/*
* the buddy cache inode stores the block bitmap
@@ -919,8 +930,15 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
* what we'd like to avoid in fast path ... */
page = find_get_page(inode->i_mapping, pnum);
if (page == NULL || !PageUptodate(page)) {
- if (page)
+ if (page) {
+ /* If the page is not uptodate
+ * that would imply nobody is using
+ * it. So we should be able to drop
+ * page_cache without being worried
+ * about other group allocation
+ */
page_cache_release(page);
+ }
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
if (page) {
BUG_ON(page->mapping != inode->i_mapping);
@@ -994,6 +1012,8 @@ static void ext4_mb_release_desc(struct ext4_buddy *e4b)
page_cache_release(e4b->bd_bitmap_page);
if (e4b->bd_buddy_page)
page_cache_release(e4b->bd_buddy_page);
+ /* Done with the buddy cache */
+ up_read(e4b->alloc_semp);
}
@@ -1692,6 +1712,118 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
return 0;
}
+static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+{
+
+ int i, ret;
+ void *bitmap;
+ int blocks_per_page;
+ int groups_per_page;
+ int block, pnum, poff;
+ ext4_group_t first_group;
+ struct ext4_group_info *grp, *this_grp;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct inode *inode = sbi->s_buddy_cache;
+ struct page *page = NULL, *bitmap_page = NULL;
+
+ mb_debug("init group %lu\n", group);
+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+ /*
+ * the buddy cache inode stores the block bitmap
+ * and buddy information in consecutive blocks.
+ * So for each group we need two blocks.
+ */
+ block = group * 2;
+ pnum = block / blocks_per_page;
+ poff = block % blocks_per_page;
+ first_group = pnum * blocks_per_page / 2;
+ this_grp = ext4_get_group_info(sb, group);
+
+ groups_per_page = blocks_per_page >> 1;
+ if (groups_per_page == 0)
+ groups_per_page = 1;
+
+ /* read all groups the page covers into the cache */
+ for (i = 0; i < groups_per_page; i++) {
+ grp = ext4_get_group_info(sb, first_group + i);
+ /* take all groups write allocation
+ * semaphore. This make sure there is
+ * no block allocation going on in any
+ * of that groups
+ */
+ down_write(&grp->alloc_sem);
+ }
+ if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
+ /*
+ * somebody initialized the group
+ * return without doing anything
+ */
+ ret = 0;
+ goto err;
+ }
+
+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ if (page) {
+ BUG_ON(page->mapping != inode->i_mapping);
+ ret = ext4_mb_init_cache(page, NULL);
+ if (ret) {
+ unlock_page(page);
+ goto err;
+ }
+ unlock_page(page);
+ }
+ if (page == NULL || !PageUptodate(page)) {
+ ret = -EIO;
+ goto err;
+ }
+ mark_page_accessed(page);
+ bitmap_page = page;
+ bitmap = page_address(page) + (poff * sb->s_blocksize);
+
+ /* init buddy cache */
+ block++;
+ pnum = block / blocks_per_page;
+ poff = block % blocks_per_page;
+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ if (page == bitmap_page) {
+ /*
+ * If both the bitmap and buddy are in
+ * the same page we don't need to force
+ * init the buddy
+ */
+ unlock_page(page);
+ } else if (page) {
+ BUG_ON(page->mapping != inode->i_mapping);
+ ret = ext4_mb_init_cache(page, bitmap);
+ if (ret) {
+ unlock_page(page);
+ goto err;
+ }
+ unlock_page(page);
+ }
+ if (page == NULL || !PageUptodate(page)) {
+ ret = -EIO;
+ goto err;
+ }
+ mark_page_accessed(page);
+err:
+ /* release locks on all the groups */
+ for (i = 0; i < groups_per_page; i++) {
+ grp = ext4_get_group_info(sb, first_group + i);
+ /* take all groups write allocation
+ * semaphore. This make sure there is
+ * no block allocation going on in any
+ * of that groups
+ */
+ up_write(&grp->alloc_sem);
+ }
+ if (bitmap_page)
+ page_cache_release(bitmap_page);
+ if (page)
+ page_cache_release(page);
+ return ret;
+}
+
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
@@ -1775,7 +1907,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
group = 0;
/* quick check to skip empty groups */
- grp = ext4_get_group_info(ac->ac_sb, group);
+ grp = ext4_get_group_info(sb, group);
if (grp->bb_free == 0)
continue;
@@ -1788,10 +1920,9 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
* we need full data about the group
* to make a good selection
*/
- err = ext4_mb_load_buddy(sb, group, &e4b);
+ err = ext4_mb_init_group(sb, group);
if (err)
goto out;
- ext4_mb_release_desc(&e4b);
}
/*
@@ -2300,6 +2431,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
}
INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+ init_rwsem(&meta_group_info[i]->alloc_sem);
meta_group_info[i]->bb_free_root.rb_node = NULL;;
#ifdef DOUBLE_CHECK
@@ -2327,54 +2459,6 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
} /* ext4_mb_add_groupinfo */
/*
- * Add a group to the existing groups.
- * This function is used for online resize
- */
-int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
- struct ext4_group_desc *desc)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct inode *inode = sbi->s_buddy_cache;
- int blocks_per_page;
- int block;
- int pnum;
- struct page *page;
- int err;
-
- /* Add group based on group descriptor*/
- err = ext4_mb_add_groupinfo(sb, group, desc);
- if (err)
- return err;
-
- /*
- * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
- * datas) are set not up to date so that they will be re-initilaized
- * during the next call to ext4_mb_load_buddy
- */
-
- /* Set buddy page as not up to date */
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- block = group * 2;
- pnum = block / blocks_per_page;
- page = find_get_page(inode->i_mapping, pnum);
- if (page != NULL) {
- ClearPageUptodate(page);
- page_cache_release(page);
- }
-
- /* Set bitmap page as not up to date */
- block++;
- pnum = block / blocks_per_page;
- page = find_get_page(inode->i_mapping, pnum);
- if (page != NULL) {
- ClearPageUptodate(page);
- page_cache_release(page);
- }
-
- return 0;
-}
-
-/*
* Update an existing group.
* This function is used for online resize
*/
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 9e815c4..317730a 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -18,6 +18,7 @@
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/version.h>
+#include <linux/mutex.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "group.h"
@@ -128,6 +129,7 @@ struct ext4_group_info {
#ifdef DOUBLE_CHECK
void *bb_bitmap;
#endif
+ struct rw_semaphore alloc_sem;
unsigned short bb_counters[];
};
@@ -248,6 +250,7 @@ struct ext4_buddy {
struct super_block *bd_sb;
__u16 bd_blkbits;
ext4_group_t bd_group;
+ struct rw_semaphore *alloc_semp;
};
#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index a8403a8..65dea15 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -870,7 +870,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
* We can allocate memory for mb_alloc based on the new group
* descriptor
*/
- err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
+ err = ext4_mb_add_groupinfo(sb, input->group, gdp);
if (err)
goto exit_journal;
@@ -1082,45 +1082,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
if ((err = ext4_journal_stop(handle)))
goto exit_put;
- /*
- * Mark mballoc pages as not up to date so that they will be updated
- * next time they are loaded by ext4_mb_load_buddy.
- *
- * XXX Bad, Bad, BAD!!! We should not be overloading the
- * Uptodate flag, particularly on thte bitmap bh, as way of
- * hinting to ext4_mb_load_buddy() that it needs to be
- * overloaded. A user could take a LVM snapshot, then do an
- * on-line fsck, and clear the uptodate flag, and this would
- * not be a bug in userspace, but a bug in the kernel. FIXME!!!
- */
- {
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct inode *inode = sbi->s_buddy_cache;
- int blocks_per_page;
- int block;
- int pnum;
- struct page *page;
-
- /* Set buddy page as not up to date */
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- block = group * 2;
- pnum = block / blocks_per_page;
- page = find_get_page(inode->i_mapping, pnum);
- if (page != NULL) {
- ClearPageUptodate(page);
- page_cache_release(page);
- }
-
- /* Set bitmap page as not up to date */
- block++;
- pnum = block / blocks_per_page;
- page = find_get_page(inode->i_mapping, pnum);
- if (page != NULL) {
- ClearPageUptodate(page);
- page_cache_release(page);
- }
- }
-
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
ext4_blocks_count(es));
--
1.6.0.3.514.g2f91b
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists