[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20080110153358.GA9367@skywalker>
Date: Thu, 10 Jan 2008 21:03:58 +0530
From: "Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
To: Mingming Cao <cmm@...ibm.com>
Cc: ext4 development <linux-ext4@...r.kernel.org>
Subject: patch queue update
Hi Mingming,
New patches for the patch queue can be found at
http://www.radian.org/~kvaneesh/ext4/jan-10-2008-ver2/
The changes are
------------
a) The mballoc patch got an explanation of the regular allocator.
b) In the mballoc regular allocator we changed the usage of ffs to fls. I guess
it makes sense to use fls because we want to compare it against the
tunable s_mb_order2_reqs. Only requests above this order use
criteria 0 allocation.
c) stripe.patch to use the stripe size set in the super block for block
allocation.
The diff is attached for reference.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0d31817..0085fde 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -468,7 +468,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *);
static void ext4_mb_return_to_preallocation(struct inode *inode,
struct ext4_buddy *e4b, sector_t block,
int count);
-static void ext4_mb_show_ac(struct ext4_allocation_context *ac);
static void ext4_mb_put_pa(struct ext4_allocation_context *, struct super_block *,
struct ext4_prealloc_space *pa);
static int ext4_mb_init_per_dev_proc(struct super_block *sb);
@@ -1838,14 +1837,23 @@ static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
goto out;
- i = ffs(ac->ac_g_ex.fe_len);
+ /*
+ * ac->ac_2order is set only if fe_len is a power of 2.
+ * If ac_2order is set we also set criteria to 0 so that we
+ * try exact allocation using buddy.
+ */
+ i = fls(ac->ac_g_ex.fe_len);
ac->ac_2order = 0;
- /* FIXME!!
- * What happens if i is still greater than s_mb_order2_reqs
+ /*
+ * We search using buddy data only if the order of the request
+ * is greater than or equal to sbi->s_mb_order2_reqs.
+ * You can tune it via /proc/fs/ext4/<partition>/order2_req
*/
if (i >= sbi->s_mb_order2_reqs) {
- i--;
- if ((ac->ac_g_ex.fe_len & (~(1 << i))) == 0)
+ /*
+ * This should tell if fe_len is exactly power of 2
+ */
+ if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
ac->ac_2order = i;
}
@@ -1865,17 +1873,17 @@ static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
spin_unlock(&sbi->s_md_lock);
}
+ /* searching for the right group start from the goal value specified */
group = ac->ac_g_ex.fe_group;
/* Let's just scan groups to find more-less suitable blocks */
cr = ac->ac_2order ? 0 : 1;
+ /*
+ * cr == 0 try to get exact allocation,
+ * cr == 3 try to get anything
+ */
repeat:
for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
- /* FIXME!!
- * We need to explain what criteria is and also
- * need to define the number 0 to 4 for criteria
- * What they actually means.
- */
ac->ac_criteria = cr;
for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
struct ext4_group_info *grp;
@@ -1889,23 +1897,28 @@ repeat:
if (grp->bb_free == 0)
continue;
+ /*
+ * if the group is already init we check whether it is
+ * a good group and if not we don't load the buddy
+ */
if (EXT4_MB_GRP_NEED_INIT(EXT4_GROUP_INFO(sb, group))) {
- /* we need full data about the group
- * to make a good selection */
+ /*
+ * we need full data about the group
+ * to make a good selection
+ */
err = ext4_mb_load_buddy(sb, group, &e4b);
if (err)
goto out;
ext4_mb_release_desc(&e4b);
}
- /* check is group good for our criteries */
+ /*
+ * If the particular group doesn't satisfy our
+ * criteria we continue with the next group
+ */
if (!ext4_mb_good_group(ac, group, cr))
continue;
- /* FIXME!!
- * here also we are loading the buddy. so what difference
- * does EXT4_MB_GRP_NEED_INIT actually make
- */
err = ext4_mb_load_buddy(sb, group, &e4b);
if (err)
goto out;
@@ -3726,10 +3739,9 @@ repeat:
busy = 0;
ext4_unlock_group(sb, group);
/*
- * We see this quiet rare. But if a particular workload is
- * effected by this we may need to add a waitqueue
+ * Yield the CPU here so that we don't get soft lockup
*/
- schedule_timeout(HZ);
+ schedule();
goto repeat;
}
@@ -3808,7 +3820,7 @@ repeat:
printk(KERN_ERR "uh-oh! used pa while discarding\n");
dump_stack();
current->state = TASK_UNINTERRUPTIBLE;
- schedule();
+ schedule_timeout(HZ);
goto repeat;
}
@@ -3832,8 +3844,12 @@ repeat:
* pa from inode's list may access already
* freed memory, bad-bad-bad */
+ /* XXX: if this happens too often, we can
+ * add a flag to force wait only in case
+ * of ->clear_inode(), but not in case of
+ * regular truncate */
current->state = TASK_UNINTERRUPTIBLE;
- schedule();
+ schedule_timeout(HZ);
goto repeat;
}
spin_unlock(&ei->i_prealloc_lock);
@@ -3878,7 +3894,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
{
BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
}
-
+#ifdef MB_DEBUG
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
struct super_block *sb = ac->ac_sb;
@@ -3928,6 +3944,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
}
printk(KERN_ERR "\n");
}
+#else
+#define ext4_mb_show_ac(x)
+#endif
/*
* We use locality group preallocation for small size file. The size of the
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c69f4e5..9d91c60 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1775,6 +1775,21 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
return (has_super + ext4_group_first_block_no(sb, bg));
}
+static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
+{
+ unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
+ unsigned long stripe_width = le32_to_cpu(sbi->s_es->s_raid_stripe_width);
+
+ if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) {
+ return sbi->s_stripe;
+ } else if (stripe_width <= sbi->s_blocks_per_group) {
+ return stripe_width;
+ } else if (stride <= sbi->s_blocks_per_group) {
+ return stride;
+ }
+
+ return 0;
+}
static int ext4_fill_super (struct super_block *sb, void *data, int silent)
__releases(kernel_sem)
@@ -2131,6 +2146,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_rsv_window_head.rsv_alloc_hit = 0;
sbi->s_rsv_window_head.rsv_goal_size = 0;
ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
+ /*
+ * set the stripe size. If we have specified it via mount option, then
+ * use the mount option value. If the value specified at mount time is
+ * greater than the blocks per group use the super block value.
+ * The allocator needs it to be at most blocks per group.
+ */
+ sbi->s_stripe = ext4_get_stripe_size(sbi);
/*
* set up enough so that it can read an inode
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists