Message-ID: <20080107185758.GB15183@skywalker>
Date: Tue, 8 Jan 2008 00:27:58 +0530
From: "Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
To: Alex Tomas <bzzz@....com>, Andreas Dilger <adilger@....com>
Cc: "linux-ext4@...r.kernel.org" <linux-ext4@...r.kernel.org>
Subject: Re: [PATCH] mballoc changes from ldiskfs
On Mon, Jan 07, 2008 at 11:58:00PM +0530, Aneesh Kumar K.V wrote:
> Hi,
>
> This patch is not even compile-tested. I am sending it over to find out
> whether some of the changes are even needed and to make sure I didn't
> drop any bug fixes in the merge.
>
> Some things I noticed:
>
> a) The prealloc table is completely gone.
> b) The ext4_mb_put_pa change. (I guess that is a bug in ldiskfs.)
>
>
> Now, by default, requests smaller than 64K use locality group preallocation.
>
> The ldiskfs change I looked at is from
> lustre/ldiskfs/kernel_patches/patches/ext3-mballoc3-core.patch
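To make the 64K policy above concrete, here is a userspace sketch of the
decision (the cutoff constant and all names below are my own stand-ins,
not the actual mballoc code):

#include <stdio.h>

#define STREAM_REQ_CUTOFF (64 * 1024)   /* bytes; assumed default */

enum pa_type { PA_GROUP, PA_INODE };

static enum pa_type choose_prealloc(unsigned long req_bytes, int is_data)
{
        /* small data allocations share a locality group so that
         * many small files end up physically close together */
        if (is_data && req_bytes < STREAM_REQ_CUTOFF)
                return PA_GROUP;
        /* large requests keep a private per-inode preallocation */
        return PA_INODE;
}

int main(void)
{
        printf("16K data req -> %s\n",
               choose_prealloc(16 * 1024, 1) == PA_GROUP ? "group" : "inode");
        printf("1M data req  -> %s\n",
               choose_prealloc(1024 * 1024, 1) == PA_GROUP ? "group" : "inode");
        return 0;
}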
[... snip ...]
>
> BUG_ON(ex->fe_len <= 0);
> - BUG_ON(ex->fe_len >= (1 << ac->ac_sb->s_blocksize_bits) * 8);
> - BUG_ON(ex->fe_start >= (1 << ac->ac_sb->s_blocksize_bits) * 8);
> + BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
> + BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
> BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
>
> ac->ac_found++;
> @@ -1553,7 +1546,6 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
> /* if the request is satisfied, then we try to find
> * an extent that still satisfy the request, but is
> * smaller than previous one */
> - if (ex->fe_len < bex->fe_len)
> *bex = *ex;
> }
This one is a bug fix for the ext4 patch queue from Alex, so this change
is needed.
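To see why the dropped conditional matters: once the request is
satisfied, the scan should only replace the best-so-far extent with a
*smaller* satisfying one. A simplified userspace sketch (types and
names made up, not the mballoc code):

#include <stdio.h>

struct ext { int start, len; };

static void measure(struct ext *ex, int goal_len, struct ext *bex)
{
        if (ex->len < goal_len)
                return;            /* does not satisfy the request */
        if (bex->len == 0 || ex->len < bex->len)
                *bex = *ex;        /* tighter fit than what we had */
}

int main(void)
{
        struct ext a = { 0, 12 }, b = { 40, 10 }, best = { 0, 0 };

        measure(&a, 8, &best);     /* best = a (first satisfying fit) */
        measure(&b, 8, &best);     /* best = b (smaller: 10 < 12)     */
        printf("best: start=%d len=%d\n", best.start, best.len);
        return 0;
}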
>
> @@ -1702,8 +1694,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
> i = e4b->bd_info->bb_first_free;
>
> while (free && ac->ac_status == AC_STATUS_CONTINUE) {
> - i = ext4_find_next_zero_bit(bitmap, sb->s_blocksize * 8, i);
> - if (i >= sb->s_blocksize * 8) {
> + i = ext4_find_next_zero_bit(bitmap, EXT4_BLOCKS_PER_GROUP(sb), i);
> + if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
> BUG_ON(free != 0);
> break;
> }
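This hunk and the scan_aligned one below fix the same loop bound: the
bitmap block holds s_blocksize * 8 bits, but only
EXT4_BLOCKS_PER_GROUP(sb) of them describe real blocks, and the two
differ when mkfs created groups smaller than the bitmap can describe.
A toy example with made-up numbers:

#include <stdio.h>

int main(void)
{
        unsigned blocksize = 4096;              /* bytes */
        unsigned bitmap_bits = blocksize * 8;   /* 32768 */
        unsigned blocks_per_group = 25600;      /* mkfs may pick less */

        /* the old bound scans bits that lie past the group's end */
        printf("bitmap bits: %u, valid bits: %u, stale bits: %u\n",
               bitmap_bits, blocks_per_group,
               bitmap_bits - blocks_per_group);
        return 0;
}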
> @@ -1744,7 +1736,7 @@ static void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
> i = (i - le32_to_cpu(sbi->s_es->s_first_data_block))
> % EXT4_BLOCKS_PER_GROUP(sb);
>
> - while (i < sb->s_blocksize * 8) {
> + while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
> if (!mb_test_bit(i, bitmap)) {
> max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
> if (max >= sbi->s_stripe) {
> @@ -1839,20 +1831,6 @@ static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
> ac->ac_2order = i;
> }
>
> - /* if stream allocation is enabled, use global goal */
> -
> - /* FIXME!!
> - * Need more explanation on what it is and how stream
> - * allocation is represented by the below conditional
> - */
> - if ((ac->ac_g_ex.fe_len < sbi->s_mb_large_req) &&
> - (ac->ac_flags & EXT4_MB_HINT_DATA)) {
> - /* TBD: may be hot point */
> - spin_lock(&sbi->s_md_lock);
> - ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
> - ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
> - spin_unlock(&sbi->s_md_lock);
> - }
>
> group = ac->ac_g_ex.fe_group;
>
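For reference, the removed hunk implements a shared global goal: small
data requests all resume from where the previous stream allocation
ended, so they get packed together. A rough userspace sketch (locking
and all names illustrative only):

#include <pthread.h>

struct goal { unsigned group, start; };

static pthread_mutex_t goal_lock = PTHREAD_MUTEX_INITIALIZER;
static struct goal last_goal;           /* like s_mb_last_group/start */

static void apply_stream_goal(struct goal *g, unsigned req_len,
                              unsigned large_req, int is_data)
{
        if (!is_data || req_len >= large_req)
                return;                 /* large requests keep their own goal */
        pthread_mutex_lock(&goal_lock); /* the "may be hot point" above */
        *g = last_goal;
        pthread_mutex_unlock(&goal_lock);
}

int main(void)
{
        struct goal g = { 0, 0 };

        last_goal.group = 7;
        last_goal.start = 123;
        apply_stream_goal(&g, 16, 64, 1); /* small data req follows goal */
        return g.group == 7 ? 0 : 1;
}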
> @@ -2291,7 +2269,8 @@ static void ext4_mb_history_init(struct super_block *sb)
> spin_lock_init(&sbi->s_mb_history_lock);
> i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
> sbi->s_mb_history = kmalloc(i, GFP_KERNEL);
> - memset(sbi->s_mb_history, 0, i);
> + if (likely(sbi->s_mb_history != NULL))
> + memset(sbi->s_mb_history, 0, i);
> /* if we can't allocate history, then we simple won't use it */
> }
>
> @@ -2300,7 +2279,7 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac)
> struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
> struct ext4_mb_history h;
>
> - if (likely(sbi->s_mb_history == NULL))
> + if (unlikely(sbi->s_mb_history == NULL))
> return;
>
> if (!(ac->ac_op & sbi->s_mb_history_filter))
> @@ -2312,11 +2291,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac)
> h.orig = ac->ac_o_ex;
> h.result = ac->ac_b_ex;
> h.flags = ac->ac_flags;
> - h.found = ac->ac_found;
> - h.groups = ac->ac_groups_scanned;
> - h.cr = ac->ac_criteria;
> - h.tail = ac->ac_tail;
> - h.buddy = ac->ac_buddy;
> h.merged = 0;
This one is a bug fix for the ext4 patch queue from Alex, so this change
is needed.
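The likely() -> unlikely() change above is also a real fix: the NULL
case is the rare allocation-failure path, so the old annotation hinted
the compiler the wrong way. A sketch with userspace stand-ins for the
kernel macros:

#include <stddef.h>

#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

static void store_history(void *history)
{
        if (unlikely(history == NULL))  /* was likely(): inverted hint */
                return;
        /* ... record the allocation ... */
}

int main(void)
{
        store_history(NULL);            /* failure path: quietly skipped */
        return 0;
}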
[....]
> +
> + /* first, try to predict filesize */
> + /* XXX: should this table be tunable? */
Here we say we need tunables. Does that mean the prealloc table is
needed after all? (A table-driven sketch follows the hunk below.)
> + start = 0;
> + if (size <= 16 * 1024) {
> + size = 16 * 1024;
> + } else if (size <= 32 * 1024) {
> + size = 32 * 1024;
> + } else if (size <= 64 * 1024) {
> + size = 64 * 1024;
> + } else if (size <= 128 * 1024) {
> + size = 128 * 1024;
> + } else if (size <= 256 * 1024) {
> + size = 256 * 1024;
> + } else if (size <= 512 * 1024) {
> + size = 512 * 1024;
> + } else if (size <= 1024 * 1024) {
> + size = 1024 * 1024;
> + } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) {
> + start = ac->ac_o_ex.fe_logical << bsbits;
> + start = (start / (1024 * 1024)) * (1024 * 1024);
> + size = 1024 * 1024;
> + } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) {
> + start = ac->ac_o_ex.fe_logical << bsbits;
> + start = (start / (4 * (1024 * 1024))) * 4 * (1024 * 1024);
> + size = 4 * 1024 * 1024;
> + } else if(NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,(8<<20)>>bsbits,max,bsbits)){
> + start = ac->ac_o_ex.fe_logical;
> + start = start << bsbits;
> + start = (start / (8 * (1024 * 1024))) * 8 * (1024 * 1024);
> + size = 8 * 1024 * 1024;
> + } else {
> + start = ac->ac_o_ex.fe_logical;
> + start = start << bsbits;
> + size = ac->ac_o_ex.fe_len << bsbits;
> }
> - orig_size = size;
> - orig_start = start;
> + orig_size = size = size >> bsbits;
> + orig_start = start = start >> bsbits;
>
> /* don't cover already allocated blocks in selected range */
> if (ar->pleft && start <= ar->lleft) {
> @@ -3203,22 +3098,6 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
> ac->ac_g_ex.fe_logical = start;
> ac->ac_g_ex.fe_len = size;
>
> - /* define goal start in order to merge */
> - if (ar->pright && (ar->lright == (start + size))) {
> - /* merge to the right */
> - ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
> - &ac->ac_f_ex.fe_group,
> - &ac->ac_f_ex.fe_start);
> - ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
> - }
> - if (ar->pleft && (ar->lleft + 1 == start)) {
> - /* merge to the left */
> - ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
> - &ac->ac_f_ex.fe_group,
> - &ac->ac_f_ex.fe_start);
> - ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
> - }
> -
> mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size,
> (unsigned) orig_size, (unsigned) start);
> }
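Coming back to the tunables question above: a table-driven version of
the hard-coded ladder could look roughly like this (userspace sketch,
names made up; the values mirror the ladder, and exporting the array
via /proc would be the "tunable" part):

#include <stdio.h>

static const unsigned long prealloc_table[] = {
        16 * 1024, 32 * 1024, 64 * 1024, 128 * 1024,
        256 * 1024, 512 * 1024, 1024 * 1024,
};

static unsigned long round_up_request(unsigned long size)
{
        size_t i;

        for (i = 0; i < sizeof(prealloc_table) / sizeof(*prealloc_table); i++)
                if (size <= prealloc_table[i])
                        return prealloc_table[i];
        return size;            /* larger sizes handled separately */
}

int main(void)
{
        printf("20K -> %lu\n", round_up_request(20 * 1024)); /* 32768 */
        return 0;
}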
> @@ -3395,8 +3274,10 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
> &groupnr, &start);
> len = pa->pa_len;
> spin_unlock(&pa->pa_lock);
> + if (unlikely(len == 0))
> + continue;
> BUG_ON(groupnr != group);
> - mb_set_bits(bitmap, start, len);
> + mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), bitmap, start, len);
> preallocated += len;
> count++;
> }
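The new len == 0 guard makes sense to me: pa_len is snapshotted under
pa_lock, and a concurrent discard may have drained the pa to zero
between the list walk and the snapshot, leaving nothing to mark in the
bitmap. A simplified userspace sketch of the pattern:

#include <pthread.h>
#include <stdio.h>

struct pa {
        pthread_mutex_t lock;
        unsigned int start, len;        /* like pa_pstart / pa_len */
};

/* returns 0 if the pa was already drained by a concurrent discard */
static int snapshot_pa(struct pa *pa, unsigned int *start, unsigned int *len)
{
        pthread_mutex_lock(&pa->lock);
        *start = pa->start;             /* consistent snapshot ...  */
        *len = pa->len;                 /* ... taken under the lock */
        pthread_mutex_unlock(&pa->lock);
        return *len != 0;               /* the new "len == 0" guard */
}

int main(void)
{
        struct pa pa = { PTHREAD_MUTEX_INITIALIZER, 100, 0 };
        unsigned int s, l;

        if (!snapshot_pa(&pa, &s, &l))
                printf("drained pa skipped, nothing to mark\n");
        return 0;
}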
> @@ -3425,7 +3306,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
>
> /* in this short window concurrent discard can set pa_deleted */
> spin_lock(&pa->pa_lock);
> - if (pa->pa_deleted == 1) {
> + if (pa->pa_deleted == 0) {
> spin_unlock(&pa->pa_lock);
> return;
> }
I think that should be == 1 (ldiskfs has it as == 0).
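A sketch of the race this test guards against (userspace stand-ins,
names made up; the point is that put_pa must bail out when a concurrent
discard already set pa_deleted, hence the == 1 test):

#include <pthread.h>

struct pa { pthread_mutex_t lock; int deleted; };

static int try_delete_pa(struct pa *pa)
{
        pthread_mutex_lock(&pa->lock);
        if (pa->deleted == 1) {         /* someone else got here first */
                pthread_mutex_unlock(&pa->lock);
                return 0;
        }
        pa->deleted = 1;                /* we own the teardown now */
        pthread_mutex_unlock(&pa->lock);
        return 1;
}

int main(void)
{
        struct pa pa = { PTHREAD_MUTEX_INITIALIZER, 0 };

        try_delete_pa(&pa);                  /* first caller wins        */
        return try_delete_pa(&pa) ? 1 : 0;   /* second sees deleted == 1 */
}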
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html