lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:	Tue, 8 Jan 2008 00:27:58 +0530
From:	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
To:	Alex Tomas <bzzz@....com>, Andreas Dilger <adilger@....com>
Cc:	"linux-ext4@...r.kernel.org" <linux-ext4@...r.kernel.org>
Subject: Re: [PATCH] mballoc changes from ldiskfs

On Mon, Jan 07, 2008 at 11:58:00PM +0530, Aneesh Kumar K.V wrote:
> Hi,
> 
> This patch is  not even compile tested. I am sending it over to find out
> whether some of the changes are even needed and to make sure i didn't
> drop any bug fix in the merge.
> 
> something I noticed.
> 
> a) prealloc table is completely gone.
> b) ext4_mb_put_pa change. ( I guess that is a bug with ldiskfs ).
> 
> 
> now by default request less that 64K use locality group preallocation.
> 
> The ldiskfs change i looked at is from
> lustre/ldiskfs/kernel_patches/patches/ext3-mballoc3-core.patch

[... snip... ]

> 
>  	BUG_ON(ex->fe_len <= 0);
> -	BUG_ON(ex->fe_len >= (1 << ac->ac_sb->s_blocksize_bits) * 8);
> -	BUG_ON(ex->fe_start >= (1 << ac->ac_sb->s_blocksize_bits) * 8);
> +	BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
> +	BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
>  	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
> 
>  	ac->ac_found++;
> @@ -1553,7 +1546,6 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
>  		/* if the request is satisfied, then we try to find
>  		 * an extent that still satisfy the request, but is
>  		 * smaller than previous one */
> -		if (ex->fe_len < bex->fe_len)
>  			*bex = *ex;
>  	}


This one is a bug fix for ext4 patch queue from Alex. So this change is needed.


> 
> @@ -1702,8 +1694,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
>  	i = e4b->bd_info->bb_first_free;
> 
>  	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
> -		i = ext4_find_next_zero_bit(bitmap, sb->s_blocksize * 8, i);
> -		if (i >= sb->s_blocksize * 8) {
> +		i = ext4_find_next_zero_bit(bitmap, EXT4_BLOCKS_PER_GROUP(sb), i);
> +		if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
>  			BUG_ON(free != 0);
>  			break;
>  		}
> @@ -1744,7 +1736,7 @@ static void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
>  	i = (i - le32_to_cpu(sbi->s_es->s_first_data_block))
>  			% EXT4_BLOCKS_PER_GROUP(sb);
> 
> -	while (i < sb->s_blocksize * 8) {
> +	while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
>  		if (!mb_test_bit(i, bitmap)) {
>  			max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
>  			if (max >= sbi->s_stripe) {
> @@ -1839,20 +1831,6 @@ static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
>  			ac->ac_2order = i;
>  	}
> 
> -	/* if stream allocation is enabled, use global goal */
> -
> -	/* FIXME!!
> -	 * Need more explanation on what it is and how stream
> -	 * allocation is represented by the below conditional
> -	 */
> -	if ((ac->ac_g_ex.fe_len < sbi->s_mb_large_req) &&
> -			(ac->ac_flags & EXT4_MB_HINT_DATA)) {
> -		/* TBD: may be hot point */
> -		spin_lock(&sbi->s_md_lock);
> -		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
> -		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
> -		spin_unlock(&sbi->s_md_lock);
> -	}
> 
>  	group = ac->ac_g_ex.fe_group;
> 
> @@ -2291,7 +2269,8 @@ static void ext4_mb_history_init(struct super_block *sb)
>  	spin_lock_init(&sbi->s_mb_history_lock);
>  	i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
>  	sbi->s_mb_history = kmalloc(i, GFP_KERNEL);
> -	memset(sbi->s_mb_history, 0, i);
> +	if (likely(sbi->s_mb_history != NULL))
> +		memset(sbi->s_mb_history, 0, i);
>  	/* if we can't allocate history, then we simple won't use it */
>  }
> 
> @@ -2300,7 +2279,7 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac)
>  	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
>  	struct ext4_mb_history h;
> 
> -	if (likely(sbi->s_mb_history == NULL))
> +	if (unlikely(sbi->s_mb_history == NULL))
>  		return;
> 
>  	if (!(ac->ac_op & sbi->s_mb_history_filter))
> @@ -2312,11 +2291,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac)
>  	h.orig = ac->ac_o_ex;
>  	h.result = ac->ac_b_ex;
>  	h.flags = ac->ac_flags;
> -	h.found = ac->ac_found;
> -	h.groups = ac->ac_groups_scanned;
> -	h.cr = ac->ac_criteria;
> -	h.tail = ac->ac_tail;
> -	h.buddy = ac->ac_buddy;
>  	h.merged = 0;


This one is a bug for ext4 patch queue from Alex. So this change is
needed.


[....]
> +
> +	/* first, try to predict filesize */
> +	/* XXX: should this table be tunable? */

Here we says we need to have tunables. Does that mean prealloc table is
needed ?


> +	start = 0;
> +	if (size <= 16 * 1024) {
> +		size = 16 * 1024;
> +	} else if (size <= 32 * 1024) {
> +		size = 32 * 1024;
> +	} else if (size <= 64 * 1024) {
> +		size = 64 * 1024;
> +	} else if (size <= 128 * 1024) {
> +		size = 128 * 1024;
> +	} else if (size <= 256 * 1024) {
> +		size = 256 * 1024;
> +	} else if (size <= 512 * 1024) {
> +		size = 512 * 1024;
> +	} else if (size <= 1024 * 1024) {
> +		size = 1024 * 1024;
> +	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) {
> +		start = ac->ac_o_ex.fe_logical << bsbits;
> +		start = (start / (1024 * 1024)) * (1024 * 1024);
> +		size = 1024 * 1024;
> +	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) {
> +		start = ac->ac_o_ex.fe_logical << bsbits;
> +		start = (start / (4 * (1024 * 1024))) * 4 * (1024 * 1024);
> +		size = 4 * 1024 * 1024;
> +	} else if(NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,(8<<20)>>bsbits,max,bsbits)){
> +		start = ac->ac_o_ex.fe_logical;
> +		start = start << bsbits;
> +		start = (start / (8 * (1024 * 1024))) * 8 * (1024 * 1024);
> +		size = 8 * 1024 * 1024;
> +	} else {
> +		start = ac->ac_o_ex.fe_logical;
> +		start = start << bsbits;
> +		size = ac->ac_o_ex.fe_len << bsbits;
>  	}
> -	orig_size = size;
> -	orig_start = start;
> +	orig_size = size = size >> bsbits;
> +	orig_start = start = start >> bsbits;
> 
>  	/* don't cover already allocated blocks in selected range */
>  	if (ar->pleft && start <= ar->lleft) {
> @@ -3203,22 +3098,6 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
>  	ac->ac_g_ex.fe_logical = start;
>  	ac->ac_g_ex.fe_len = size;
> 
> -	/* define goal start in order to merge */
> -	if (ar->pright && (ar->lright == (start + size))) {
> -		/* merge to the right */
> -		ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
> -						&ac->ac_f_ex.fe_group,
> -						&ac->ac_f_ex.fe_start);
> -		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
> -	}
> -	if (ar->pleft && (ar->lleft + 1 == start)) {
> -		/* merge to the left */
> -		ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
> -						&ac->ac_f_ex.fe_group,
> -						&ac->ac_f_ex.fe_start);
> -		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
> -	}
> -
>  	mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size,
>  		(unsigned) orig_size, (unsigned) start);
>  }
> @@ -3395,8 +3274,10 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
>  					     &groupnr, &start);
>  		len = pa->pa_len;
>  		spin_unlock(&pa->pa_lock);
> +		if (unlikely(len == 0))
> +			continue;
>  		BUG_ON(groupnr != group);
> -		mb_set_bits(bitmap, start, len);
> +		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), bitmap, start, len);
>  		preallocated += len;
>  		count++;
>  	}
> @@ -3425,7 +3306,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
> 
>  	/* in this short window concurrent discard can set pa_deleted */
>  	spin_lock(&pa->pa_lock);
> -	if (pa->pa_deleted == 1) {
> +	if (pa->pa_deleted == 0) {
>  		spin_unlock(&pa->pa_lock);
>  		return;
>  	}


I think that should == 1 (ldiskfs have it == 0)


-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ