lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090611055006.GA8073@skywalker>
Date:	Thu, 11 Jun 2009 11:20:06 +0530
From:	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
To:	Theodore Tso <tytso@....edu>
Cc:	Nick Dokos <nicholas.dokos@...com>,
	Valerie Aurora <vaurora@...hat.com>, linux-ext4@...r.kernel.org
Subject: Re: Some 64-bit tests

On Wed, Jun 10, 2009 at 02:13:20PM -0400, Theodore Tso wrote:
> On Mon, Jun 08, 2009 at 11:13:48PM -0400, Nick Dokos wrote:
> > 
> > I tried this on top of 2.6.30-rc8 and I hit a couple of BUGs, one in pdflush
> > and the other in the Lustre teest program (liverfs):
> > 
> > Jun  8 22:49:13 shifter kernel: ------------[ cut here ]------------
> > Jun  8 22:49:13 shifter kernel: kernel BUG at fs/ext4/mballoc.c:3245!
> > Jun  8 22:49:13 shifter kernel: invalid opcode: 0000 [#1] SMP 
> 
> Hmmm, that would be the BUG_ON check:
> 
>      BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
> 

I already have a RFC patch which Nick actually tested. It is giving 32MB
extents, which is expected because the max order in buddy cache is
blocksize_bits + 1. I have a Fixme in there regarding scaling the start
block which was hoping to fix soon.

Attaching the patch below.

commit f1fbc2ac43fefb6bac227fc995fe2b79c67ccfad
Author: Aneesh Kumar K.V <aneesh.kumar@...ux.vnet.ibm.com>
Date:   Tue Jun 9 01:38:53 2009 +0530

    ext4: Use different normalization method for allocation size.
    
    Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@...ux.vnet.ibm.com>

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ed8482e..9745b84 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -633,7 +633,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
 
 	BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
 
-	border = 2 << sb->s_blocksize_bits;
+	border = 1 << (sb->s_blocksize_bits + 1);
 
 	while (len > 0) {
 		/* find how many blocks can be covered since this position */
@@ -3063,8 +3063,10 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
 ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 				struct ext4_allocation_request *ar)
 {
-	int bsbits, max;
+	loff_t max;
 	ext4_lblk_t end;
+	int bsbits, chunk_blks;
+	unsigned int s_mb_stream_request;
 	loff_t size, orig_size, start_off;
 	ext4_lblk_t start, orig_start;
 	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
@@ -3090,54 +3092,61 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 	}
 
 	bsbits = ac->ac_sb->s_blocksize_bits;
+	s_mb_stream_request = EXT4_SB(ac->ac_sb)->s_mb_stream_request;
+	/* make sure this is power of 2 */
+	s_mb_stream_request =
+		roundup_pow_of_two((unsigned long)s_mb_stream_request);
 
 	/* first, let's learn actual file size
 	 * given current request is allocated */
 	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
-	size = size << bsbits;
-	if (size < i_size_read(ac->ac_inode))
-		size = i_size_read(ac->ac_inode);
-
-	/* max size of free chunks */
-	max = 2 << bsbits;
+	if (size < (i_size_read(ac->ac_inode) >> bsbits))
+		size = i_size_read(ac->ac_inode) >> bsbits;
+	/*
+	 * max free chunk blocks.
+	 * (max buddy cache order is (bsbits + 1).
+	 */
+	max = 1 << (bsbits + 1);
 
-#define NRL_CHECK_SIZE(req, size, max, chunk_size)	\
-		(req <= (size) || max <= (chunk_size))
+	/*
+	 * If buddy cache says it can have more than
+	 * blocks per group then limit to blocks per group.
+	 */
+	if (max > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
+		max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
 
 	/* first, try to predict filesize */
 	/* XXX: should this table be tunable? */
-	start_off = 0;
-	if (size <= 16 * 1024) {
-		size = 16 * 1024;
-	} else if (size <= 32 * 1024) {
-		size = 32 * 1024;
-	} else if (size <= 64 * 1024) {
-		size = 64 * 1024;
-	} else if (size <= 128 * 1024) {
-		size = 128 * 1024;
-	} else if (size <= 256 * 1024) {
-		size = 256 * 1024;
-	} else if (size <= 512 * 1024) {
-		size = 512 * 1024;
-	} else if (size <= 1024 * 1024) {
-		size = 1024 * 1024;
-	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
-		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
-						(21 - bsbits)) << 21;
-		size = 2 * 1024 * 1024;
-	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
-		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
-							(22 - bsbits)) << 22;
-		size = 4 * 1024 * 1024;
-	} else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
-					(8<<20)>>bsbits, max, 8 * 1024)) {
-		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
-							(23 - bsbits)) << 23;
-		size = 8 * 1024 * 1024;
-	} else {
-		start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
-		size	  = ac->ac_o_ex.fe_len << bsbits;
+	/*
+	 * less than s_mb_stream_request is using
+	 * locality group preallocation
+	 */
+	if (size <= s_mb_stream_request) {
+		size = s_mb_stream_request << bsbits;
+		goto found_size;
+	}
+	chunk_blks = s_mb_stream_request << 1;
+	while (1) {
+		if (size <= chunk_blks) {
+			if (max <= chunk_blks)
+				size = max << bsbits;
+			else
+				size = chunk_blks << bsbits;
+			break;
+		}
+		chunk_blks = chunk_blks << 1;
 	}
+
+found_size:
+#if 0
+	/* Will i end up requesting for less that what i asked for ? */
+	start_off = (loff_t)(ac->ac_o_ex.fe_logical << bsbits) & ~(size - 1);
+	start_off = start_off * size;
+#else
+	start_off = (loff_t)(ac->ac_o_ex.fe_logical << bsbits);
+#endif
+
+	/* convert into blocks */
 	orig_size = size = size >> bsbits;
 	orig_start = start = start_off >> bsbits;
 
@@ -3216,6 +3225,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 	}
 	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
 			start > ac->ac_o_ex.fe_logical);
+
+	if (size <= 0 ||  size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
+		printk(KERN_ALERT "size is %ld orig size is %ld\n", (long)size, (long)orig_size);
+
 	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 
 	/* now prepare goal request */
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ