lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 10 Jan 2008 21:03:58 +0530
From:	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
To:	Mingming Cao <cmm@...ibm.com>
Cc:	ext4 development <linux-ext4@...r.kernel.org>
Subject: patch queue update

Hi Mingming,

New patches for patch queue can be found at
http://www.radian.org/~kvaneesh/ext4/jan-10-2008-ver2/

The changes are
------------
a) mballoc patch got an explanation about regular allocator.
b) mballoc regular allocator we changed the usage of ffs to fls. I guess
it makes sense to use fls because we want to compare it against the
tunable s_mb_order2_reqs. Only request above this order are using
criteria 0 allocation.
c) stripe.patch to use the stripe size set in the super block for block
allocation.

The diff is attached for reference.

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0d31817..0085fde 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -468,7 +468,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *);
 static void ext4_mb_return_to_preallocation(struct inode *inode,
 					struct ext4_buddy *e4b, sector_t block,
 					int count);
-static void ext4_mb_show_ac(struct ext4_allocation_context *ac);
 static void ext4_mb_put_pa(struct ext4_allocation_context *, struct super_block *,
 						struct ext4_prealloc_space *pa);
 static int ext4_mb_init_per_dev_proc(struct super_block *sb);
@@ -1838,14 +1837,23 @@ static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
 		goto out;
 
-	i = ffs(ac->ac_g_ex.fe_len);
+	/*
+	 * ac->ac2_order is set only if the fe_len is a power of 2
+	 * if ac2_order is set we also set criteria to 0 so whtat we
+	 * try exact allocation using buddy.
+	 */
+	i = fls(ac->ac_g_ex.fe_len);
 	ac->ac_2order = 0;
-	/* FIXME!!
-	 * What happens if i is still greater than s_mb_order2_reqs
+	/*
+	 * We search using buddy data only if the order of the request
+	 * is greater than equal to the sbi_s_mb_order2_reqs
+	 * You can tune it via /proc/fs/ext4/<partition>/order2_req
 	 */
 	if (i >= sbi->s_mb_order2_reqs) {
-		i--;
-		if ((ac->ac_g_ex.fe_len & (~(1 << i))) == 0)
+		/*
+		 * This should tell if fe_len is exactly power of 2
+		 */
+		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
 			ac->ac_2order = i;
 	}
 
@@ -1865,17 +1873,17 @@ static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 		spin_unlock(&sbi->s_md_lock);
 	}
 
+	/* searching for the right group start from the goal value specified */
 	group = ac->ac_g_ex.fe_group;
 
 	/* Let's just scan groups to find more-less suitable blocks */
 	cr = ac->ac_2order ? 0 : 1;
+	/*
+	 * cr == 0 try to get exact allocation,
+	 * cr == 3  try to get anything
+	 */
 repeat:
 	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
-		/* FIXME!!
-		 * We need to explain what criteria is and also
-		 * need to define the number 0 to 4 for criteria
-		 * What they actually means.
-		 */
 		ac->ac_criteria = cr;
 		for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
 			struct ext4_group_info *grp;
@@ -1889,23 +1897,28 @@ repeat:
 			if (grp->bb_free == 0)
 				continue;
 
+			/*
+			 * if the group is already init we check whether it is
+			 * a good group and if not we don't load the buddy
+			 */
 			if (EXT4_MB_GRP_NEED_INIT(EXT4_GROUP_INFO(sb, group))) {
-				/* we need full data about the group
-				 * to make a good selection */
+				/*
+				 * we need full data about the group
+				 * to make a good selection
+				 */
 				err = ext4_mb_load_buddy(sb, group, &e4b);
 				if (err)
 					goto out;
 				ext4_mb_release_desc(&e4b);
 			}
 
-			/* check is group good for our criteries */
+			/*
+			 * If the particular group doesn't satisfy our
+			 * criteria we continue with the next group
+			 */
 			if (!ext4_mb_good_group(ac, group, cr))
 				continue;
 
-			/* FIXME!!
-			 * here also we are loading the buddy. so what difference
-			 * does EXT4_MB_GRP_NEED_INIT actually make
-			 */
 			err = ext4_mb_load_buddy(sb, group, &e4b);
 			if (err)
 				goto out;
@@ -3726,10 +3739,9 @@ repeat:
 		busy = 0;
 		ext4_unlock_group(sb, group);
 		/*
-		 * We see this quiet rare. But if a particular workload is
-		 * effected by this we may need to add a waitqueue
+		 * Yield the CPU here so that we don't get soft lockup
 		 */
-		schedule_timeout(HZ);
+		schedule();
 		goto repeat;
 	}
 
@@ -3808,7 +3820,7 @@ repeat:
 			printk(KERN_ERR "uh-oh! used pa while discarding\n");
 			dump_stack();
 			current->state = TASK_UNINTERRUPTIBLE;
-			schedule();
+			schedule_timeout(HZ);
 			goto repeat;
 
 		}
@@ -3832,8 +3844,12 @@ repeat:
 		 * pa from inode's list may access already
 		 * freed memory, bad-bad-bad */
 
+		/* XXX: if this happens too often, we can
+		 * add a flag to force wait only in case
+		 * of ->clear_inode(), but not in case of
+		 * regular truncate */
 		current->state = TASK_UNINTERRUPTIBLE;
-		schedule();
+		schedule_timeout(HZ);
 		goto repeat;
 	}
 	spin_unlock(&ei->i_prealloc_lock);
@@ -3878,7 +3894,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
 {
 	BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
 }
-
+#ifdef MB_DEBUG
 static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 {
 	struct super_block *sb = ac->ac_sb;
@@ -3928,6 +3944,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 	}
 	printk(KERN_ERR "\n");
 }
+#else
+#define ext4_mb_show_ac(x)
+#endif
 
 /*
  * We use locality group preallocation for small size file. The size of the
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c69f4e5..9d91c60 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1775,6 +1775,21 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
 	return (has_super + ext4_group_first_block_no(sb, bg));
 }
 
+static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
+{
+	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
+	unsigned long stripe_width = le32_to_cpu(sbi->s_es->s_raid_stripe_width);
+
+	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) {
+		return sbi->s_stripe;
+	} else if (stripe_width <= sbi->s_blocks_per_group) {
+		return stripe_width;
+	} else if (stride <= sbi->s_blocks_per_group) {
+		return stride;
+	}
+
+	return 0;
+}
 
 static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 				__releases(kernel_sem)
@@ -2131,6 +2146,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	sbi->s_rsv_window_head.rsv_alloc_hit = 0;
 	sbi->s_rsv_window_head.rsv_goal_size = 0;
 	ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
+	/*
+	 * set the stripe size. If we have specified it via mount option, then
+	 * use the mount option value. If the value specified at mount time is
+	 * greater than the blocks per group use the super block value.
+	 * Allocator needs it be less than blocks per group.
+	 */
+	sbi->s_stripe = ext4_get_stripe_size(sbi);
 
 	/*
 	 * set up enough so that it can read an inode
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ