lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20080107182800.GA15183@skywalker>
Date:	Mon, 7 Jan 2008 23:58:00 +0530
From:	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
To:	Alex Tomas <bzzz@....com>, Andreas Dilger <adilger@....com>
Cc:	"linux-ext4@...r.kernel.org" <linux-ext4@...r.kernel.org>
Subject: [PATCH] mballoc changes from ldiskfs

Hi,

This patch is not even compile-tested. I am sending it to find out
whether some of the changes are even needed, and to make sure I didn't
drop any bug fixes in the merge.

Some things I noticed:

a) The prealloc table is completely gone.
b) The ext4_mb_put_pa change (I guess that is a bug in ldiskfs).


Now, by default, requests smaller than 64K use locality group preallocation.

The ldiskfs change I looked at is from
lustre/ldiskfs/kernel_patches/patches/ext3-mballoc3-core.patch

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 58a70a1..cb84516 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -532,10 +532,10 @@ static inline void mb_set_bit(int bit, void *addr)
 	ext4_set_bit(bit, addr);
 }
 
-static inline void mb_set_bit_atomic(int bit, void *addr)
+static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
 {
 	mb_correct_addr_and_bit(bit, addr);
-	ext4_set_bit_atomic(NULL, bit, addr);
+	ext4_set_bit_atomic(lock, bit, addr);
 }
 
 static inline void mb_clear_bit(int bit, void *addr)
@@ -544,10 +544,10 @@ static inline void mb_clear_bit(int bit, void *addr)
 	ext4_clear_bit(bit, addr);
 }
 
-static inline void mb_clear_bit_atomic(int bit, void *addr)
+static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
 {
 	mb_correct_addr_and_bit(bit, addr);
-	ext4_clear_bit_atomic(NULL, bit, addr);
+	ext4_clear_bit_atomic(lock, bit, addr);
 }
 
 static inline void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
@@ -1155,7 +1155,7 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
 	return 0;
 }
 
-static void mb_clear_bits(void *bm, int cur, int len)
+static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1168,12 +1168,12 @@ static void mb_clear_bits(void *bm, int cur, int len)
 			cur += 32;
 			continue;
 		}
-		mb_clear_bit_atomic(cur, bm);
+		mb_clear_bit_atomic(lock, cur, bm);
 		cur++;
 	}
 }
 
-static void mb_set_bits(void *bm, int cur, int len)
+static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1186,7 +1186,7 @@ static void mb_set_bits(void *bm, int cur, int len)
 			cur += 32;
 			continue;
 		}
-		mb_set_bit_atomic(cur, bm);
+		mb_set_bit_atomic(lock, cur, bm);
 		cur++;
 	}
 }
@@ -1403,7 +1403,8 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
 		e4b->bd_info->bb_counters[ord]++;
 	}
 
-	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
+	mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group),
+			EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
 	mb_check_buddy(e4b);
 
 	return ret;
@@ -1439,14 +1440,6 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
 	get_page(ac->ac_bitmap_page);
 	ac->ac_buddy_page = e4b->bd_buddy_page;
 	get_page(ac->ac_buddy_page);
-
-	/* store last allocated for subsequent stream allocation */
-	if ((ac->ac_flags & EXT4_MB_HINT_DATA)) {
-		spin_lock(&sbi->s_md_lock);
-		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
-		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
-		spin_unlock(&sbi->s_md_lock);
-	}
 }
 
 /*
@@ -1509,8 +1502,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
 	struct ext4_free_extent *gex = &ac->ac_g_ex;
 
 	BUG_ON(ex->fe_len <= 0);
-	BUG_ON(ex->fe_len >= (1 << ac->ac_sb->s_blocksize_bits) * 8);
-	BUG_ON(ex->fe_start >= (1 << ac->ac_sb->s_blocksize_bits) * 8);
+	BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+	BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
 
 	ac->ac_found++;
@@ -1553,7 +1546,6 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
 		/* if the request is satisfied, then we try to find
 		 * an extent that still satisfy the request, but is
 		 * smaller than previous one */
-		if (ex->fe_len < bex->fe_len)
 			*bex = *ex;
 	}
 
@@ -1702,8 +1694,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 	i = e4b->bd_info->bb_first_free;
 
 	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
-		i = ext4_find_next_zero_bit(bitmap, sb->s_blocksize * 8, i);
-		if (i >= sb->s_blocksize * 8) {
+		i = ext4_find_next_zero_bit(bitmap, EXT4_BLOCKS_PER_GROUP(sb), i);
+		if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
 			BUG_ON(free != 0);
 			break;
 		}
@@ -1744,7 +1736,7 @@ static void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
 	i = (i - le32_to_cpu(sbi->s_es->s_first_data_block))
 			% EXT4_BLOCKS_PER_GROUP(sb);
 
-	while (i < sb->s_blocksize * 8) {
+	while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
 		if (!mb_test_bit(i, bitmap)) {
 			max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
 			if (max >= sbi->s_stripe) {
@@ -1839,20 +1831,6 @@ static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 			ac->ac_2order = i;
 	}
 
-	/* if stream allocation is enabled, use global goal */
-
-	/* FIXME!!
-	 * Need more explanation on what it is and how stream
-	 * allocation is represented by the below conditional
-	 */
-	if ((ac->ac_g_ex.fe_len < sbi->s_mb_large_req) &&
-			(ac->ac_flags & EXT4_MB_HINT_DATA)) {
-		/* TBD: may be hot point */
-		spin_lock(&sbi->s_md_lock);
-		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
-		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
-		spin_unlock(&sbi->s_md_lock);
-	}
 
 	group = ac->ac_g_ex.fe_group;
 
@@ -2291,7 +2269,8 @@ static void ext4_mb_history_init(struct super_block *sb)
 	spin_lock_init(&sbi->s_mb_history_lock);
 	i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
 	sbi->s_mb_history = kmalloc(i, GFP_KERNEL);
-	memset(sbi->s_mb_history, 0, i);
+	if (likely(sbi->s_mb_history != NULL))
+		memset(sbi->s_mb_history, 0, i);
 	/* if we can't allocate history, then we simple won't use it */
 }
 
@@ -2300,7 +2279,7 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac)
 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	struct ext4_mb_history h;
 
-	if (likely(sbi->s_mb_history == NULL))
+	if (unlikely(sbi->s_mb_history == NULL))
 		return;
 
 	if (!(ac->ac_op & sbi->s_mb_history_filter))
@@ -2312,11 +2291,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac)
 	h.orig = ac->ac_o_ex;
 	h.result = ac->ac_b_ex;
 	h.flags = ac->ac_flags;
-	h.found = ac->ac_found;
-	h.groups = ac->ac_groups_scanned;
-	h.cr = ac->ac_criteria;
-	h.tail = ac->ac_tail;
-	h.buddy = ac->ac_buddy;
 	h.merged = 0;
 	if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) {
 		if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
@@ -2404,6 +2378,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
 				"EXT4-fs: can't read descriptor %lu\n", i);
 			goto err_freebuddy;
 		}
+		memset(meta_group_info[j], 0, len);
 		set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
 			&meta_group_info[j]->bb_state);
 
@@ -2512,32 +2487,10 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
 	sbi->s_mb_max_groups_to_scan = MB_DEFAULT_MAX_GROUPS_TO_SCAN;
 	sbi->s_mb_stats = MB_DEFAULT_STATS;
+	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
 	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
 	sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
 
-	sbi->s_mb_prealloc_table_size = 7;
-	i = sbi->s_mb_prealloc_table_size;
-	sbi->s_mb_prealloc_table = kmalloc(sizeof(unsigned long) * i,
-						GFP_NOFS);
-	if (sbi->s_mb_prealloc_table == NULL) {
-		clear_opt(sbi->s_mount_opt, MBALLOC);
-		kfree(sbi->s_mb_offsets);
-		kfree(sbi->s_mb_maxs);
-		return -ENOMEM;
-	}
-
-	sbi->s_mb_prealloc_table[0] = 4;
-	sbi->s_mb_prealloc_table[1] = 8;
-	sbi->s_mb_prealloc_table[2] = 16;
-	sbi->s_mb_prealloc_table[3] = 32;
-	sbi->s_mb_prealloc_table[4] = 64;
-	sbi->s_mb_prealloc_table[5] = 128;
-	sbi->s_mb_prealloc_table[6] = 256;
-
-	sbi->s_mb_small_req = 256;
-	sbi->s_mb_large_req = 1024;
-	sbi->s_mb_group_prealloc = 512;
-
 	i = sizeof(struct ext4_locality_group) * NR_CPUS;
 	sbi->s_locality_groups = kmalloc(i, GFP_NOFS);
 	if (sbi->s_locality_groups == NULL) {
@@ -2713,75 +2666,9 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb)
 #define EXT4_MB_MAX_TO_SCAN_NAME	"max_to_scan"
 #define EXT4_MB_MIN_TO_SCAN_NAME	"min_to_scan"
 #define EXT4_MB_ORDER2_REQ		"order2_req"
-#define EXT4_MB_SMALL_REQ		"small_req"
-#define EXT4_MB_LARGE_REQ		"large_req"
-#define EXT4_MB_PREALLOC_TABLE		"prealloc_table"
-#define EXT4_MB_GROUP_PREALLOC		"group_prealloc"
-
-static int ext4_mb_read_prealloc_table(char *page, char **start,
-			off_t off, int count, int *eof, void *data)
-{
-	struct ext4_sb_info *sbi = data;
-	int len = 0;
-	int i;
-
-	*eof = 1;
-	if (off != 0)
-		return 0;
-	for (i = 0; i < sbi->s_mb_prealloc_table_size; i++)
-		len += sprintf(page + len, "%ld ",
-				sbi->s_mb_prealloc_table[i]);
-	len += sprintf(page + len, "\n");
-	*start = page;
-	return len;
-}
-
-static int ext4_mb_write_prealloc_table(struct file *file,
-			const char __user *buf, unsigned long cnt, void *data)
-{
-	struct ext4_sb_info *sbi = data;
-	unsigned long value;
-	unsigned long prev = 0;
-	char str[128];
-	char *cur;
-	char *end;
-	unsigned long *new_table;
-	int num = 0;
-	int i = 0;
-
-	if (cnt >= sizeof(str))
-		return -EINVAL;
-	if (copy_from_user(str, buf, cnt))
-		return -EFAULT;
+#define EXT4_MB_STREAM_REQ		"stream_req"
 
-	num = 0;
-	cur = str;
-	end = str + cnt;
-	while (cur < end) {
-		while ((cur < end) && (*cur == ' ')) cur++;
-		value = simple_strtol(cur, &cur, 0);
-		if (value == 0)
-			break;
-		if (value <= prev)
-			return -EINVAL;
-		prev = value;
-		num++;
-	}
 
-	new_table = kmalloc(num * sizeof(*new_table), GFP_KERNEL);
-	if (new_table == NULL)
-		return -ENOMEM;
-	kfree(sbi->s_mb_prealloc_table);
-	sbi->s_mb_prealloc_table = new_table;
-	sbi->s_mb_prealloc_table_size = num;
-	cur = str;
-	end = str + cnt;
-	while (cur < end && i < num) {
-		while ((cur < end) && (*cur == ' ')) cur++;
-		new_table[i++] = simple_strtol(cur, &cur, 0);
-	}
-	return cnt;
-}
 
 #define MB_PROC_VALUE_READ(name)				\
 static int ext4_mb_read_##name(char *page, char **start,	\
@@ -2823,12 +2710,8 @@ MB_PROC_VALUE_READ(min_to_scan);
 MB_PROC_VALUE_WRITE(min_to_scan);
 MB_PROC_VALUE_READ(order2_reqs);
 MB_PROC_VALUE_WRITE(order2_reqs);
-MB_PROC_VALUE_READ(small_req);
-MB_PROC_VALUE_WRITE(small_req);
-MB_PROC_VALUE_READ(large_req);
-MB_PROC_VALUE_WRITE(large_req);
-MB_PROC_VALUE_READ(group_prealloc);
-MB_PROC_VALUE_WRITE(group_prealloc);
+MB_PROC_VALUE_READ(stream_request);
+MB_PROC_VALUE_WRITE(stream_request);
 
 #define	MB_PROC_HANDLER(name, var)					\
 do {									\
@@ -2857,18 +2740,13 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
 	MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan);
 	MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan);
 	MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs);
-	MB_PROC_HANDLER(EXT4_MB_SMALL_REQ, small_req);
-	MB_PROC_HANDLER(EXT4_MB_LARGE_REQ, large_req);
-	MB_PROC_HANDLER(EXT4_MB_PREALLOC_TABLE, prealloc_table);
-	MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc);
+	MB_PROC_HANDLER(EXT3_MB_STREAM_REQ, stream_request);
 
 	return 0;
 
 err_out:
-	remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
-	remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_mb_proc);
-	remove_proc_entry(EXT4_MB_LARGE_REQ, sbi->s_mb_proc);
-	remove_proc_entry(EXT4_MB_SMALL_REQ, sbi->s_mb_proc);
+	printk(KERN_ERR "EXT4-fs: Unable to create %s\n", name);
+	remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
 	remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
 	remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc);
 	remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc);
@@ -2889,10 +2767,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
 
 	snprintf(devname, sizeof(devname) - 1, "%s",
 		bdevname(sb->s_bdev, devname));
-	remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
-	remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_mb_proc);
-	remove_proc_entry(EXT4_MB_SMALL_REQ, sbi->s_mb_proc);
-	remove_proc_entry(EXT4_MB_LARGE_REQ, sbi->s_mb_proc);
+	remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
 	remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
 	remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc);
 	remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc);
@@ -3035,7 +2910,11 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
 	struct ext4_locality_group *lg = ac->ac_lg;
 
 	BUG_ON(lg == NULL);
-	ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
+	if (EXT4_SB(sb)->s_stripe)
+		ac->ac_g_ex.fe_len = EXT3_SB(sb)->s_stripe;
+	else
+		ac->ac_g_ex.fe_len = (1024 * 1024) >> sb->s_blocksize_bits;
+
 	mb_debug("#%u: goal %lu blocks for locality group\n",
 		current->pid, ac->ac_g_ex.fe_len);
 }
@@ -3085,34 +2964,50 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 		size = i_size_read(ac->ac_inode);
 	size = (size + ac->ac_sb->s_blocksize - 1) >> bsbits;
 
-	start = 0;
-	wind = 0;
+	/* max available blocks in a free group */
+	max = EXT3_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1
+		- EXT3_SB(ac->ac_sb)->s_itb_per_group;
 
-	/* let's choose preallocation window depending on file size */
-	for (i = 0; i < sbi->s_mb_prealloc_table_size; i++) {
-		if (size <= sbi->s_mb_prealloc_table[i]) {
-			wind = sbi->s_mb_prealloc_table[i];
-			break;
-		}
-	}
-	size = wind;
-
-	if (wind == 0) {
-		__u64 tstart, tend;
-		/* file is quite large, we now preallocate with
-		 * the biggest configured window with regart to
-		 * logical offset */
-		wind = sbi->s_mb_prealloc_table[i - 1];
-		tstart = ac->ac_o_ex.fe_logical;
-		do_div(tstart, wind);
-		start = tstart * wind;
-		tend = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len - 1;
-		do_div(tend, wind);
-		tend = tend * wind + wind;
-		size = tend - start;
+#define NRL_CHECK_SIZE(req,size,max,bits)	\
+		(req <= (size) || max <= ((size) >> bits))
+
+	/* first, try to predict filesize */
+	/* XXX: should this table be tunable? */
+	start = 0;
+	if (size <= 16 * 1024) {
+		size = 16 * 1024;
+	} else if (size <= 32 * 1024) {
+		size = 32 * 1024;
+	} else if (size <= 64 * 1024) {
+		size = 64 * 1024;
+	} else if (size <= 128 * 1024) {
+		size = 128 * 1024;
+	} else if (size <= 256 * 1024) {
+		size = 256 * 1024;
+	} else if (size <= 512 * 1024) {
+		size = 512 * 1024;
+	} else if (size <= 1024 * 1024) {
+		size = 1024 * 1024;
+	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) {
+		start = ac->ac_o_ex.fe_logical << bsbits;
+		start = (start / (1024 * 1024)) * (1024 * 1024);
+		size = 1024 * 1024;
+	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) {
+		start = ac->ac_o_ex.fe_logical << bsbits;
+		start = (start / (4 * (1024 * 1024))) * 4 * (1024 * 1024);
+		size = 4 * 1024 * 1024;
+	} else if(NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,(8<<20)>>bsbits,max,bsbits)){
+		start = ac->ac_o_ex.fe_logical;
+		start = start << bsbits;
+		start = (start / (8 * (1024 * 1024))) * 8 * (1024 * 1024);
+		size = 8 * 1024 * 1024;
+	} else {
+		start = ac->ac_o_ex.fe_logical;
+		start = start << bsbits;
+		size = ac->ac_o_ex.fe_len << bsbits;
 	}
-	orig_size = size;
-	orig_start = start;
+	orig_size = size = size >> bsbits;
+	orig_start = start = start >> bsbits;
 
 	/* don't cover already allocated blocks in selected range */
 	if (ar->pleft && start <= ar->lleft) {
@@ -3203,22 +3098,6 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 	ac->ac_g_ex.fe_logical = start;
 	ac->ac_g_ex.fe_len = size;
 
-	/* define goal start in order to merge */
-	if (ar->pright && (ar->lright == (start + size))) {
-		/* merge to the right */
-		ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
-						&ac->ac_f_ex.fe_group,
-						&ac->ac_f_ex.fe_start);
-		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
-	}
-	if (ar->pleft && (ar->lleft + 1 == start)) {
-		/* merge to the left */
-		ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
-						&ac->ac_f_ex.fe_group,
-						&ac->ac_f_ex.fe_start);
-		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
-	}
-
 	mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size,
 		(unsigned) orig_size, (unsigned) start);
 }
@@ -3395,8 +3274,10 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					     &groupnr, &start);
 		len = pa->pa_len;
 		spin_unlock(&pa->pa_lock);
+		if (unlikely(len == 0))
+			continue;
 		BUG_ON(groupnr != group);
-		mb_set_bits(bitmap, start, len);
+		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), bitmap, start, len);
 		preallocated += len;
 		count++;
 	}
@@ -3425,7 +3306,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 
 	/* in this short window concurrent discard can set pa_deleted */
 	spin_lock(&pa->pa_lock);
-	if (pa->pa_deleted == 1) {
+	if (pa->pa_deleted == 0) {
 		spin_unlock(&pa->pa_lock);
 		return;
 	}
@@ -3641,7 +3522,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
 
 	BUG_ON(pa->pa_deleted == 0);
 	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
-	BUG_ON(group != e4b->bd_group);
+	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
 	end = bit + pa->pa_len;
 
 	ac.ac_sb = sb;
@@ -3696,7 +3577,7 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
 
 	BUG_ON(pa->pa_deleted == 0);
 	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
-	BUG_ON(group != e4b->bd_group);
+	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
 	mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
 	atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
 
@@ -3997,19 +3878,19 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
+	int bsbits = ac->ac_sb->s_blocksize_bits;
+	loff_t size, isize;
 
 	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
 		return;
 
-	/* request is so large that we don't care about
-	 * streaming - it overweights any possible seek */
-	if (ac->ac_o_ex.fe_len >= sbi->s_mb_large_req)
-		return;
+	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
+	isize = i_size_read(ac->ac_inode) >> bsbits;
+	if (size < isize)
+		size = isize;
 
-	/* FIXME!!
-	 * is this  >=  considering the above ?
-	 */
-	if (ac->ac_o_ex.fe_len >= sbi->s_mb_small_req)
+	/* don't use group allocation for large files */
+	if (size >= sbi->s_mb_stream_request)
 		return;
 
 	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
@@ -4419,7 +4300,8 @@ do_more:
 			BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
 	}
 #endif
-	mb_clear_bits(bitmap_bh->b_data, bit, count);
+	mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
+			bit, count);
 
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h
index 85100ea..4d4ad18 100644
--- a/include/linux/ext4_fs_sb.h
+++ b/include/linux/ext4_fs_sb.h
@@ -107,19 +107,12 @@ struct ext4_sb_info {
 	/* tunables */
 	unsigned long s_mb_factor;
 	unsigned long s_stripe;
-	unsigned long s_mb_small_req;
-	unsigned long s_mb_large_req;
+	unsigned long s_mb_stream_request;
 	unsigned long s_mb_max_to_scan;
 	unsigned long s_mb_min_to_scan;
 	unsigned long s_mb_max_groups_to_scan;
 	unsigned long s_mb_stats;
 	unsigned long s_mb_order2_reqs;
-	unsigned long *s_mb_prealloc_table;
-	unsigned long s_mb_prealloc_table_size;
-	unsigned long s_mb_group_prealloc;
-	/* where last allocation was done - for stream allocation */
-	unsigned long s_mb_last_group;
-	unsigned long s_mb_last_start;
 
 	/* history to debug policy */
 	struct ext4_mb_history *s_mb_history;
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ