Message-id: <20090409021139.GN17302@webber.adilger.int>
Date:	Wed, 08 Apr 2009 20:11:39 -0600
From:	Andreas Dilger <adilger@....com>
To:	linux-ext4@...r.kernel.org
Cc:	Alex Zhuravlev <Alex.Zhuravlev@....com>,
	Aneesh Kumar <aneesh.kumar@...ux.vnet.ibm.com>
Subject: [PATCH, RFC] ext4_lock_group() vs. sb_bgl_lock()

We hit a bug in the Lustre mballoc code recently that exposed a race
condition between read_block_bitmap() and mark_diskspace_used().

This is caught by an extra sanity check that is in our mballoc but not
upstream, and I suspect the same race still exists in the upstream ext4
code.  In Lustre at least it doesn't result in data corruption (AFAICS),
but it does trip a BUG() on the sanity check, and since upstream doesn't
have that check the race could slip by unnoticed.

The root of the problem is that most of the ext4 code is using
sb_bgl_lock() to protect the group descriptors and block bitmaps, but
in some places the mballoc code uses ext4_lock_group() instead.

For example, mb_mark_used() appears to hold only ext4_lock_group(), yet
it calls the non-atomic mb_set_bit() rather than mb_set_bit_atomic().
Later in the same function, however, it calls mb_set_bits() with
sb_bgl_lock() passed in, which is inconsistent.
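
To make the failure mode concrete, here is a minimal userspace model --
hypothetical pthreads code, not ext4 -- of the general hazard: a writer
that keeps two fields consistent under lock A provides no exclusion at
all against a checker that reads them under lock B, which is essentially
the read_block_bitmap() vs. mark_diskspace_used() situation above.

/*
 * cc -O2 -pthread lock_mismatch.c -o lock_mismatch
 * Hypothetical illustration only -- not ext4 code, all names made up.
 * "bits_set" and "blocks_used" play the role of the block bitmap and
 * the group descriptor free count, which must always agree.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* ext4_lock_group() role */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* sb_bgl_lock() role */

static long bits_set;
static long blocks_used;

static void *alloc_path(void *unused)	/* updates both fields under lock A */
{
	for (int i = 0; i < 5000000; i++) {
		pthread_mutex_lock(&lock_a);
		bits_set++;
		blocks_used++;
		pthread_mutex_unlock(&lock_a);
	}
	return unused;
}

static void *check_path(void *out)	/* reads both fields under lock B */
{
	long bad = 0;

	for (int i = 0; i < 5000000; i++) {
		pthread_mutex_lock(&lock_b);
		if (bits_set != blocks_used)	/* sanity check sees a half-done update */
			bad++;
		pthread_mutex_unlock(&lock_b);
	}
	*(long *)out = bad;
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;
	long bad = 0;

	pthread_create(&t1, NULL, alloc_path, NULL);
	pthread_create(&t2, NULL, check_path, &bad);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);

	/* Typically nonzero with two locks; always zero if both sides share one. */
	printf("invariant violations observed: %ld\n", bad);
	return 0;
}

Switch check_path() over to lock_a and the violations go away, which is
all the patch below is trying to guarantee for the real code.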

Looking at the mballoc.c code it seems fairly straightforward to remove
ext4_lock_group() entirely and stick with sb_bgl_lock() everywhere,
though in a few places both locks are used and a bit of cleanup is
needed.

I've removed the mb_{set,clear}_bit_atomic() variants, since the code
now always holds sb_bgl_lock() over mb_set_bit().  I also moved the
flex_bg update inside the same sb_bgl_lock() critical section as the
mb_*_bit() calls instead of dropping the lock and re-acquiring it two
lines later.  Callers of mb_set_bits() must now take sb_bgl_lock()
themselves, which was already true about 90% of the time, and this
avoids a branch on every call.
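
The resulting calling convention is just the usual lock/update/unlock
pattern; for example, from the mark_diskspace_used() hunk in the patch
below:

	spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
	mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
		    ac->ac_b_ex.fe_len);
	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));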

I'm not sure what to do with the "__releases(bitlock)" and
"__acquires(bitlock)" annotations on ext4_grp_locked_error(), as I'm not
familiar with them (they look like sparse context annotations rather
than lockdep).
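
For reference, as far as I can tell they come from
include/linux/compiler.h and are only interpreted by sparse; roughly:

	#ifdef __CHECKER__		/* only sparse sees these */
	# define __acquires(x)	__attribute__((context(x,0,1)))
	# define __releases(x)	__attribute__((context(x,1,0)))
	#else
	# define __acquires(x)
	# define __releases(x)
	#endif

so presumably they just need to keep naming whatever lock expression the
function is entered with held, drops, and re-acquires before returning.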

The patch (against 2.6.29) has only been compile tested; I wanted to
get some feedback from Aneesh on it first.  In theory there is a chance
of increased lock contention on sb_bgl_lock(), but in most cases we
already hold this lock anyway.
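
On the contention point: if I remember correctly, sb_bgl_lock() is the
hashed per-group lock from include/linux/blockgroup_lock.h, so distinct
groups can map to the same spinlock.  A toy model of that hashing --
hypothetical userspace code with a made-up table size -- just to show
where extra cross-group contention could come from:

/* cc -pthread bgl_toy.c -o bgl_toy -- illustration only, not kernel code */
#include <pthread.h>
#include <stdio.h>

#define NR_BG_LOCKS 16	/* illustrative; the kernel sizes this from NR_CPUS */

static pthread_mutex_t bg_locks[NR_BG_LOCKS];

/* same idea as bgl_lock_ptr(): hash the group number into a small lock table */
static pthread_mutex_t *toy_bgl_lock(unsigned int block_group)
{
	return &bg_locks[block_group & (NR_BG_LOCKS - 1)];
}

int main(void)
{
	for (int i = 0; i < NR_BG_LOCKS; i++)
		pthread_mutex_init(&bg_locks[i], NULL);

	/* groups 5 and 21 hash to the same slot (5 & 15 == 21 & 15), so work
	 * in two unrelated groups serializes on one lock */
	printf("group 5  -> lock %p\n", (void *)toy_bgl_lock(5));
	printf("group 21 -> lock %p\n", (void *)toy_bgl_lock(21));
	return 0;
}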

========================================================================
--- ./fs/ext4/mballoc.c.orig	2009-04-08 19:13:13.000000000 -0600
+++ ./fs/ext4/mballoc.c	2009-04-08 20:05:48.000000000 -0600
@@ -373,24 +373,12 @@
 	ext4_set_bit(bit, addr);
 }
 
-static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
-{
-	addr = mb_correct_addr_and_bit(&bit, addr);
-	ext4_set_bit_atomic(lock, bit, addr);
-}
-
 static inline void mb_clear_bit(int bit, void *addr)
 {
 	addr = mb_correct_addr_and_bit(&bit, addr);
 	ext4_clear_bit(bit, addr);
 }
 
-static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
-{
-	addr = mb_correct_addr_and_bit(&bit, addr);
-	ext4_clear_bit_atomic(lock, bit, addr);
-}
-
 static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 {
 	int fix = 0, ret, tmpmax;
@@ -449,7 +437,7 @@
 
 	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
 		return;
-	BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+	BUG_ON(!spin_is_locked(sb_bgl_lock(EXT4_SB(sb), e4b->bd_group)));
 	for (i = 0; i < count; i++) {
 		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
 			ext4_fsblk_t blocknr;
@@ -473,7 +461,7 @@
 
 	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
 		return;
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	BUG_ON(!spin_is_locked(sb_bgl_lock(EXT4_SB(e4b->bd_sb),e4b->bd_group)));
 	for (i = 0; i < count; i++) {
 		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
 		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
@@ -881,9 +869,9 @@
 			/*
 			 * incore got set to the group block bitmap below
 			 */
-			ext4_lock_group(sb, group);
+			spin_lock(sb_bgl_lock(EXT4_SB(sb), group));
 			ext4_mb_generate_buddy(sb, data, incore, group);
-			ext4_unlock_group(sb, group);
+			spin_unlock(sb_bgl_lock(EXT4_SB(sb), group));
 			incore = NULL;
 		} else {
 			/* this is block of bitmap */
@@ -892,13 +880,13 @@
 				group, page->index, i * blocksize);
 
 			/* see comments in ext4_mb_put_pa() */
-			ext4_lock_group(sb, group);
+			spin_lock(sb_bgl_lock(EXT4_SB(sb), group));
 			memcpy(data, bitmap, blocksize);
 
 			/* mark all preallocated blks used in in-core bitmap */
 			ext4_mb_generate_from_pa(sb, data, group);
 			ext4_mb_generate_from_freelist(sb, data, group);
-			ext4_unlock_group(sb, group);
+			spin_unlock(sb_bgl_lock(EXT4_SB(sb), group));
 
 			/* set incore so that the buddy information can be
 			 * generated using this
@@ -1079,7 +1067,7 @@
 	return 0;
 }
 
-static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
+static void mb_clear_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1092,15 +1080,12 @@
 			cur += 32;
 			continue;
 		}
-		if (lock)
-			mb_clear_bit_atomic(lock, cur, bm);
-		else
-			mb_clear_bit(cur, bm);
+		mb_clear_bit(cur, bm);
 		cur++;
 	}
 }
 
-static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
+static void mb_set_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1113,10 +1098,7 @@
 			cur += 32;
 			continue;
 		}
-		if (lock)
-			mb_set_bit_atomic(lock, cur, bm);
-		else
-			mb_set_bit(cur, bm);
+		mb_set_bit(cur, bm);
 		cur++;
 	}
 }
@@ -1132,7 +1114,7 @@
 	struct super_block *sb = e4b->bd_sb;
 
 	BUG_ON(first + count > (sb->s_blocksize << 3));
-	BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+	BUG_ON(!spin_is_locked(sb_bgl_lock(EXT4_SB(sb), e4b->bd_group)));
 	mb_check_buddy(e4b);
 	mb_free_blocks_double(inode, e4b, first, count);
 
@@ -1213,7 +1195,7 @@
 	int ord;
 	void *buddy;
 
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	BUG_ON(!spin_is_locked(sb_bgl_lock(EXT4_SB(e4b->bd_sb),e4b->bd_group)));
 	BUG_ON(ex == NULL);
 
 	buddy = mb_find_buddy(e4b, order, &max);
@@ -1277,7 +1259,7 @@
 
 	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
 	BUG_ON(e4b->bd_group != ex->fe_group);
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	BUG_ON(!spin_is_locked(sb_bgl_lock(EXT4_SB(e4b->bd_sb),e4b->bd_group)));
 	mb_check_buddy(e4b);
 	mb_mark_used_double(e4b, start, len);
 
@@ -1331,8 +1313,7 @@
 		e4b->bd_info->bb_counters[ord]++;
 	}
 
-	mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group),
-			EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
+	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
 	mb_check_buddy(e4b);
 
 	return ret;
@@ -1511,7 +1492,7 @@
 	if (err)
 		return err;
 
-	ext4_lock_group(ac->ac_sb, group);
+	spin_lock(sb_bgl_lock(EXT4_SB(ac->ac_sb), group));
 	max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
 
 	if (max > 0) {
@@ -1519,7 +1500,7 @@
 		ext4_mb_use_best_found(ac, e4b);
 	}
 
-	ext4_unlock_group(ac->ac_sb, group);
+	spin_unlock(sb_bgl_lock(EXT4_SB(ac->ac_sb), group));
 	ext4_mb_release_desc(e4b);
 
 	return 0;
@@ -1542,7 +1523,7 @@
 	if (err)
 		return err;
 
-	ext4_lock_group(ac->ac_sb, group);
+	spin_lock(sb_bgl_lock(EXT4_SB(ac->ac_sb), group));
 	max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
 			     ac->ac_g_ex.fe_len, &ex);
 
@@ -1574,7 +1555,7 @@
 		ac->ac_b_ex = ex;
 		ext4_mb_use_best_found(ac, e4b);
 	}
-	ext4_unlock_group(ac->ac_sb, group);
+	spin_unlock(sb_bgl_lock(EXT4_SB(ac->ac_sb), group));
 	ext4_mb_release_desc(e4b);
 
 	return 0;
@@ -2048,10 +2029,10 @@
 			if (err)
 				goto out;
 
-			ext4_lock_group(sb, group);
+			spin_lock(sb_bgl_lock(EXT4_SB(sb), group));
 			if (!ext4_mb_good_group(ac, group, cr)) {
 				/* someone did allocation from this group */
-				ext4_unlock_group(sb, group);
+				spin_unlock(sb_bgl_lock(EXT4_SB(sb), group));
 				ext4_mb_release_desc(&e4b);
 				continue;
 			}
@@ -2068,7 +2049,7 @@
 			else
 				ext4_mb_complex_scan_group(ac, &e4b);
 
-			ext4_unlock_group(sb, group);
+			spin_unlock(sb_bgl_lock(EXT4_SB(sb), group));
 			ext4_mb_release_desc(&e4b);
 
 			if (ac->ac_status != AC_STATUS_CONTINUE)
@@ -2360,9 +2341,9 @@
 		seq_printf(seq, "#%-5u: I/O error\n", group);
 		return 0;
 	}
-	ext4_lock_group(sb, group);
+	spin_lock(sb_bgl_lock(EXT4_SB(sb), group));
 	memcpy(&sg, ext4_get_group_info(sb, group), i);
-	ext4_unlock_group(sb, group);
+	spin_unlock(sb_bgl_lock(EXT4_SB(sb), group));
 	ext4_mb_release_desc(&e4b);
 
 	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
@@ -2756,7 +2737,7 @@
 	return 0;
 }
 
-/* need to called with ext4 group lock (ext4_lock_group) */
+/* needs to be called with ext4 group lock (sb_bgl_lock) */
 static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 {
 	struct ext4_prealloc_space *pa;
@@ -2787,9 +2768,9 @@
 #ifdef DOUBLE_CHECK
 			kfree(grinfo->bb_bitmap);
 #endif
-			ext4_lock_group(sb, i);
+			spin_lock(sb_bgl_lock(EXT4_SB(sb), i));
 			ext4_mb_cleanup_pa(grinfo);
-			ext4_unlock_group(sb, i);
+			spin_unlock(sb_bgl_lock(EXT4_SB(sb), i));
 			kfree(grinfo);
 		}
 		num_meta_group_infos = (sbi->s_groups_count +
@@ -2862,7 +2843,7 @@
 		/* there are blocks to put in buddy to make them really free */
 		count += entry->count;
 		count2++;
-		ext4_lock_group(sb, entry->group);
+		spin_lock(sb_bgl_lock(EXT4_SB(sb), entry->group));
 		/* Take it out of per group rb tree */
 		rb_erase(&entry->node, &(db->bb_free_root));
 		mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
@@ -2874,7 +2855,7 @@
 			page_cache_release(e4b.bd_buddy_page);
 			page_cache_release(e4b.bd_bitmap_page);
 		}
-		ext4_unlock_group(sb, entry->group);
+		spin_unlock(sb_bgl_lock(EXT4_SB(sb), entry->group));
 		discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
 			+ entry->start_blk
 			+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
@@ -3049,14 +3030,17 @@
 		 * Fix the bitmap and repeat the block allocation
 		 * We leak some of the blocks here.
 		 */
-		mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
-				bitmap_bh->b_data, ac->ac_b_ex.fe_start,
-				ac->ac_b_ex.fe_len);
+		spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
+		mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+			    ac->ac_b_ex.fe_len);
+		spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
 		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
 		if (!err)
 			err = -EAGAIN;
 		goto out_err;
 	}
+
+	spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
 #ifdef AGGRESSIVE_CHECK
 	{
 		int i;
@@ -3066,9 +3050,7 @@
 		}
 	}
 #endif
-	spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-	mb_set_bits(NULL, bitmap_bh->b_data,
-				ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
+	mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
 	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 		ext4_free_blks_set(sb, gdp,
@@ -3078,6 +3060,13 @@
 	len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
 	ext4_free_blks_set(sb, gdp, len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
+
+	if (sbi->s_log_groups_per_flex) {
+		ext4_group_t flex_group = ext4_flex_group(sbi,
+							  ac->ac_b_ex.fe_group);
+		sbi->s_flex_groups[flex_group].free_blocks -=ac->ac_b_ex.fe_len;
+	}
+
 	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
 	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
 	/*
@@ -3090,14 +3079,6 @@
 		percpu_counter_sub(&sbi->s_dirtyblocks_counter,
 						ac->ac_b_ex.fe_len);
 
-	if (sbi->s_log_groups_per_flex) {
-		ext4_group_t flex_group = ext4_flex_group(sbi,
-							  ac->ac_b_ex.fe_group);
-		spin_lock(sb_bgl_lock(sbi, flex_group));
-		sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len;
-		spin_unlock(sb_bgl_lock(sbi, flex_group));
-	}
-
 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
 	if (err)
 		goto out_err;
@@ -3509,7 +3490,7 @@
  * the function goes through all block freed in the group
  * but not yet committed and marks them used in in-core bitmap.
  * buddy must be generated from this bitmap
- * Need to be called with ext4 group lock (ext4_lock_group)
+ * Need to be called with ext4 group lock (sb_bgl_lock)
  */
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 						ext4_group_t group)
@@ -3523,9 +3504,7 @@
 
 	while (n) {
 		entry = rb_entry(n, struct ext4_free_data, node);
-		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
-				bitmap, entry->start_blk,
-				entry->count);
+		mb_set_bits(bitmap, entry->start_blk, entry->count);
 		n = rb_next(n);
 	}
 	return;
@@ -3534,7 +3513,7 @@
 /*
  * the function goes through all preallocation in this group and marks them
  * used in in-core bitmap. buddy must be generated from this bitmap
- * Need to be called with ext4 group lock (ext4_lock_group)
+ * Need to be called with ext4 group lock (sb_bgl_lock)
  */
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group)
@@ -3566,8 +3545,7 @@
 		if (unlikely(len == 0))
 			continue;
 		BUG_ON(groupnr != group);
-		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
-						bitmap, start, len);
+		mb_set_bits(bitmap, start, len);
 		preallocated += len;
 		count++;
 	}
@@ -3625,9 +3603,9 @@
 	 * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
 	 * against that pair
 	 */
-	ext4_lock_group(sb, grp);
+	spin_lock(sb_bgl_lock(EXT4_SB(sb), grp));
 	list_del(&pa->pa_group_list);
-	ext4_unlock_group(sb, grp);
+	spin_unlock(sb_bgl_lock(EXT4_SB(sb), grp));
 
 	spin_lock(pa->pa_obj_lock);
 	list_del_rcu(&pa->pa_inode_list);
@@ -3719,9 +3697,9 @@
 	pa->pa_obj_lock = &ei->i_prealloc_lock;
 	pa->pa_inode = ac->ac_inode;
 
-	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+	spin_lock(sb_bgl_lock(EXT4_SB(sb), ac->ac_b_ex.fe_group));
 	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
-	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+	spin_unlock(sb_bgl_lock(EXT4_SB(sb), ac->ac_b_ex.fe_group));
 
 	spin_lock(pa->pa_obj_lock);
 	list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
@@ -3781,9 +3759,9 @@
 	pa->pa_obj_lock = &lg->lg_prealloc_lock;
 	pa->pa_inode = NULL;
 
-	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+	spin_lock(sb_bgl_lock(EXT4_SB(sb), ac->ac_b_ex.fe_group));
 	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
-	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+	spin_unlock(sb_bgl_lock(EXT4_SB(sb), ac->ac_b_ex.fe_group));
 
 	/*
 	 * We will later add the new pa to the right bucket
@@ -3966,7 +3944,7 @@
 	INIT_LIST_HEAD(&list);
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 repeat:
-	ext4_lock_group(sb, group);
+	spin_lock(sb_bgl_lock(EXT4_SB(sb), group));
 	list_for_each_entry_safe(pa, tmp,
 				&grp->bb_prealloc_list, pa_group_list) {
 		spin_lock(&pa->pa_lock);
@@ -3995,7 +3973,7 @@
 	/* if we still need more blocks and some PAs were used, try again */
 	if (free < needed && busy) {
 		busy = 0;
-		ext4_unlock_group(sb, group);
+		spin_unlock(sb_bgl_lock(EXT4_SB(sb), group));
 		/*
 		 * Yield the CPU here so that we don't get soft lockup
 		 * in non preempt case.
@@ -4028,7 +4006,7 @@
 	}
 
 out:
-	ext4_unlock_group(sb, group);
+	spin_unlock(sb_bgl_lock(EXT4_SB(sb), group));
 	if (ac)
 		kmem_cache_free(ext4_ac_cachep, ac);
 	ext4_mb_release_desc(&e4b);
@@ -4136,10 +4114,10 @@
 			continue;
 		}
 
-		ext4_lock_group(sb, group);
+		spin_lock(sb_bgl_lock(EXT4_SB(sb), group));
 		list_del(&pa->pa_group_list);
 		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
-		ext4_unlock_group(sb, group);
+		spin_unlock(sb_bgl_lock(EXT4_SB(sb), group));
 
 		ext4_mb_release_desc(&e4b);
 		put_bh(bitmap_bh);
@@ -4197,7 +4175,7 @@
 		struct ext4_prealloc_space *pa;
 		ext4_grpblk_t start;
 		struct list_head *cur;
-		ext4_lock_group(sb, i);
+		spin_lock(sb_bgl_lock(EXT4_SB(sb), i));
 		list_for_each(cur, &grp->bb_prealloc_list) {
 			pa = list_entry(cur, struct ext4_prealloc_space,
 					pa_group_list);
@@ -4208,7 +4186,7 @@
 			printk(KERN_ERR "PA:%lu:%d:%u \n", i,
 							start, pa->pa_len);
 		}
-		ext4_unlock_group(sb, i);
+		spin_unlock(sb_bgl_lock(EXT4_SB(sb), i));
 
 		if (grp->bb_free == 0)
 			continue;
@@ -4400,10 +4378,10 @@
 					"information for %u", group);
 			continue;
 		}
-		ext4_lock_group(sb, group);
+		spin_lock(sb_bgl_lock(EXT4_SB(sb), group));
 		list_del(&pa->pa_group_list);
 		ext4_mb_release_group_pa(&e4b, pa, ac);
-		ext4_unlock_group(sb, group);
+		spin_unlock(sb_bgl_lock(EXT4_SB(sb), group));
 
 		ext4_mb_release_desc(&e4b);
 		list_del(&pa->u.pa_tmp_list);
@@ -4901,37 +4879,31 @@
 		new_entry->group  = block_group;
 		new_entry->count = count;
 		new_entry->t_tid = handle->h_transaction->t_tid;
-		ext4_lock_group(sb, block_group);
-		mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
-				bit, count);
+
+		spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+		mb_clear_bits(bitmap_bh->b_data, bit, count);
 		ext4_mb_free_metadata(handle, &e4b, new_entry);
-		ext4_unlock_group(sb, block_group);
 	} else {
-		ext4_lock_group(sb, block_group);
 		/* need to update group_info->bb_free and bitmap
 		 * with group lock held. generate_buddy look at
 		 * them with group lock_held
 		 */
-		mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
-				bit, count);
+		spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+		mb_clear_bits(bitmap_bh->b_data, bit, count);
 		mb_free_blocks(inode, &e4b, bit, count);
 		ext4_mb_return_to_preallocation(inode, &e4b, block, count);
-		ext4_unlock_group(sb, block_group);
 	}
 
-	spin_lock(sb_bgl_lock(sbi, block_group));
 	ret = ext4_free_blks_count(sb, gdp) + count;
 	ext4_free_blks_set(sb, gdp, ret);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
-	percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-		spin_lock(sb_bgl_lock(sbi, flex_group));
 		sbi->s_flex_groups[flex_group].free_blocks += count;
-		spin_unlock(sb_bgl_lock(sbi, flex_group));
 	}
+	spin_unlock(sb_bgl_lock(sbi, block_group));
+	percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
 	ext4_mb_release_desc(&e4b);
 
--- ./fs/ext4/super.c.orig	2009-04-08 19:13:13.000000000 -0600
+++ ./fs/ext4/super.c	2009-04-08 20:02:30.000000000 -0600
@@ -448,7 +448,7 @@
 		ext4_commit_super(sb, es, 0);
 		return;
 	}
-	ext4_unlock_group(sb, grp);
+	spin_unlock(sb_bgl_lock(EXT4_SB(sb), grp));
 	ext4_handle_error(sb);
 	/*
 	 * We only get here in the ERRORS_RO case; relocking the group
@@ -461,7 +461,7 @@
 	 * aggressively from the ext4 function in question, with a
 	 * more appropriate error code.
 	 */
-	ext4_lock_group(sb, grp);
+	spin_lock(sb_bgl_lock(EXT4_SB(sb), grp));
 	return;
 }
 

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.

