lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Sun, 01 Jun 2008 16:36:10 -0700
From:	Mingming Cao <cmm@...ibm.com>
To:	linux-ext4@...r.kernel.org
Subject: [RFC][PATCH 4/6] delalloc ENOSPC: avoid free blocks double booking

ext4: avoid double accounting in delalloc block reservation

From: Mingming cao <cmm@...ibm.com>

Since the fs free blocks counter is already reduced at block reservation
time, we need to tell the underlying block allocator not to decrease the
free blocks counter again when the real block allocation finishes.

Signed-off-by: Mingming cao <cmm@...ibm.com>
---
 fs/ext4/dir.c     |    3 ++-
 fs/ext4/ext4.h    |    6 +++++-
 fs/ext4/ext4_i.h  |    1 +
 fs/ext4/extents.c |    2 +-
 fs/ext4/inode.c   |   25 ++++++++++++++++++-------
 fs/ext4/mballoc.c |   13 ++++++++++++-
 fs/ext4/super.c   |    2 ++
 7 files changed, 41 insertions(+), 11 deletions(-)

Index: linux-2.6.26-rc4/fs/ext4/ext4.h
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/ext4.h	2008-06-01 14:22:03.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/ext4.h	2008-06-01 15:04:14.000000000 -0700
@@ -74,6 +74,9 @@
 #define EXT4_MB_HINT_GOAL_ONLY		256
 /* goal is meaningful */
 #define EXT4_MB_HINT_TRY_GOAL		512
+/* blocks already pre-reserved by delayed allocation */
+#define EXT4_MB_DELALLOC_RESERVED      1024
+
 
 struct ext4_allocation_request {
 	/* target inode for block we're allocating */
@@ -1039,6 +1042,7 @@ extern void ext4_mb_free_blocks(handle_t
 
 
 /* inode.c */
+void ext4_da_release_space(struct inode *inode, int used, int to_free);
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 		struct buffer_head *bh, ext4_fsblk_t blocknr);
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,
@@ -1231,7 +1235,7 @@ extern long ext4_fallocate(struct inode 
 extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
 			sector_t block, unsigned long max_blocks,
 			struct buffer_head *bh, int create,
-			int extend_disksize);
+			int extend_disksize, int flag);
 #endif	/* __KERNEL__ */
 
 #endif	/* _EXT4_H */
Index: linux-2.6.26-rc4/fs/ext4/inode.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/inode.c	2008-06-01 15:04:06.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/inode.c	2008-06-01 15:04:14.000000000 -0700
@@ -973,7 +973,7 @@ out:
  */
 int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 			unsigned long max_blocks, struct buffer_head *bh,
-			int create, int extend_disksize)
+			int create, int extend_disksize, int flag)
 {
 	int retval;
 
@@ -1014,6 +1014,15 @@ int ext4_get_blocks_wrap(handle_t *handl
 	 * with create == 1 flag.
 	 */
 	down_write((&EXT4_I(inode)->i_data_sem));
+
+	/*
+	 * If the caller is on the delayed allocation writeout path,
+	 * fs blocks have already been reserved for this allocation.
+	 * Let the underlying get_block() function know, so that it
+	 * avoids double accounting.
+	 */
+	if (flag)
+		EXT4_I(inode)->i_delalloc_reserved_flag = 1;
 	/*
 	 * We need to check for EXT4 here because migrate
 	 * could have changed the inode type in between
@@ -1035,6 +1044,8 @@ int ext4_get_blocks_wrap(handle_t *handl
 							~EXT4_EXT_MIGRATE;
 		}
 	}
+	if (flag)
+		EXT4_I(inode)->i_delalloc_reserved_flag = 0;
 	up_write((&EXT4_I(inode)->i_data_sem));
 	return retval;
 }
@@ -1060,7 +1071,7 @@ static int ext4_get_block(struct inode *
 	}
 
 	ret = ext4_get_blocks_wrap(handle, inode, iblock,
-					max_blocks, bh_result, create, 0);
+					max_blocks, bh_result, create, 0, 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -1086,7 +1097,7 @@ struct buffer_head *ext4_getblk(handle_t
 	dummy.b_blocknr = -1000;
 	buffer_trace_init(&dummy.b_history);
 	err = ext4_get_blocks_wrap(handle, inode, block, 1,
-					&dummy, create, 1);
+					&dummy, create, 1, 0);
 	/*
 	 * ext4_get_blocks_handle() returns number of blocks
 	 * mapped. 0 in case of a HOLE.
@@ -1440,7 +1451,7 @@ static int ext4_da_reserve_space(struct 
 	return 0;       /* success */
 }
 
-static void ext4_da_release_space(struct inode *inode, int used, int to_free)
+void ext4_da_release_space(struct inode *inode, int used, int to_free)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	int total, mdb, release;
@@ -1483,7 +1494,7 @@ static int ext4_da_get_block_prep(struct
 	 * preallocated blocks are unmapped but should treated
 	 * the same as allocated blocks.
 	 */
-	ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1,  bh_result, 0, 0);
+	ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1,  bh_result, 0, 0, 0);
 	if ((ret == 0)&& !buffer_delay(bh_result)) {
 		/* the block isn't (pre)allocated yet, let's reserve space */
 		/*
@@ -1505,7 +1516,7 @@ static int ext4_da_get_block_prep(struct
 
 	return ret;
 }
-
+#define		EXT4_DELALLOC_RSVED	1
 static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
@@ -1519,7 +1530,7 @@ static int ext4_da_get_block_write(struc
 	BUG_ON(create == 0);
 
 	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-				   bh_result, create, 0);
+				   bh_result, create, 0, EXT4_DELALLOC_RSVED);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 
Index: linux-2.6.26-rc4/fs/ext4/ext4_i.h
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/ext4_i.h	2008-06-01 14:26:14.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/ext4_i.h	2008-06-01 15:04:14.000000000 -0700
@@ -166,6 +166,7 @@ struct ext4_inode_info {
 	/* allocation reservation info for delalloc */
 	unsigned long i_reserved_data_blocks;
 	unsigned long i_reserved_meta_blocks;
+	unsigned short i_delalloc_reserved_flag;
 };
 
 #endif	/* _EXT4_I */
Index: linux-2.6.26-rc4/fs/ext4/super.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/super.c	2008-06-01 14:26:14.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/super.c	2008-06-01 15:04:14.000000000 -0700
@@ -574,6 +574,7 @@ static struct inode *ext4_alloc_inode(st
 	spin_lock_init(&ei->i_prealloc_lock);
 	ei->i_reserved_data_blocks = 0;
 	ei->i_reserved_meta_blocks = 0;
+	ei->i_delalloc_reserved_flag = 0;
 	return &ei->vfs_inode;
 }
 
@@ -1328,6 +1329,7 @@ set_qf_format:
 			sbi->s_stripe = option;
 			break;
 		case Opt_delalloc:
+			printk("delayed allocation enabled\n");
 			set_opt(sbi->s_mount_opt, DELALLOC);
 			break;
 		default:
Index: linux-2.6.26-rc4/fs/ext4/mballoc.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/mballoc.c	2008-06-01 14:22:02.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/mballoc.c	2008-06-01 15:04:14.000000000 -0700
@@ -2831,7 +2831,15 @@ ext4_mb_mark_diskspace_used(struct ext4_
 	le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
 	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
+
+	/*
+	 * The free blocks count has already been reduced/reserved
+	 * at write_begin() time for delayed allocation, so do not
+	 * account for these blocks a second time.
+	 */
+	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
+		percpu_counter_sub(&sbi->s_freeblocks_counter,
+					ac->ac_b_ex.fe_len);
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -4055,6 +4063,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
 	}
 	inquota = ar->len;
 
+	if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
+
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 	if (!ac) {
 		ar->len = 0;
Index: linux-2.6.26-rc4/fs/ext4/dir.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/dir.c	2008-06-01 13:06:07.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/dir.c	2008-06-01 15:04:14.000000000 -0700
@@ -129,7 +129,8 @@ static int ext4_readdir(struct file * fi
 		struct buffer_head *bh = NULL;
 
 		map_bh.b_state = 0;
-		err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0);
+		err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
+						0, 0, 0);
 		if (err > 0) {
 			pgoff_t index = map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
Index: linux-2.6.26-rc4/fs/ext4/extents.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/extents.c	2008-06-01 14:57:57.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/extents.c	2008-06-01 15:04:14.000000000 -0700
@@ -2934,7 +2934,7 @@ retry:
 		}
 		ret = ext4_get_blocks_wrap(handle, inode, block,
 					  max_blocks, &map_bh,
-					  EXT4_CREATE_UNINITIALIZED_EXT, 0);
+					  EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
 		if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
 			WARN_ON(ret <= 0);


--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ