lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1362731059-3508-1-git-send-email-lczerner@redhat.com>
Date:	Fri,  8 Mar 2013 09:24:17 +0100
From:	Lukas Czerner <lczerner@...hat.com>
To:	linux-ext4@...r.kernel.org
Cc:	gnehzuil.liu@...il.com, Lukas Czerner <lczerner@...hat.com>
Subject: [PATCH 1/3] ext4: Reserve metadata if writing into uninitialized

Currently in delalloc write we do not reserve any space if we're
writing into the uninitialized extent. This is ok for data, because
the space has already been allocated so we do not have to do data
reservation, however we have to reserve metadata for uninitialized
extent conversion on writeback.

Add new ext4_da_reserve_metadata() function to only reserve metadata
blocks for delayed allocation an use it if we're writing delayed blocks
into unwritten extent to reserve metadata for extent conversion.

The problem can be reproduced with xfstest 083 on bigalloc file system.
With this patch I can not reproduce the problem anymore.

Signed-off-by: Lukas Czerner <lczerner@...hat.com>
---
 fs/ext4/ext4.h  |    1 +
 fs/ext4/inode.c |   76 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6e16c18..c20efe2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -581,6 +581,7 @@ enum {
 #define EXT4_GET_BLOCKS_NO_LOCK			0x0100
 	/* Do not put hole in extent cache */
 #define EXT4_GET_BLOCKS_NO_PUT_HOLE		0x0200
+#define EXT4_GET_BLOCKS_METADATA_RESERVED	0x0400
 
 /*
  * Flags used by ext4_free_blocks
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9ea0cde..c3b88ec 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -606,7 +606,8 @@ found:
 	 * let the underlying get_block() function know to
 	 * avoid double accounting
 	 */
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+	if ((flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ||
+	    (flags & EXT4_GET_BLOCKS_METADATA_RESERVED))
 		ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 	/*
 	 * We need to check for EXT4 here because migrate
@@ -636,7 +637,8 @@ found:
 			(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
 			ext4_da_update_reserve_space(inode, retval, 1);
 	}
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+	if ((flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ||
+	    (flags & EXT4_GET_BLOCKS_METADATA_RESERVED))
 		ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
 	if (retval > 0) {
@@ -1215,6 +1217,56 @@ static int ext4_journalled_write_end(struct file *file,
 	return ret ? ret : copied;
 }
 
+
+/*
+ * Reserve a metadata for a single block located at lblock
+ */
+static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
+{
+	int retries = 0;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned int md_needed;
+	ext4_lblk_t save_last_lblock;
+	int save_len;
+
+	/*
+	 * recalculate the amount of metadata blocks to reserve
+	 * in order to allocate nrblocks
+	 * worse case is one extent per block
+	 */
+repeat:
+	spin_lock(&ei->i_block_reservation_lock);
+	/*
+	 * ext4_calc_metadata_amount() has side effects, which we have
+	 * to be prepared undo if we fail to claim space.
+	 */
+	save_len = ei->i_da_metadata_calc_len;
+	save_last_lblock = ei->i_da_metadata_calc_last_lblock;
+	md_needed = EXT4_NUM_B2C(sbi,
+				 ext4_calc_metadata_amount(inode, lblock));
+	trace_ext4_da_reserve_space(inode, md_needed);
+
+	/*
+	 * We do still charge estimated metadata to the sb though;
+	 * we cannot afford to run out of free blocks.
+	 */
+	if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
+		ei->i_da_metadata_calc_len = save_len;
+		ei->i_da_metadata_calc_last_lblock = save_last_lblock;
+		spin_unlock(&ei->i_block_reservation_lock);
+		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+			cond_resched();
+			goto repeat;
+		}
+		return -ENOSPC;
+	}
+	ei->i_reserved_meta_blocks += md_needed;
+	spin_unlock(&ei->i_block_reservation_lock);
+
+	return 0;       /* success */
+}
+
 /*
  * Reserve a single cluster located at lblock
  */
@@ -1601,7 +1653,8 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 	 */
 	map.m_lblk = next;
 	map.m_len = max_blocks;
-	get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
+	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
+			   EXT4_GET_BLOCKS_METADATA_RESERVED;
 	if (ext4_should_dioread_nolock(mpd->inode))
 		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
 	if (mpd->b_state & (1 << BH_Delay))
@@ -1766,7 +1819,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 			      struct buffer_head *bh)
 {
 	struct extent_status es;
-	int retval;
+	int retval, ret;
 	sector_t invalid_block = ~((sector_t) 0xffff);
 
 	if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
@@ -1804,9 +1857,19 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 		map->m_len = retval;
 		if (ext4_es_is_written(&es))
 			map->m_flags |= EXT4_MAP_MAPPED;
-		else if (ext4_es_is_unwritten(&es))
+		else if (ext4_es_is_unwritten(&es)) {
+			/*
+			 * We have delalloc write into the unwritten extent
+			 * which means that we have to reserve metadata
+			 * potentially required for converting unwritten
+			 * extent.
+			 */
+			ret = ext4_da_reserve_metadata(inode, iblock);
+			if (ret)
+				/* not enough space to reserve */
+				retval = ret;
 			map->m_flags |= EXT4_MAP_UNWRITTEN;
-		else
+		} else
 			BUG_ON(1);
 
 		return retval;
@@ -1838,7 +1901,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 
 add_delayed:
 	if (retval == 0) {
-		int ret;
 		/*
 		 * XXX: __block_prepare_write() unmaps passed block,
 		 * is it OK?
-- 
1.7.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists