lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20100106084918.GA3475@skywalker.linux.vnet.ibm.com>
Date:	Wed, 6 Jan 2010 14:19:18 +0530
From:	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
To:	Eric Sandeen <sandeen@...hat.com>
Cc:	"Theodore Ts'o" <tytso@....edu>, linux-ext4@...r.kernel.org
Subject: Re: fsstress-induced corruption reproduced

> Maybe something like this works:
> 
> Index: linux-2.6/fs/ext4/inode.c
> ===================================================================
> --- linux-2.6.orig/fs/ext4/inode.c
> +++ linux-2.6/fs/ext4/inode.c
> @@ -1203,6 +1203,7 @@ int ext4_get_blocks(handle_t *handle, st
>  		    int flags)
>  {
>  	int retval;
> +	int was_unwritten;
> 
>  	clear_buffer_mapped(bh);
>  	clear_buffer_unwritten(bh);
> @@ -1253,9 +1254,13 @@ int ext4_get_blocks(handle_t *handle, st
>  	 * part of the uninitialized extent to be an initialized
>  	 * extent.  This is because we need to avoid the combination
>  	 * of BH_Unwritten and BH_Mapped flags being simultaneously
> -	 * set on the buffer_head.
> +	 * set on the buffer_head.  However, if it was unwritten we
> +	 * don't want to update reserved space later.
>  	 */
> -	clear_buffer_unwritten(bh);
> +	if (buffer_unwritten(bh)) {
> +		was_unwritten = 1;
> +		clear_buffer_unwritten(bh);
> +	}


That won't work because we can do the fallocate after we did the ext4_ext_get_block
with create = 0 and before we do down_write below. So we would still have was_unwritten = 0


> 
>  	/*
>  	 * New blocks allocate and/or writing to uninitialized extent
> @@ -1301,7 +1306,8 @@ int ext4_get_blocks(handle_t *handle, st
>  	 * Update reserved blocks/metadata blocks after successful
>  	 * block allocation which had been deferred till now.
>  	 */
> -	if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
> +	if ((retval > 0) && !was_unwritten &&
> +	    (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
>  		ext4_da_update_reserve_space(inode, retval);
> 
>  	up_write((&EXT4_I(inode)->i_data_sem));
> 
> but that might leave the previous reservations hanging around from
> prior to the fallocate ...
> 

What commit d21cd8f163ac44b15c465aab7306db931c606908 did was to move
quota claim to ext4_get_block function. Earlier we didn't do a 
quota claim if we happened to write to fallocate area because
ext4_ext_handle_uninitialized_extents didn't call ext4_mb_mark_diskspace_used
So that should explain why we are seeing this problem with
d21cd8f163ac44b15c465aab7306db931c606908.

How about the patch below ?

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index af7b626..b98de17 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1443,6 +1443,8 @@ extern int ext4_block_truncate_page(handle_t *handle,
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern int flush_aio_dio_completed_IO(struct inode *inode);
+extern void ext4_da_update_reserve_space(struct inode *inode,
+					int used, int quota_claim);
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
 extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7d7b74e..3b6ff72 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3132,7 +3132,19 @@ out:
 		unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
 					newblock + max_blocks,
 					allocated - max_blocks);
+		allocated = max_blocks;
 	}
+
+	/*
+	 * If we have done fallocate with the offset that is already
+	 * delayed allocated, we would have block reservation
+	 * and quota reservation done in the delayed write path.
+	 * But fallocate would have already updated quota and block
+	 * count for this offset. So cancel these reservation
+	 */
+	if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)
+		ext4_da_update_reserve_space(inode, allocated, 0);
+
 map_out:
 	set_buffer_mapped(bh_result);
 out1:
@@ -3368,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	/* previous routine could use block we allocated */
 	newblock = ext_pblock(&newex);
 	allocated = ext4_ext_get_actual_len(&newex);
+	if (allocated > max_blocks)
+		allocated = max_blocks;
 	set_buffer_new(bh_result);
 
 	/*
+	 * Update reserved blocks/metadata blocks after successful
+	 * block allocation which had been deferred till now.
+	 */
+	if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)
+		ext4_da_update_reserve_space(inode, allocated, 1);
+
+	/*
 	 * Cache the extent and update transaction to commit on fdatasync only
 	 * when it is _not_ an uninitialized extent.
 	 */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c818972..77ff941 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1053,7 +1053,7 @@ static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
  * Called with i_data_sem down, which is important since we can call
  * ext4_discard_preallocations() from here.
  */
-static void ext4_da_update_reserve_space(struct inode *inode, int used)
+void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_inode_info *ei = EXT4_I(inode);
@@ -1090,9 +1090,17 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
 	/* Update quota subsystem */
-	vfs_dq_claim_block(inode, used);
-	if (mdb_free)
-		vfs_dq_release_reservation_block(inode, mdb_free);
+	if (quota_claim) {
+		vfs_dq_claim_block(inode, used);
+		if (mdb_free)
+			vfs_dq_release_reservation_block(inode, mdb_free);
+	} else {
+		/*
+		 * This is a request to cancel the reservation. So just
+		 * update the resevation and cancel the quota reservation
+		 */
+		vfs_dq_release_reservation_block(inode, mdb_free + used);
+	}
 
 	/*
 	 * If we have done all the pending block allocations and if
@@ -1292,18 +1300,20 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 			 */
 			EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
 		}
-	}
 
+		/*
+		 * Update reserved blocks/metadata blocks after successful
+		 * block allocation which had been deferred till now. We don't
+		 * support fallocate for non extent files. So we can update
+		 * reserve space here.
+		 */
+		if ((retval > 0) &&
+			(flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
+			ext4_da_update_reserve_space(inode, retval, 1);
+	}
 	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		EXT4_I(inode)->i_delalloc_reserved_flag = 0;
 
-	/*
-	 * Update reserved blocks/metadata blocks after successful
-	 * block allocation which had been deferred till now.
-	 */
-	if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
-		ext4_da_update_reserve_space(inode, retval);
-
 	up_write((&EXT4_I(inode)->i_data_sem));
 	if (retval > 0 && buffer_mapped(bh)) {
 		int ret = check_block_validity(inode, "file system "






--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ