linux-ext4 - Re: What's cooking in e2fsprogs.git (topics)

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 24 Apr 2008 00:28:01 +0530
From:	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
To:	Theodore Tso <tytso@....EDU>
Cc:	linux-ext4@...r.kernel.org
Subject: Re: What's cooking in e2fsprogs.git (topics)

On Mon, Apr 21, 2008 at 12:41:44PM -0400, Theodore Tso wrote:
> 
> * ak/undo-mgr (Mon Aug 13 15:56:26 2007 +0530) 6 commits
>  - Add test cases for undoe2fs: u_undoe2fs_mke2fs and
>    u_undoe2fs_tune2fs
>  - Fix the resize inode test case
>  - tune2fs: Support for large inode migration.
>  - mke2fs: Add support for the undo I/O manager.
>  - Add undoe2fs command
>  - libext2fs: Add undo I/O manager
> 
> 	These patches have been *significantly* rototilled.
> 
> 	* The test cases weren't correctly setting and deleting the
> 	  test_name.ok and test_name.failed files
> 
> 	* mke2fs would bomb out with an incomprehensible error message
> 	  if run twice in a row, or if the user didn't have write access
> 	  to /var/lib/e2fsprogs (some users run mke2fs as a non-root
> 	  user!)
> 
> 	* the undoe2fs program was calling com_err and passing
> 	  in uninitialized retval values, and was otherwise confused
> 	  about how to do proper error handling, resulting in garbage
> 	  getting printed if the file passed in didn't exist
> 
> 	* there was a terrible performance problem because in the
> 	  mke2fs case, the undo manager was using a tdb_data_size of
> 	  512.
> 
> 	* I added the ability to configure the location of the scratch
>           dirctory via mke2fs.conf for mk2efs.  What we should do for
>           tune2fs is less clear, and still needs to be addressed.  It
>           is also possible to force an undo file to be always created,
>           and not just when doing a lazy inode table initialization.
>           By using a 4k instead of 512 tdb_data_size, the time to
>           speed up an mke2fs was cut in half, while still using an
>           undo file.  I suspect if we force the tdb_data_size to be
>           even larger, say 32k or 64k the overhead would shrink even
>           more.
> 
> 	Unfortunately, there appears to be some kind of data
> 	corruption bug if I force a tdb_data_size of 32768, so I'm not
> 	entirely sure I trust the undo manager to be working
> 	correctly.  The undo_manager code itself needs a pretty
> 	serious auditing and checking to make sure it's doing the
> 	right thing with large tdb_data_sizes.
> 

The bug was in the changes added to support block size via set_options
We had two records in the data for blocksize. one 1024. configured via
set_blk_size and other 32K configured via set_options. So the undoe2fs
was replaying with 1K as the block size.

The below changes get everything working for me. The patch is not clean.
so they are not to apply. I still need to think a little bit more about
what to do when we attempt to read tdb_data_size from the end of file
system. Will send the cleanup patch later.

Let me know whether you want to keep the debug printf

diff --git a/lib/ext2fs/undo_io.c b/lib/ext2fs/undo_io.c
index 9bee1b6..4ad0fd6 100644
--- a/lib/ext2fs/undo_io.c
+++ b/lib/ext2fs/undo_io.c
@@ -169,6 +169,9 @@ static errcode_t write_block_size(TDB_CONTEXT *tdb, int block_size)
 	tdb_key.dsize = sizeof("filesystem BLKSIZE");
 	tdb_data.dptr = (unsigned char *)&(block_size);
 	tdb_data.dsize = sizeof(block_size);
+#ifdef DEBUG
+	printf("Setting blocksize %d\n", block_size);
+#endif
 
 	retval = tdb_store(tdb, tdb_key, tdb_data, TDB_INSERT);
 	if (retval == -1) {
@@ -182,13 +185,14 @@ static errcode_t undo_write_tdb(io_channel channel,
 				unsigned long block, int count)
 
 {
-	int size, loop_count = 0, i;
+	int size, i;
 	unsigned long block_num, backing_blk_num;
 	errcode_t retval = 0;
 	ext2_loff_t offset;
 	struct undo_private_data *data;
 	TDB_DATA tdb_key, tdb_data;
 	char *read_ptr;
+	unsigned long end_block;
 
 	data = (struct undo_private_data *) channel->private_data;
 
@@ -208,21 +212,26 @@ static errcode_t undo_write_tdb(io_channel channel,
 			size = count * channel->block_size;
 	}
 
+#ifdef DEBUG
+	printf("Writing at %ld of size %ld\n", block, size);
+#endif
 	/*
 	 * Data is stored in tdb database as blocks of tdb_data_size size
 	 * This helps in efficient lookup further.
 	 *
 	 * We divide the disk to blocks of tdb_data_size.
 	 */
+	/*FIXME  need to check end of file system. */
 
-	block_num = ((block*channel->block_size)+data->offset) /
-		data->tdb_data_size;
-
-	loop_count = (size + data->tdb_data_size -1) /
-		data->tdb_data_size;
+	offset = (block * channel->block_size) + data->offset ;
+	block_num = offset / data->tdb_data_size;
+	end_block = (offset + size) / data->tdb_data_size;
 
+#ifdef DEBUG
+	printf("Writing from %ld to %ld\n", block_num, end_block);
+#endif
 	tdb_transaction_start(data->tdb);
-	for (i = 0; i < loop_count; i++) {
+	while (block_num <= end_block ) {
 
 		tdb_key.dptr = (unsigned char *)&block_num;
 		tdb_key.dsize = sizeof(block_num);
@@ -231,8 +240,11 @@ static errcode_t undo_write_tdb(io_channel channel,
 		 * Check if we have the record already
 		 */
 		if (tdb_exists(data->tdb, tdb_key)) {
-
 			/* Try the next block */
+#ifdef DEBUG
+			printf("The block %d already exist in database\n",
+					block_num);
+#endif
 			block_num++;
 			continue;
 		}
@@ -258,6 +270,9 @@ static errcode_t undo_write_tdb(io_channel channel,
 		}
 
 		memset(read_ptr, 0, count);
+#ifdef DEBUG
+		printf("Reading from %ld to %ld\n", backing_blk_num, count);
+#endif
 
 		retval = io_channel_read_blk(data->real, backing_blk_num,
 					     -count, read_ptr);
@@ -276,10 +291,22 @@ static errcode_t undo_write_tdb(io_channel channel,
 		printf("Printing with key %ld data %x and size %d\n",
 		       block_num,
 		       tdb_data.dptr,
-		       channel->tdb_data_size);
+		       data->tdb_data_size);
 #endif
 
-		data->tdb_written = 1;
+		if (!data->tdb_written) {
+			data->tdb_written = 1;
+
+			/* Write the blocksize to tdb file */
+			retval = write_block_size(data->tdb,
+						  data->tdb_data_size);
+			if (retval) {
+				tdb_transaction_cancel(data->tdb);
+				retval = EXT2_ET_TDB_ERR_IO;
+				free(read_ptr);
+				return retval;
+			}
+		}
 		retval = tdb_store(data->tdb, tdb_key, tdb_data, TDB_INSERT);
 		if (retval == -1) {
 			/*
@@ -417,11 +444,6 @@ static errcode_t undo_set_blksize(io_channel channel, int blksize)
 	 */
 	if (!data->tdb_data_size) {
 		data->tdb_data_size = blksize;
-
-		/* Write it to tdb file */
-		retval = write_block_size(data->tdb, data->tdb_data_size);
-		if (retval)
-			return retval;
 	}
 
 	channel->block_size = blksize;
@@ -540,12 +562,6 @@ static errcode_t undo_set_option(io_channel channel, const char *option,
 			return EXT2_ET_INVALID_ARGUMENT;
 		if (!data->tdb_data_size || !data->tdb_written) {
 			data->tdb_data_size = tmp;
-
-			/* Write it to tdb file */
-			retval = write_block_size(data->tdb,
-						  data->tdb_data_size);
-			if (retval)
-				return retval;
 		}
 		return 0;
 	}
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 593b743..be772ee 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -1745,6 +1745,7 @@ static int should_do_undo(const char *name)
 	io_manager manager = unix_io_manager;
 	int csum_flag, force_undo;
 
+
 	csum_flag = EXT2_HAS_RO_COMPAT_FEATURE(&fs_param,
 					       EXT4_FEATURE_RO_COMPAT_GDT_CSUM);
 	force_undo = get_int_from_profile(fs_types, "force_undo", 0);
@@ -1873,7 +1874,7 @@ int main (int argc, char *argv[])
 		com_err(device_name, retval, _("while setting up superblock"));
 		exit(1);
 	}
-#if 0			/* XXX bug in undo_io.c if we set this? */
+#if 1			/* XXX bug in undo_io.c if we set this? */
 	io_channel_set_options(fs->io, "tdb_data_size=32768");
 #endif
 
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html