lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed,  1 Aug 2007 07:34:09 +0530
From:	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
To:	tytso@....edu
Cc:	linux-ext4@...r.kernel.org,
	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
Subject: [PATCH 4/4] e2fsprogs: Support for large inode migration.

From: Aneesh Kumar K.V <aneesh.kumar@...ux.vnet.ibm.com>

Add new option -I <inode_size> to tune2fs.
This is used to change the inode size. The size
need to be multiple of 2 and we don't allow to
decrease the inode size.

As a part of increasing the inode size we increase the
inode table size. We also move the used data blocks around
and update the respective inodes to point to the new block


tune2fs use undo I/O manager when migrating to large
inode. This helps in reverting the changes if end results
are not correct.The environment variable TUNE2FS_SCRATCH_DIR
is used to indicate the  directory within which the tdb
file need to be created. The file will be named tune2fs-XXXXXX
If TUNE2FS_SCRATCH_DIR is not set /var/lib/e2fsprogs is used

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@...ux.vnet.ibm.com>
---
 misc/tune2fs.c |  525 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 522 insertions(+), 3 deletions(-)

diff --git a/misc/tune2fs.c b/misc/tune2fs.c
index 833b994..8cfb05a 100644
--- a/misc/tune2fs.c
+++ b/misc/tune2fs.c
@@ -61,6 +61,7 @@ char * new_label, *new_last_mounted, *new_UUID;
 char * io_options;
 static int c_flag, C_flag, e_flag, f_flag, g_flag, i_flag, l_flag, L_flag;
 static int m_flag, M_flag, r_flag, s_flag = -1, u_flag, U_flag, T_flag;
+static int I_flag;
 static time_t last_check_time;
 static int print_label;
 static int max_mount_count, mount_count, mount_flags;
@@ -71,10 +72,20 @@ static unsigned short errors;
 static int open_flag;
 static char *features_cmd;
 static char *mntopts_cmd;
+static unsigned long int new_inode_size;
 
 int journal_size, journal_flags;
 char *journal_device;
 
+static struct list_head blk_move_list;
+
+struct blk_move {
+	struct list_head list;
+	blk_t old_loc;
+	blk_t new_loc;
+};
+
+
 static const char *please_fsck = N_("Please run e2fsck on the filesystem.\n");
 
 void do_findfs(int argc, char **argv);
@@ -89,7 +100,8 @@ static void usage(void)
 		  "\t[-o [^]mount_options[,...]] [-r reserved_blocks_count]\n"
 		  "\t[-u user] [-C mount_count] [-L volume_label] "
 		  "[-M last_mounted_dir]\n"
-		  "\t[-O [^]feature[,...]] [-T last_check_time] [-U UUID]"
+		  "\t[-O [^]feature[,...]] [-T last_check_time] [-U UUID]\n"
+		  "\t[ -I new_inode_size ]"
 		  " device\n"), program_name);
 	exit (1);
 }
@@ -505,7 +517,7 @@ static void parse_tune2fs_options(int argc, char **argv)
 	struct passwd * pw;
 
 	printf("tune2fs %s (%s)\n", E2FSPROGS_VERSION, E2FSPROGS_DATE);
-	while ((c = getopt(argc, argv, "c:e:fg:i:jlm:o:r:s:u:C:J:L:M:O:T:U:")) != EOF)
+	while ((c = getopt(argc, argv, "c:e:fg:i:jlm:o:r:s:u:C:J:L:M:O:T:U:I:")) != EOF)
 		switch (c)
 		{
 			case 'c':
@@ -702,6 +714,25 @@ static void parse_tune2fs_options(int argc, char **argv)
 				open_flag = EXT2_FLAG_RW |
 					EXT2_FLAG_JOURNAL_DEV_OK;
 				break;
+			case 'I':
+				new_inode_size = strtoul (optarg, &tmp, 0);
+				if (*tmp) {
+					com_err (program_name, 0,
+						_("bad Inode size - %s"),
+							optarg);
+					usage();
+				}
+				if (!((new_inode_size &
+						(new_inode_size - 1)) == 0)) {
+					com_err (program_name, 0,
+						_("Inode size must be a "
+						"power of two- %s"),
+							optarg);
+					usage();
+				}
+				open_flag = EXT2_FLAG_RW;
+				I_flag = 1;
+				break;
 			default:
 				usage();
 		}
@@ -739,6 +770,460 @@ void do_findfs(int argc, char **argv)
 	exit(0);
 }
 
+static int get_move_bitmap(ext2_filsys fs, int new_ino_blks_per_grp,
+					ext2fs_block_bitmap bmap)
+{
+	dgrp_t i;
+	blk_t j, needed_blocks = 0;
+	blk_t start_blk, end_blk;
+
+	for (i = 0; i < fs->group_desc_count; i++) {
+
+		start_blk = fs->group_desc[i].bg_inode_table +
+					fs->inode_blocks_per_group;
+
+		end_blk = fs->group_desc[i].bg_inode_table +
+					new_ino_blks_per_grp;
+
+		for (j = start_blk; j < end_blk; j++) {
+
+			if (ext2fs_test_block_bitmap(fs->block_map, j)) {
+				/* FIXME!!
+				 * What happens if the block is marked
+				 * as a bad block
+				 */
+				ext2fs_mark_block_bitmap(bmap, j);
+				needed_blocks++;
+			} else {
+				/*
+				 * We are going to use this block for
+				 * inode table. So mark them used.
+				 */
+				ext2fs_mark_block_bitmap(fs->block_map, j);
+			}
+		}
+	}
+
+	if (needed_blocks > fs->super->s_free_blocks_count ) {
+		return ENOSPC;
+	}
+
+	return 0;
+}
+
+static int move_block(ext2_filsys fs, ext2fs_block_bitmap bmap)
+{
+	char *buf;
+	errcode_t retval;
+	blk_t blk, new_blk;
+	struct blk_move *bmv;
+
+
+	retval = ext2fs_get_mem(fs->blocksize, &buf);
+	if (retval)
+		return retval;
+
+	for (blk = fs->super->s_first_data_block;
+			blk < fs->super->s_blocks_count; blk++) {
+
+		if (!ext2fs_test_block_bitmap(bmap, blk))
+			continue;
+
+		retval = ext2fs_new_block(fs, blk, NULL, &new_blk);
+		if (retval)
+			goto err_out;
+
+		/* Mark this block as allocated */
+		ext2fs_mark_block_bitmap(fs->block_map, new_blk);
+
+		/* Add it to block move list */
+		retval = ext2fs_get_mem(sizeof(struct blk_move), &bmv);
+		if (retval)
+			goto err_out;
+
+		bmv->old_loc = blk;
+		bmv->new_loc = new_blk;
+
+		list_add(&(bmv->list), &blk_move_list);
+
+		retval = io_channel_read_blk(fs->io, blk, 1, buf);
+		if (retval)
+			goto err_out;
+
+		retval = io_channel_write_blk(fs->io, new_blk, 1, buf);
+		if (retval)
+			goto err_out;
+	}
+
+err_out:
+	ext2fs_free_mem(&buf);
+	return retval;
+}
+static blk_t transalate_block(blk_t blk)
+{
+	struct list_head *entry;
+	struct blk_move *bmv;
+
+	list_for_each(entry, &blk_move_list) {
+
+		bmv = list_entry(entry, struct blk_move, list);
+		if (bmv->old_loc == blk)
+			return bmv->new_loc;
+	}
+
+	return 0;
+}
+
+static int process_block(ext2_filsys fs, blk_t	*block_nr,
+			 e2_blkcnt_t blockcnt,
+			 blk_t ref_block EXT2FS_ATTR((unused)),
+			 int ref_offset EXT2FS_ATTR((unused)),
+			 void *priv_data EXT2FS_ATTR((unused)))
+{
+	int ret = 0;
+	blk_t new_blk;
+
+
+	new_blk = transalate_block(*block_nr);
+	if (new_blk) {
+		*block_nr = new_blk;
+		/*
+		 * This will force the ext2fs_write_inode in the iterator
+		 */
+		ret |= BLOCK_CHANGED;
+	}
+
+	return ret;
+}
+
+static int inode_scan_and_fix(ext2_filsys fs)
+{
+	errcode_t retval = 0;
+	ext2_ino_t ino;
+	blk_t blk;
+	char *block_buf = 0;
+	struct ext2_inode inode;
+	ext2_inode_scan	scan = NULL;
+
+	retval = ext2fs_get_mem(fs->blocksize * 3, &block_buf);
+	if (retval)
+		return retval;
+
+	retval = ext2fs_open_inode_scan(fs, 0, &scan);
+	if (retval)
+		goto err_out;
+
+	while (1) {
+
+		retval = ext2fs_get_next_inode(scan, &ino, &inode);
+		if (retval)
+			goto err_out;
+
+		if (!ino)
+			break;
+
+		if (inode.i_links_count == 0)
+			continue; /* inode not in use */
+
+		/* FIXME!!
+		 * If we end up modifying the journal inode
+		 * the sb->s_jnl_blocks will differ. But a
+		 * subsequent e2fsck fixes that.
+		 * Do we need to fix this ??
+		 */
+
+		if (inode.i_file_acl) {
+
+			blk = transalate_block(inode.i_file_acl);
+			if (!blk)
+				continue;
+
+			inode.i_file_acl = blk;
+
+			/*
+			 * Write the inode to disk so that inode table
+			 * resizing can work
+			 */
+			retval = ext2fs_write_inode(fs, ino, &inode);
+			if (retval)
+				goto err_out;
+		}
+
+		if (!ext2fs_inode_has_valid_blocks(&inode))
+			continue;
+
+		retval = ext2fs_block_iterate2(fs, ino, 0,
+						block_buf, process_block,
+						0);
+		if (retval)
+			goto err_out;
+
+	}
+
+err_out:
+	ext2fs_free_mem(&block_buf);
+
+	return retval;
+
+}
+
+
+static int expand_inode_table(ext2_filsys fs, unsigned long int new_inode_size)
+{
+	dgrp_t i;
+	blk_t blk;
+	errcode_t retval;
+	int new_ino_blks_per_grp, j;
+	char *old_itable = NULL, *new_itable = NULL;
+	char *tmp_old_itable = NULL, *tmp_new_itable = NULL;
+	unsigned long int old_inode_size;
+	int old_itable_size, new_itable_size;
+
+	old_itable_size = fs->inode_blocks_per_group * fs->blocksize;
+	old_inode_size = EXT2_INODE_SIZE(fs->super);
+
+	new_ino_blks_per_grp = ext2fs_div_ceil(
+					EXT2_INODES_PER_GROUP(fs->super) *
+					new_inode_size,
+					fs->blocksize);
+
+	new_itable_size = new_ino_blks_per_grp * fs->blocksize;
+
+	retval = ext2fs_get_mem(old_itable_size, &old_itable);
+	if (retval)
+		return retval;
+
+	retval = ext2fs_get_mem(new_itable_size, &new_itable);
+	if (retval)
+		goto err_out;
+
+	tmp_old_itable = old_itable;
+	tmp_new_itable = new_itable;
+
+	for (i = 0; i < fs->group_desc_count; i++) {
+
+		blk = fs->group_desc[i].bg_inode_table;
+		retval = io_channel_read_blk(fs->io, blk,
+				fs->inode_blocks_per_group, old_itable);
+		if (retval)
+			goto err_out;
+
+		for (j = 0; j < EXT2_INODES_PER_GROUP(fs->super); j++) {
+
+			memcpy(new_itable, old_itable, old_inode_size);
+
+			memset(new_itable+old_inode_size, 0,
+					new_inode_size - old_inode_size);
+
+			new_itable +=  new_inode_size;
+			old_itable += old_inode_size;
+		}
+
+		/* reset the pointer */
+		old_itable = tmp_old_itable;
+		new_itable = tmp_new_itable;
+
+		retval = io_channel_write_blk(fs->io, blk,
+					new_ino_blks_per_grp, new_itable);
+		if (retval)
+			goto err_out;
+	}
+
+	/* Update the meta data */
+	fs->inode_blocks_per_group = new_ino_blks_per_grp;
+	fs->super->s_inode_size = new_inode_size;
+
+err_out:
+	if (old_itable)
+		ext2fs_free_mem(&old_itable);
+
+	if (new_itable)
+		ext2fs_free_mem(&new_itable);
+
+	return retval;
+
+}
+
+static errcode_t ext2fs_calculate_summary_stats(ext2_filsys fs)
+{
+	blk_t		blk;
+	ext2_ino_t	ino;
+	unsigned int	group = 0;
+	unsigned int	count = 0;
+	int		total_free = 0;
+	int		group_free = 0;
+
+	/*
+	 * First calculate the block statistics
+	 */
+	for (blk = fs->super->s_first_data_block;
+	     blk < fs->super->s_blocks_count; blk++) {
+		if (!ext2fs_fast_test_block_bitmap(fs->block_map, blk)) {
+			group_free++;
+			total_free++;
+		}
+		count++;
+		if ((count == fs->super->s_blocks_per_group) ||
+		    (blk == fs->super->s_blocks_count-1)) {
+			fs->group_desc[group++].bg_free_blocks_count =
+				group_free;
+			count = 0;
+			group_free = 0;
+		}
+	}
+	fs->super->s_free_blocks_count = total_free;
+	
+	/*
+	 * Next, calculate the inode statistics
+	 */
+	group_free = 0;
+	total_free = 0;
+	count = 0;
+	group = 0;
+
+	/* Protect loop from wrap-around if s_inodes_count maxed */
+	for (ino = 1; ino <= fs->super->s_inodes_count && ino > 0; ino++) {
+		if (!ext2fs_fast_test_inode_bitmap(fs->inode_map, ino)) {
+			group_free++;
+			total_free++;
+		}
+		count++;
+		if ((count == fs->super->s_inodes_per_group) ||
+		    (ino == fs->super->s_inodes_count)) {
+			fs->group_desc[group++].bg_free_inodes_count =
+				group_free;
+			count = 0;
+			group_free = 0;
+		}
+	}
+	fs->super->s_free_inodes_count = total_free;
+	ext2fs_mark_super_dirty(fs);
+	return 0;
+}
+
+#define list_for_each_safe(pos, pnext, head) \
+	for (pos = (head)->next, pnext = pos->next; pos != (head); \
+	     pos = pnext, pnext = pos->next)
+
+static void free_blk_move_list()
+{
+	struct list_head *entry, *tmp;
+	struct blk_move *bmv;
+
+	list_for_each_safe(entry, tmp, &blk_move_list) {
+
+		bmv = list_entry(entry, struct blk_move, list);
+		list_del(entry);
+		ext2fs_free_mem(&bmv);
+	}
+
+	return ;
+}
+static int resize_inode(ext2_filsys fs, unsigned long int new_inode_size)
+{
+	errcode_t retval;
+	int new_ino_blks_per_grp;
+	ext2fs_block_bitmap bmap;
+
+	if (new_inode_size <= EXT2_INODE_SIZE(fs->super)) {
+		fprintf(stderr, _("New Inode size too small\n"));
+		return EXT2_ET_INVALID_ARGUMENT;
+	}
+
+	ext2fs_read_inode_bitmap(fs);
+	ext2fs_read_block_bitmap(fs);
+	INIT_LIST_HEAD(&blk_move_list);
+
+
+	new_ino_blks_per_grp = ext2fs_div_ceil(
+					EXT2_INODES_PER_GROUP(fs->super)*
+					new_inode_size,
+					fs->blocksize);
+
+	/* We may change the file system.
+	 * Mark the file system as invalid so that
+	 * the user is prompted to run fsck.
+	 */
+	fs->super->s_state &= ~EXT2_VALID_FS;
+
+	retval = ext2fs_allocate_block_bitmap(fs, _("blocks to be moved"),
+						&bmap);
+	if (retval)
+		return retval;
+
+	retval = get_move_bitmap(fs, new_ino_blks_per_grp, bmap);
+	if (retval)
+		goto err_out;
+
+	retval = move_block(fs, bmap);
+	if (retval)
+		goto err_out;
+
+	retval = inode_scan_and_fix(fs);
+	if (retval)
+		goto err_out;
+
+	retval = expand_inode_table(fs, new_inode_size);
+	if (retval)
+		goto err_out;
+
+	ext2fs_calculate_summary_stats(fs);
+
+	fs->super->s_state |= EXT2_VALID_FS;
+	/* mark super block and block bitmap as dirty */
+	ext2fs_mark_super_dirty(fs);
+	ext2fs_mark_bb_dirty(fs);
+
+err_out:
+	free_blk_move_list();
+	ext2fs_free_block_bitmap(bmap);
+
+	return retval;
+}
+
+static int setup_tdb(const char *name)
+{
+	char *tdb_dir, tdb_file[PATH_MAX];
+#if 0 /* FIXME!! */
+	/*
+	 * Configuration via a conf file would be
+	 * nice
+	 */
+	profile_get_string(profile, "scratch_files",
+					"directory", 0, 0,
+					&tdb_dir);
+#endif
+	tdb_dir = getenv("TUNE2FS_SCRATCH_DIR");
+	if (!tdb_dir) {
+		com_err(__FUNCTION__, 0,
+			_("TUNE2FS_SCRATCH_DIR not configured\n"));
+		printf(_("Using /var/lib/e2fsprogs\n"));
+		tdb_dir="/var/lib/e2fsprogs";
+
+	}
+	if (access(tdb_dir, W_OK)) {
+		fprintf(stderr,
+			_("Cannot create file under %s\n"),
+			tdb_dir);
+		return EXT2_ET_INVALID_ARGUMENT;
+
+	}
+
+	sprintf(tdb_file, "%s/tune2fs-XXXXXX", tdb_dir);
+
+	if (!access(tdb_file, F_OK)) {
+		fprintf(stderr,
+			_("File exist %s\n"), tdb_file);
+		return EXT2_ET_INVALID_ARGUMENT;
+	}
+
+	set_undo_io_backup_file(tdb_file);
+	printf(_("To undo the tune2fs operations please run "
+		 "the command\nundoe2fs %s  %s\n\n"),
+		 tdb_file, name);
+
+	return 0;
+}
 
 int main (int argc, char ** argv)
 {
@@ -768,7 +1253,19 @@ int main (int argc, char ** argv)
 	io_ptr = test_io_manager;
 	test_io_backing_manager = unix_io_manager;
 #else
-	io_ptr = unix_io_manager;
+	if (I_flag) {
+		/*
+		 * If inode resize is requested use the
+		 * Undo I/O manager
+		 */
+		io_ptr = undo_io_manager;
+		set_undo_io_backing_manager(unix_io_manager);
+		retval = setup_tdb(device_name);
+		if (retval)
+			exit(1);
+	} else {
+		io_ptr = unix_io_manager;
+	}
 #endif
 	retval = ext2fs_open2(device_name, io_options, open_flag, 
 			      0, 0, io_ptr, &fs);
@@ -919,6 +1416,28 @@ int main (int argc, char ** argv)
 		}
 		ext2fs_mark_super_dirty(fs);
 	}
+	if (I_flag) {
+		if (mount_flags & EXT2_MF_MOUNTED) {
+			fputs(_("The Inode size may only be "
+				"changed when the filesystem is "
+				"unmounted.\n"), stderr);
+			exit(1);
+		}
+		/*
+		 * We want to update group descriptor also
+		 * with the new free inode count
+		 */
+		fs->flags &= ~EXT2_FLAG_SUPER_ONLY;
+		if (resize_inode(fs, new_inode_size)) {
+
+			fputs(_("Error in resizing the Inode.\n"
+				"Run undoe2fs to undo the "
+				"file system changes. \n"), stderr);
+		} else {
+			printf (_("Setting Inode size  %d\n"),
+							new_inode_size);
+		}
+	}
 
 	if (l_flag)
 		list_super (sb);
-- 
1.5.3.rc2.22.g69a9b-dirty

-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ