lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-Id: <483FE282.1010003@rs.jp.nec.com>
Date:	Fri, 30 May 2008 20:18:26 +0900
From:	Akira Fujita <a-fujita@...jp.nec.com>
To:	linux-ext4@...r.kernel.org, linux-fsdevel@...r.kernel.org,
	Theodore Tso <tytso@....edu>, Mingming Cao <cmm@...ibm.com>
CC:	Akira Fujita <a-fujita@...jp.nec.com>
Subject: [RFC][PATCH 6/8]ext4: check the free space fragmentation (-f mode)

ext4: online defrag-- Check the free space fragmentation (-f mode)

From: Akira Fujita <a-fujita@...jp.nec.com>

Check the free space fragmentation in the block group
where target file is located.

Signed-off-by: Akira Fujita <a-fujita@...jp.nec.com>
Signed-off-by: Takashi Sato <t-sato@...jp.nec.com>
---
 fs/ext4/balloc.c |    2 +-
 fs/ext4/defrag.c |  275 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ext4/ext4.h   |   34 +++++++
 fs/ext4/ioctl.c  |    5 +-
 4 files changed, 312 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b961ad1..a3fb70c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -919,7 +919,7 @@ static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh)
  * bitmap on disk and the last-committed copy in journal, until we find a
  * bit free in both bitmaps.
  */
-static ext4_grpblk_t
+ext4_grpblk_t
 bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
 					ext4_grpblk_t maxblocks)
 {
diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
index 61f577b..ac85330 100644
--- a/fs/ext4/defrag.c
+++ b/fs/ext4/defrag.c
@@ -20,6 +20,12 @@
 #include "ext4_extents.h"
 #include "group.h"

+#define EXT_SET_EXTENT_DATA(src, dest)  do {			\
+		dest.block = le32_to_cpu(src->ee_block);	\
+		dest.start = ext_pblock(src);			\
+		dest.len = le16_to_cpu(src->ee_len);		\
+					} while (0)
+
 /**
  * ext4_defrag_next_extent - Search for the next extent and set it to "extent"
  *
@@ -90,6 +96,223 @@ err:
 	return -EIO;
 }

+/**
+ * ext4_defrag_extents_info - Get extents information
+ *
+ * @sb:				for ext4_iget()
+ * @ext_info:			pointer to ext4_extents_info
+ *  @ext_info->ino:		describe an inode which is used to get
+ *				extent information
+ *  @ext_info->max_entries:	defined by DEFRAG_MAX_ENT
+ *  @ext_info->entries:		amount of extents (output)
+ *  @ext_info->ext[]:		array of extent (output)
+ *  @ext_info->offset:		starting block offset of targeted extent
+ *				(file relative)
+ *
+ * This function returns 0 if the next extent(s) exists,
+ * or returns 1 if the next extent doesn't exist,
+ * otherwise returns error value.
+ */
+static int
+ext4_defrag_extents_info(struct super_block *sb,
+				struct ext4_extents_info *ext_info)
+{
+	struct ext4_ext_path *path = NULL;
+	struct ext4_extent *ext = NULL;
+	struct inode *inode = NULL;
+	ext4_lblk_t offset = ext_info->f_offset;
+	int max_entries = ext_info->max_entries;
+	int depth, entries = 0;
+	int err = 0;
+	int ret = 0;
+
+	inode = ext4_iget(sb, ext_info->ino);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+
+	/* Return -ENOENT if a file does not exist */
+	if (!inode->i_nlink || inode->i_ino < EXT4_GOOD_OLD_FIRST_INO ||
+			!S_ISREG(inode->i_mode)) {
+		ext_info->entries = 0;
+		err = -ENOENT;
+		goto out;
+	}
+
+	path = ext4_ext_find_extent(inode, offset, NULL);
+	if (IS_ERR(path)) {
+		err = PTR_ERR(path);
+		path = NULL;
+		goto out;
+	}
+	depth = ext_depth(inode);
+
+	/* Skip the 0 size file */
+	if (path[depth].p_ext == NULL) {
+		ext_info->entries = 0;
+		goto out;
+	}
+	ext = path[depth].p_ext;
+	EXT_SET_EXTENT_DATA(ext, ext_info->ext[entries]);
+	entries = 1;
+
+	/*
+	 * The ioctl repeats this loop 'max_entries' times.
+	 * So we have to call this function again if @inode had
+	 * more the number of extents than 'max_entries'.
+	 */
+	while (entries < max_entries) {
+		ret = ext4_defrag_next_extent(inode, path, &ext);
+		if (ret == 0) {
+			/* Found the next extent (it means not the last one) */
+			EXT_SET_EXTENT_DATA(ext, ext_info->ext[entries]);
+			entries++;
+
+			/*
+			 * In case @inode has > 'max_entries' extents,
+			 * we must call this function again and restart from
+			 * 'max_entries * n + 1'th extent.
+			 * 'n' is the number of calling this function
+			 * at the same @inode.
+			 */
+			if (entries == max_entries) {
+				ext_info->f_offset =
+						le32_to_cpu(ext->ee_block) +
+						le16_to_cpu(ext->ee_len);
+				/* Check the extent is the last one or not */
+				ret =
+				    ext4_defrag_next_extent(inode, path, &ext);
+				if (ret == 1) {
+					err = ret;
+				} else if (ret < 0) {
+					/* Failed to get the next extent */
+					err = ret;
+					goto out;
+				}
+				break;
+			}
+
+		} else if (ret == 1) {
+			/* The extent is the last one */
+			ext_info->f_offset = 0;
+			err = ret;
+			break;
+		} else {
+			/* Failed to get the next extent */
+			err = ret;
+			goto out;
+		}
+	}
+
+	ext_info->entries = entries;
+
+out:
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+	up_write(&EXT4_I(inode)->i_data_sem);
+	iput(inode);
+	return err;
+}
+
+/**
+ * ext4_defrag_fblocks_distribution - Search free blocks distribution
+ *
+ * @org_inode:	original inode
+ * @ext_info:	ext4_extents_info
+ *
+ * This function returns 0 if succeed, otherwise returns error value.
+ */
+static int
+ext4_defrag_fblocks_distribution(struct inode *org_inode,
+			struct ext4_extents_info *ext_info)
+{
+	struct buffer_head *bitmap_bh = NULL;
+	struct super_block *sb = org_inode->i_sb;
+	handle_t *handle;
+	ext4_group_t group_no;
+	ext4_grpblk_t start, end;
+	ext4_fsblk_t start_block = 0;
+	int i, err;
+	int num = 0;
+	int len = 0;
+	int block_set = 0;
+	int extra_block = 0;
+
+	if (!sb) {
+		printk(KERN_ERR "ext4 defrag: Non-existent device\n");
+		return -ENOSPC;
+	}
+
+	group_no = (org_inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
+	start = ext_info->g_offset;
+	end = EXT4_BLOCKS_PER_GROUP(sb) - 1;
+
+	/* We consider about the boot block if bs = 1k */
+	if (sb->s_blocksize == 1024)
+		extra_block = 1;
+
+	handle = ext4_journal_start(org_inode, 1);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		return err;
+	}
+
+	bitmap_bh = read_block_bitmap(sb, group_no);
+	if (!bitmap_bh) {
+		err = -EIO;
+		goto out;
+	}
+
+	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+	err = ext4_journal_get_undo_access(handle, bitmap_bh);
+	if (err)
+		goto out;
+
+	for (i = start; i <= end ; i++) {
+		if (bitmap_search_next_usable_block(i, bitmap_bh, i + 1) >= 0) {
+			len++;
+			/*
+			 * Reset start_block if the free block is
+			 * the head of region.
+			 */
+			if (!block_set) {
+				start_block =
+				 i + group_no * EXT4_BLOCKS_PER_GROUP(sb) +
+				 extra_block;
+				block_set = 1;
+			}
+		} else if (len) {
+			ext_info->ext[num].start = start_block;
+			ext_info->ext[num].len = len;
+			num++;
+			len = 0;
+			block_set = 0;
+			if (num == ext_info->max_entries) {
+				ext_info->g_offset = i + 1;
+				break;
+			}
+		}
+		if (i == end && len) {
+			ext_info->ext[num].start = start_block;
+			ext_info->ext[num].len = len;
+			num++;
+		}
+	}
+
+	ext_info->entries = num;
+out:
+	ext4_journal_release_buffer(handle, bitmap_bh);
+	brelse(bitmap_bh);
+
+	if (handle)
+		ext4_journal_stop(handle);
+
+	return err;
+}
+
 int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 			unsigned long arg)
 {
@@ -114,6 +337,52 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 		block = ext4_bmap(mapping, block);

 		return put_user(block, p);
+	} else if (cmd == EXT4_IOC_GROUP_INFO) {
+		struct ext4_group_data_info grp_data;
+
+		if (copy_from_user(&grp_data,
+			(struct ext4_group_data_info __user *)arg,
+			sizeof(grp_data)))
+			return -EFAULT;
+
+		grp_data.s_blocks_per_group =
+			EXT4_BLOCKS_PER_GROUP(inode->i_sb);
+		grp_data.s_inodes_per_group =
+			EXT4_INODES_PER_GROUP(inode->i_sb);
+
+		if (copy_to_user((struct ext4_group_data_info __user *)arg,
+			&grp_data, sizeof(grp_data)))
+			return -EFAULT;
+	} else if (cmd == EXT4_IOC_FREE_BLOCKS_INFO) {
+		struct ext4_extents_info ext_info;
+
+		if (copy_from_user(&ext_info,
+			(struct ext4_extents_info __user *)arg,
+			sizeof(ext_info)))
+			return -EFAULT;
+
+		BUG_ON(ext_info.ino != inode->i_ino);
+
+		err = ext4_defrag_fblocks_distribution(inode, &ext_info);
+
+		if (!err)
+			err = copy_to_user(
+				(struct ext4_extents_info __user *)arg,
+				&ext_info, sizeof(ext_info));
+	} else if (cmd == EXT4_IOC_EXTENTS_INFO) {
+		struct ext4_extents_info ext_info;
+
+		if (copy_from_user(&ext_info,
+				(struct ext4_extents_info __user *)arg,
+				sizeof(ext_info)))
+			return -EFAULT;
+
+		err = ext4_defrag_extents_info(inode->i_sb, &ext_info);
+		if (err >= 0) {
+			if (copy_to_user((struct ext4_extents_info __user *)arg,
+				&ext_info, sizeof(ext_info)))
+				return -EFAULT;
+		}
 	} else if (cmd == EXT4_IOC_DEFRAG) {
 		struct ext4_ext_defrag_data defrag;
 		struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
@@ -1127,11 +1396,13 @@ out2:
  *
  * @org_inode:		original inode
  * @defrag_size:	size of defrag in blocks
+ * @goal:		poiter to block offset for allocation
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
 static int
-ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size)
+ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size,
+		ext4_fsblk_t *goal)
 {

 	/* ext4 online defrag supports only 4KB block size */
@@ -1242,7 +1513,7 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
 	int ret, depth, seq_extents, last_extent = 0;

 	/* Check the filesystem enviroment whether defrag can be done */
-	ret = ext4_defrag_check(org_inode, defrag_size);
+	ret = ext4_defrag_check(org_inode, defrag_size, &goal);
 	if (ret < 0)
 		return ret;

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 12b3fea..d0b1301 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -300,6 +300,9 @@ struct ext4_new_group_data {
 #define EXT4_IOC_MIGRATE		_IO('f', 7)
 #define EXT4_IOC_FIBMAP			_IOW('f', 9, ext4_fsblk_t)
 #define EXT4_IOC_DEFRAG		_IOW('f', 10, struct ext4_ext_defrag_data)
+#define EXT4_IOC_GROUP_INFO	_IOW('f', 11, struct ext4_group_data_info)
+#define EXT4_IOC_FREE_BLOCKS_INFO	_IOW('f', 12, struct ext4_extents_info)
+#define EXT4_IOC_EXTENTS_INFO		_IOW('f', 13, struct ext4_extents_info)

 /*
  * ioctl commands in 32 bit emulation
@@ -323,12 +326,41 @@ struct ext4_new_group_data {
  */
 #define DEFRAG_BLOCK_SIZE	4096

+/*
+ * The following four macros are used for the defrag force mode.
+ *
+ * DEFRAG_MAX_ENT:	the maximum number of extents for exchanging between
+ *			kernel-space and user-space per an ioctl
+ */
+#define DEFRAG_MAX_ENT		32
+
+struct ext4_extent_data {
+	ext4_lblk_t block;		/* start logical block number */
+	ext4_fsblk_t start;		/* start physical block number */
+	int len;			/* blocks count */
+};
+
 struct ext4_ext_defrag_data {
 	ext4_lblk_t start_offset;	/* start offset to defrag in blocks */
 	ext4_lblk_t defrag_size;	/* size of defrag in blocks */
 	ext4_fsblk_t goal;		/* block offset for allocation */
 };

+struct ext4_group_data_info {
+	int s_blocks_per_group;		/* blocks per group */
+	int s_inodes_per_group;		/* inodes per group */
+};
+
+struct ext4_extents_info {
+	unsigned long long ino;		/* inode number */
+	int max_entries;		/* maximum extents count */
+	int entries;			/* extent number/count */
+	ext4_lblk_t f_offset;		/* file offset */
+	ext4_grpblk_t g_offset;		/* group offset */
+	ext4_fsblk_t goal;		/* block offset for allocation */
+	struct ext4_extent_data ext[DEFRAG_MAX_ENT];
+};
+
 #define EXT4_TRANS_META_BLOCKS 4 /* bitmap + group desc + sb + inode */

 /*
@@ -1010,6 +1042,8 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
 extern void ext4_init_block_alloc_info(struct inode *);
 extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
+extern ext4_grpblk_t bitmap_search_next_usable_block(ext4_grpblk_t,
+				struct buffer_head *, ext4_grpblk_t);

 /* dir.c */
 extern int ext4_check_dir_entry(const char *, struct inode *,
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e1b9c10..e012193 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -242,7 +242,10 @@ setversion_out:
 		return err;
 	}
 	case EXT4_IOC_FIBMAP:
-	case EXT4_IOC_DEFRAG: {
+	case EXT4_IOC_DEFRAG:
+	case EXT4_IOC_GROUP_INFO:
+	case EXT4_IOC_FREE_BLOCKS_INFO:
+	case EXT4_IOC_EXTENTS_INFO: {
 		return ext4_defrag_ioctl(inode, filp, cmd, arg);
 	}
 	case EXT4_IOC_GROUP_ADD: {

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ