lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <49019F04.9050303@rs.jp.nec.com>
Date:	Fri, 24 Oct 2008 19:10:12 +0900
From:	Akira Fujita <a-fujita@...jp.nec.com>
To:	linux-ext4@...r.kernel.org, Theodore Tso <tytso@....edu>,
	Mingming Cao <cmm@...ibm.com>
CC:	linux-fsdevel@...r.kernel.org
Subject: [RFC][PATCH 9/9]ext4: online defrag command

ext4: online defrag -- Online defrag command

From: Akira Fujita <a-fujita@...jp.nec.com>

- The defrag command. Usage is as follows:
  - Put the multiple files closer together.
    # e4defrag -r directory-name
  - Defrag for free space fragmentation.
    # e4defrag -f file-name
  - Defrag for a single file.
    # e4defrag file-name
  - Defrag for all files on ext4.
    # e4defrag device-name

Signed-off-by: Akira Fujita <a-fujita@...jp.nec.com>
Signed-off-by: Takashi Sato <t-sato@...jp.nec.com>
---
/*
 * e4defrag.c - ext4 filesystem defragmenter
 *
 * Copyright (C) 2008 NEC Software Tohoku, Ltd.
 *
 * Author: Akira Fujita	<a-fujita@...jp.nec.com>
 *         Takashi Sato	<t-sato@...jp.nec.com>
 */

#ifndef _LARGEFILE_SOURCE
#define _LARGEFILE_SOURCE
#endif

#ifndef _LARGEFILE64_SOURCE
#define _LARGEFILE64_SOURCE
#endif

#define _XOPEN_SOURCE	500
#define _GNU_SOURCE
#include <ftw.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <dirent.h>
#include <limits.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/statfs.h>
#include <sys/vfs.h>
#include <sys/ioctl.h>
#include <mntent.h>
#include <linux/fs.h>
#include <ctype.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <ext2fs/ext2_types.h>
#include <endian.h>

/* Ioctl command */
#define EXT4_IOC_DEFRAG		_IOW('f', 15, struct ext4_ext_defrag_data)
#define EXT4_IOC_SUPER_INFO	_IOR('f', 16, struct ext4_super_block)
#define EXT4_IOC_FREE_BLOCKS_INFO _IOWR('f', 17, struct ext4_extents_info)
#define EXT4_IOC_FIEMAP_INO	_IOWR('f', 18, struct fiemap_ino)
#define EXT4_IOC_MOVE_VICTIM	_IOW('f', 19, struct ext4_extents_info)
#define FS_IOC_FIEMAP		_IOWR('f', 11, struct fiemap)

/* Macro functions */
#define PRINT_ERR_MSG(msg)	fprintf(stderr, "%s\n", (msg));
#define IN_FTW_PRINT_ERR_MSG(msg)	\
	fprintf(stderr, "\t%s\t\t[ NG ]\n", (msg));
#define PRINT_FILE_NAME(file)	fprintf(stderr, " \"%s\"\n", (file));
#define PRINT_ERR_MSG_WITH_ERRNO(msg)	\
	fprintf(stderr, "\t%s:%s\t[ NG ]\n", (msg), strerror(errno));
#define min(x, y) (((x) > (y)) ? (y) : (x))
/* Wrap up the free function */
#define FREE(tmp)				\
	do {					\
		if (tmp)			\
			free(tmp);		\
	} while (0)				\
/* Insert list2 after list1 */
#define insert(list1, list2)			\
	do {					\
		list2->next = list1->next;	\
		list1->next->prev = list2;	\
		list2->prev = list1;		\
		list1->next = list2;		\
	} while (0)

#ifndef __NR_fadvise64
#define __NR_fadvise64		250
#endif

#ifndef __NR_sync_file_range
#define __NR_sync_file_range	314
#endif

#ifndef POSIX_FADV_DONTNEED
#if defined(__s390x__)
#define POSIX_FADV_DONTNEED	6 /* Don't need these pages */
#else
#define POSIX_FADV_DONTNEED	4 /* Don't need these pages */
#endif
#endif

#ifndef SYNC_FILE_RANGE_WAIT_BEFORE
#define SYNC_FILE_RANGE_WAIT_BEFORE	1
#endif
#ifndef SYNC_FILE_RANGE_WRITE
#define SYNC_FILE_RANGE_WRITE		2
#endif
#ifndef SYNC_FILE_RANGE_WAIT_AFTER
#define SYNC_FILE_RANGE_WAIT_AFTER	4
#endif

#define DEVNAME			0
#define DIRNAME			1
#define FILENAME		2

#define RETURN_OK		0
#define RETURN_NG		-1
#define FTW_CONT		0
#define FTW_OPEN_FD		2000

#define FS_EXT4			"ext4"
#define ROOT_UID		0
#define CHECK_FRAG_COUNT	1

/* Extent status which are used in extent_t */
#define EXT4_EXT_USE		0
#define EXT4_EXT_FREE		1

/* The maximum number of extents for exchanging between
 * kernel-space and user-space per ioctl
 */
#define DEFRAG_MAX_ENT	32

/* The phase of force defrag mode */
#define DEFRAG_FORCE_TRY	1
#define DEFRAG_FORCE_GATHER	3

/* Magic number for ext4 */
#define EXT4_SUPER_MAGIC	0xEF53

/* Defrag size(64MB) per ioctl(not including force mode) */
#define DEFRAG_SIZE		((unsigned long)1 << 26)

/* Force defrag mode: Max file size in bytes (128MB) */
#define	MAX_FILE_SIZE		((unsigned long)1 << 27)


/* The following four macros are used for ioctl FS_IOC_FIEMAP
 * FIEMAP_FLAG_SYNC:	sync file data before map.
 * FIEMAP_EXTENT_LAST:	last extent in file.
 * FIEMAP_MAX_OFFSET:	max file offset.
 * EXTENT_MAX_COUNT:	the maximum number of extents for exchanging between
			kernel-space and user-space per ioctl
 */
#define FIEMAP_FLAG_SYNC	0x00000001
#define FIEMAP_EXTENT_LAST	0x00000001
#define FIEMAP_MAX_OFFSET	(~0ULL)
#define EXTENT_MAX_COUNT	512

/* The following macros are error message */
#define MSG_USAGE		\
"Usage : e4defrag [-v] file...| directory...| device...\n\
      : e4defrag -f file [blocknr] \n\
      : e4defrag -r directory... | device... \n"

#define MSG_R_OPTION		" with regional block allocation mode.\n"
#define NGMSG_MTAB		"Can not access /etc/mtab"
#define NGMSG_UNMOUNT		"FS is not mounted"
#define NGMSG_EXT4		"FS is not ext4 File System"
#define NGMSG_FS_INFO		"Get FSInfo fail"
#define NGMSG_FILE_INFO		"Get FileInfo fail"
#define NGMSG_FILE_OPEN		"Open fail"
#define NGMSG_FILE_SYNC		"Sync(fsync) fail"
#define NGMSG_FILE_DEFRAG	"Defrag fail"
#define NGMSG_FILE_BLOCKSIZE	"Can't get blocksize"
#define NGMSG_FILE_SUPER	"Can't get super block info"
#define NGMSG_FILE_UNREG	"File is not regular file"
#define NGMSG_VICTIM_UID	"Victim file is not current user's file"

#define NGMSG_FILE_LARGE	\
	"Defrag size is larger than FileSystem's free space"

#define NGMSG_FILE_PRIORITY	\
"File is not current user's file or current user is not root"

#define NGMSG_FILE_LOCK		"File is locked"
#define NGMSG_FILE_BLANK	"File size is 0"
#define NGMSG_GET_LCKINFO	"Get LockInfo fail"
#define NGMSG_TYPE		\
	"Can not process %s in regional mode.\n"
#define NGMSG_LOST_FOUND	"Can not process \"lost+found\""
#define NGMSG_REALPATH		"Can not get full path"
#define NGMSG_FILE_MAP		"Get file map fail"
#define NGMSG_FILE_DROP_BUFFER	"Free page fail"
#define NGMSG_FADVISE_SYSCALL	"\tFadvise fail"
#define NGMSG_FORCE_DEFRAG_SIZE	\
"Cannot specify a file larger than %luMB in force defrag mode"
#define NGMSG_ENDIAN		"Endian type is not big/little endian"

/* Data type for filesystem-wide blocks number */
typedef unsigned long long ext4_fsblk_t;

/* Data type for file logical block number */
typedef unsigned int ext4_lblk_t;

/* Data type for block offset of block group */
typedef int ext4_grpblk_t;

struct ext4_extent_data {
	ext4_lblk_t  block;		/* start logical block number */
	ext4_fsblk_t start;		/* start physical block number */
	int len;			/* blocks count */
};

/* Used for defrag */
struct ext4_ext_defrag_data {
	ext4_lblk_t start_offset;	/* start offset to defrag in blocks */
	ext4_lblk_t defrag_size;	/* size of defrag in blocks */
	ext4_fsblk_t goal;		/* block offset for allocation */
	int flag;			/* free space mode flag */
	struct ext4_extent_data ext;
};

typedef __u16 __le16;
typedef __u32 __le32;
typedef __u64 __le64;

/*
 * Structure of the super block
 */
struct ext4_super_block {
/*00*/	__le32	s_inodes_count;		/* Inodes count */
	__le32	s_blocks_count_lo;	/* Blocks count */
	__le32	s_r_blocks_count_lo;	/* Reserved blocks count */
	__le32	s_free_blocks_count_lo;	/* Free blocks count */
/*10*/	__le32	s_free_inodes_count;	/* Free inodes count */
	__le32	s_first_data_block;	/* First Data Block */
	__le32	s_log_block_size;	/* Block size */
	__le32	s_obso_log_frag_size;	/* Obsoleted fragment size */
/*20*/	__le32	s_blocks_per_group;	/* # Blocks per group */
	__le32	s_obso_frags_per_group;	/* Obsoleted fragments per group */
	__le32	s_inodes_per_group;	/* # Inodes per group */
	__le32	s_mtime;		/* Mount time */
/*30*/	__le32	s_wtime;		/* Write time */
	__le16	s_mnt_count;		/* Mount count */
	__le16	s_max_mnt_count;	/* Maximal mount count */
	__le16	s_magic;		/* Magic signature */
	__le16	s_state;		/* File system state */
	__le16	s_errors;		/* Behaviour when detecting errors */
	__le16	s_minor_rev_level;	/* minor revision level */
/*40*/	__le32	s_lastcheck;		/* time of last check */
	__le32	s_checkinterval;	/* max. time between checks */
	__le32	s_creator_os;		/* OS */
	__le32	s_rev_level;		/* Revision level */
/*50*/	__le16	s_def_resuid;		/* Default uid for reserved blocks */
	__le16	s_def_resgid;		/* Default gid for reserved blocks */
	/*
	 * These fields are for EXT4_DYNAMIC_REV superblocks only.
	 *
	 * Note: the difference between the compatible feature set and
	 * the incompatible feature set is that if there is a bit set
	 * in the incompatible feature set that the kernel doesn't
	 * know about, it should refuse to mount the filesystem.
	 *
	 * e2fsck's requirements are more strict; if it doesn't know
	 * about a feature in either the compatible or incompatible
	 * feature set, it must abort and not try to meddle with
	 * things it doesn't understand...
	 */
	__le32	s_first_ino;		/* First non-reserved inode */
	__le16  s_inode_size;		/* size of inode structure */
	__le16	s_block_group_nr;	/* block group # of this superblock */
	__le32	s_feature_compat;	/* compatible feature set */
/*60*/	__le32	s_feature_incompat;	/* incompatible feature set */
	__le32	s_feature_ro_compat;	/* readonly-compatible feature set */
/*68*/	__u8	s_uuid[16];		/* 128-bit uuid for volume */
/*78*/	char	s_volume_name[16];	/* volume name */
/*88*/	char	s_last_mounted[64];	/* directory where last mounted */
/*C8*/	__le32	s_algorithm_usage_bitmap; /* For compression */
	/*
	 * Performance hints.  Directory preallocation should only
	 * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
	 */
	__u8	s_prealloc_blocks;	/* Nr of blocks to try to preallocate*/
	__u8	s_prealloc_dir_blocks;	/* Nr to preallocate for dirs */
	__le16	s_reserved_gdt_blocks;	/* Per group desc for online growth */
	/*
	 * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
	 */
/*D0*/	__u8	s_journal_uuid[16];	/* uuid of journal superblock */
/*E0*/	__le32	s_journal_inum;		/* inode number of journal file */
	__le32	s_journal_dev;		/* device number of journal file */
	__le32	s_last_orphan;		/* start of list of inodes to delete */
	__le32	s_hash_seed[4];		/* HTREE hash seed */
	__u8	s_def_hash_version;	/* Default hash version to use */
	__u8	s_reserved_char_pad;
	__le16  s_desc_size;		/* size of group descriptor */
/*100*/	__le32	s_default_mount_opts;
	__le32	s_first_meta_bg;	/* First metablock block group */
	__le32	s_mkfs_time;		/* When the filesystem was created */
	__le32	s_jnl_blocks[17];	/* Backup of the journal inode */
	/* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
/*150*/	__le32	s_blocks_count_hi;	/* Blocks count */
	__le32	s_r_blocks_count_hi;	/* Reserved blocks count */
	__le32	s_free_blocks_count_hi;	/* Free blocks count */
	__le16	s_min_extra_isize;	/* All inodes have at least # bytes */
	__le16	s_want_extra_isize; 	/* New inodes should reserve # bytes */
	__le32	s_flags;		/* Miscellaneous flags */
	__le16  s_raid_stride;		/* RAID stride */
	__le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
	__le64  s_mmp_block;            /* Block for multi-mount protection */
	__le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
	__u8	s_log_groups_per_flex;  /* FLEX_BG group size */
	__u8	s_reserved_char_pad2;
	__le16  s_reserved_pad;
	__u32   s_reserved[162];        /* Padding to the end of the block */
};

struct ext4_extents_info {
	unsigned long long ino;		/* inode number */
	int max_entries;		/* maximum extents count */
	int entries; 	 		/* extent number/count */
	ext4_lblk_t  f_offset;		/* file offset */
	ext4_grpblk_t g_offset;		/* group offset */
	ext4_fsblk_t goal;
	struct ext4_extent_data ext[DEFRAG_MAX_ENT];
};

typedef struct extent {
	struct extent *prev;
	unsigned long tag;		/* extent status */
	unsigned long ino;		/* file's inode number */
	struct ext4_extent_data data;	/* extent belong to file */
	struct extent *next;
} extent_t;

typedef struct exts_group {
	struct exts_group *prev;
	extent_t *start;		/* start ext */
	extent_t *end;			/* end ext */
	int len;			/* length of this continuous region */
	struct exts_group *next;
} exts_group_t;

struct fiemap_extent {
	__u64 fe_logical;	/* logical offset in bytes for the start of
				* the extent from the beginning of the file */
	__u64 fe_physical;	/* physical offset in bytes for the start
				* of the extent from the beginning
				* of the disk */
	__u64 fe_length;	/* length in bytes for this extent */
	__u64 fe_reserved64[2];
	__u32 fe_flags;    /* FIEMAP_EXTENT_* flags for this extent */
	__u32 fe_reserved[3];
};

struct fiemap {
	__u64 fm_start;		/* logical offset (inclusive) at
				* which to start mapping (in) */
	__u64 fm_length;	/* logical length of mapping which
				* userspace wants (in) */
	__u32 fm_flags;		/* FIEMAP_FLAG_* flags for request (in/out) */
	__u32 fm_mapped_extents;/* number of extents that were mapped (out) */
	__u32 fm_extent_count;	/* size of fm_extents array (in) */
	__u32 fm_reserved;
	struct fiemap_extent fm_extents[0];/* array of mapped extents (out) */
};

struct fiemap_ino {
	__u64 ino;
	struct fiemap fiemap;
};

int	force_flag;
int	detail_flag;
int	regional_flag;
int	extents_before_defrag;
int	extents_after_defrag;
unsigned long	succeed_cnt;
unsigned long	regular_count;
unsigned long	total_count;
unsigned long	frag_files_before_defrag;
unsigned long	frag_files_after_defrag;
unsigned long	defraged_file_count;
char	lost_found_dir[PATH_MAX + 1];
ext4_fsblk_t	goal;
ext4_fsblk_t	fgoal = -1;

/*
 * ext2fs_swab32() -	Change endian.
 *
 * @val:		the entry used for change.
 */
__u32 ext2fs_swab32(__u32 val)
{
#if BYTE_ORDER == BIG_ENDIAN
	return (val>>24) | ((val>>8)&0xFF00) |
		((val<<8)&0xFF0000) | (val<<24);
#else
	return val;
#endif
}

/*
 * fadvise() -		Give advice about file access.
 *
 * @fd:			the file's descriptor.
 * @offset:		file offset.
 * @len:		area length.
 * @advise:		process flag.
 */
int fadvise(int fd, loff_t offset, size_t len, int advise)
{
	return syscall(__NR_fadvise64, fd, offset, len, advise);
}

/*
 * sync_file_range() -	Sync file region.
 *
 * @fd:			the file's descriptor.
 * @offset:		file offset.
 * @length:		area length.
 * @flag:		process flag.
 */
int sync_file_range(int fd, loff_t offset, loff_t length, unsigned int flag)
{
	return syscall(__NR_sync_file_range, fd, offset, length, flag);
}

/*
 * page_in_core() -	Get information on whether pages are in core.
 *
 * @fd:			the file's descriptor.
 * @defrag_data:	data used for defrag.
 * @vec:		page state array.
 * @page_num:		page number.
 */
int page_in_core(int fd, struct ext4_ext_defrag_data defrag_data,
		 unsigned char **vec, unsigned long *page_num)
{
	int blocksize;
	int pagesize = getpagesize();
	void *page = NULL;
	loff_t offset, end_offset, length;

	if (vec == NULL || *vec != NULL)
		return RETURN_NG;

	if (ioctl(fd, FIGETBSZ, &blocksize) < 0)
		return RETURN_NG;

	/* In mmap, offset should be a multiple of the page size */
	offset = defrag_data.start_offset * blocksize;
	length = defrag_data.defrag_size * blocksize;
	end_offset = offset + length;
	/* Round the offset down to the nearest multiple of pagesize */
	offset = (offset / pagesize) * pagesize;
	length = end_offset - offset;

	page = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, offset);
	if (page == MAP_FAILED)
		return RETURN_NG;

	*page_num = 0;
	*page_num = (length + pagesize - 1) / pagesize;
	*vec = (unsigned char *)calloc(*page_num, 1);
	if (*vec == NULL)
		return RETURN_NG;

	/* Get information on whether pages are in core */
	if (mincore(page, (size_t)length, *vec) == -1 ||
	    munmap(page, length) == -1) {
		FREE(*vec);
		return RETURN_NG;
	}

	return RETURN_OK;
}

/*
 * defrag_fadvise() -	Predeclare an access pattern for file data.
 *
 * @fd:			the file's descriptor.
 * @defrag_data:	data used for defrag.
 * @vec:		page state array.
 * @page_num:		page number.
 */
int defrag_fadvise(int fd, struct ext4_ext_defrag_data defrag_data,
		   unsigned char *vec, unsigned long page_num)
{
	int flag = 1;
	int blocksize;
	int pagesize = getpagesize();
	int fadvise_flag = POSIX_FADV_DONTNEED;
	int sync_flag = SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE|
			SYNC_FILE_RANGE_WAIT_AFTER;
	unsigned long i;
	loff_t offset;

	if (ioctl(fd, FIGETBSZ, &blocksize) < 0)
		return RETURN_NG;

	offset = (loff_t)defrag_data.start_offset * blocksize;
	offset = (offset / pagesize) * pagesize;

	/* Sync file for fadvise process */
	if (sync_file_range(fd, offset,
		(loff_t)pagesize*page_num, sync_flag) != 0)
		return RETURN_NG;

	/* Try to release buffer cache which this process used,
	 * then other process can use the released buffer
	 */
	for (i = 0; i < page_num; i++) {
		if ((vec[i] & 0x1) == 0) {
			offset += pagesize;
			continue;
		}
		if (fadvise(fd, offset, pagesize, fadvise_flag) != 0) {
			if (detail_flag && flag) {
				perror(NGMSG_FADVISE_SYSCALL);
				flag = 0;
			}
		}
		offset += pagesize;
	}

	return RETURN_OK;
}

/*
 * check_free_size() -	Check if there's enough disk space.
 *
 * @fd:			the file's descriptor.
 * @file_name		file name.
 * @buf:		the pointer of the struct stat64.
 */
int check_free_size(int fd, const char *file_name, const struct stat64 *buf)
{
	off64_t	size = 0;
	off64_t	free_size = 0;
	struct statfs	fsbuf;

	/* Target file size */
	size = buf->st_size;

	if (fstatfs(fd, &fsbuf) < 0) {
		if (detail_flag) {
			PRINT_FILE_NAME(file_name);
			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FS_INFO);
		}
		return RETURN_NG;
	}

	/* Compute free space for root and normal user separately */
	if (getuid() == ROOT_UID)
		free_size = (off64_t)fsbuf.f_bsize * fsbuf.f_bfree;
	else
		free_size = (off64_t)fsbuf.f_bsize * fsbuf.f_bavail;

	if (free_size >= size)
		return RETURN_OK;

	if (detail_flag) {
		PRINT_FILE_NAME(file_name);
		IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_LARGE);
	}
	return RETURN_NG;
}

int file_check(int fd, const struct stat64 *buf, const char *file_name);
int force_defrag(int fd, const char *file_name,
		const struct stat64 *buf, int blocksize);

/*
 * file_frag_count() -  Get file fragment count.
 *
 * @fd:			the file's descriptor.
 */
long long file_frag_count(int fd)
{
	int ret = RETURN_NG;
	struct fiemap file_extent_map;

	/* When fm_extent_count is 0,
	 * ioctl just get file fragment count.
	 */
	memset(&file_extent_map, 0, sizeof(struct fiemap));
	file_extent_map.fm_start = 0;
	file_extent_map.fm_length = FIEMAP_MAX_OFFSET;
	file_extent_map.fm_flags |= FIEMAP_FLAG_SYNC;

	ret = ioctl(fd, FS_IOC_FIEMAP, &file_extent_map);
	if (ret < 0)
		return RETURN_NG;

	return file_extent_map.fm_mapped_extents;
}

/*
 * ftw_fn() -           Check file attributes and ioctl call to avoid.
 * 			illegal operations.
 *
 * @file:		the file's name.
 * @buf:		the pointer of the struct stat64.
 * @flag:		file type.
 * @ftwbuf:		the pointer of a struct FTW.
 */
int ftw_fn(const char *file, const struct stat64 *buf, int flag,
	   struct FTW *ftwbuf)
{
	int	fd;
	int	blocksize;
	int	percent = 0;
	int	defrag_fail = 0;
	int	defraged_size = 0;
	int	ret = RETURN_NG;
	long long	file_frags_start, file_frags_end;
	unsigned long	page_num;
	unsigned char	*vec = NULL;
	loff_t	start = 0;
	struct ext4_ext_defrag_data	df_data;

	defraged_file_count++;

	if (detail_flag) {
		printf("[%lu/%lu]", defraged_file_count, total_count);
		fflush(stdout);
	}

	if (lost_found_dir[0] != '\0' &&
	    !memcmp(file, lost_found_dir, strnlen(lost_found_dir, PATH_MAX))) {
		if (detail_flag) {
			PRINT_FILE_NAME(file);
			IN_FTW_PRINT_ERR_MSG(NGMSG_LOST_FOUND);
		}
		return FTW_CONT;
	}

	if (flag != FTW_F) {
		if (detail_flag) {
			PRINT_FILE_NAME(file);
			IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_UNREG);
		}
		return FTW_CONT;
	}

	fd = open64(file, O_RDONLY);
	if (fd < 0) {
		if (detail_flag) {
			PRINT_FILE_NAME(file);
			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
		}
		return FTW_CONT;
	}

	if (file_check(fd, buf, file) == RETURN_NG)
		goto out;

	if (fsync(fd) < 0) {
		if (detail_flag) {
			PRINT_FILE_NAME(file);
			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_SYNC);
		}
		goto out;
	}
	/* Get blocksize */
	if (ioctl(fd, FIGETBSZ, &blocksize) < 0) {
		if (detail_flag) {
			PRINT_FILE_NAME(file);
			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_BLOCKSIZE);
		}
		goto out;
	}
	/* Ioctl call does the actual defragment job */
	df_data.start_offset = 0;
	df_data.goal = goal;
	df_data.ext.len = 0;
	df_data.flag = force_flag;

	/* Count file fragments before defrag */
	if (detail_flag) {
		file_frags_start = file_frag_count(fd);
		if (file_frags_start == RETURN_NG) {
			PRINT_FILE_NAME(file);
			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO);
			goto out;
		}

		if (file_frags_start != 1)
			frag_files_before_defrag++;

		extents_before_defrag += file_frags_start;
	}

	/* Print process progress */
	percent = (start * 100) / buf->st_size;
	printf("\033[79;0H\033[K[%lu/%lu]%s:\t%3d%%",
		defraged_file_count, total_count, file, min(percent, 100));
	fflush(stdout);

	while (1) {
		df_data.defrag_size = (min((buf->st_size - start),
			DEFRAG_SIZE) + blocksize - 1) / blocksize;
		ret = page_in_core(fd, df_data, &vec, &page_num);
		if (ret == RETURN_NG) {
			if (detail_flag) {
				printf("\n");
				PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_MAP);
			} else {
				printf("\t[ NG ]\n");
			}
			defrag_fail = 1;
			break;
		}

		/* EXT4_IOC_DEFRAG */
		defraged_size = ioctl(fd, EXT4_IOC_DEFRAG, &df_data);

		/* Free pages */
		ret = defrag_fadvise(fd, df_data, vec, page_num);
		if (vec) {
			free(vec);
			vec = NULL;
		}
		if (ret == RETURN_NG) {
			if (detail_flag) {
				printf("\n");
				PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_DROP_BUFFER
					);
			} else {
				printf("\t[ NG ]\n");
			}
			defrag_fail = 1;
			break;
		}

		/* Go into force defrag mode */
		if (defraged_size < 0 && force_flag == DEFRAG_FORCE_TRY
					&& errno == ENOSPC) {
			/* File size is larger than max size of
			 * force defrag mode
			 */
			if (buf->st_size > MAX_FILE_SIZE) {
				if (detail_flag) {
					fprintf(stderr, "\n\t");
					fprintf(stderr, NGMSG_FORCE_DEFRAG_SIZE,
						(MAX_FILE_SIZE) / (1024 * 1024)
						);
					fprintf(stderr, "\t[ NG ]\n");
				} else {
					printf("\t[ NG ]\n");
				}
				defrag_fail = 1;
				break;
			}

			printf("\033[79;0H\033[K");
			defraged_size = force_defrag(fd, file, buf, blocksize);
			if (defraged_size * blocksize >= buf->st_size)
				/* Whole file is enforcedly defraged */
				break;
			else {
				if (!detail_flag) {
					printf("\033[79;0H\033[K[%lu/%lu]%s\t"
						"0%%\t[ NG ]\n",
						defraged_file_count,
						total_count, file);
				}
				defrag_fail = 1;
				break;
			}
		}
		if (defraged_size < 0) {
			if (detail_flag) {
				printf("\n");
				PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_DEFRAG);
			} else {
				printf("\t[ NG ]\n");
			}
			defrag_fail = 1;
			break;
		}
		df_data.start_offset += defraged_size;
		start = (long long)df_data.start_offset * blocksize;

		/* Print process progress */
		percent = (start * 100) / buf->st_size;

		/* Disk space file used is bigger than logical size */
		printf("\033[79;0H\033[K[%lu/%lu]%s:\t%3d%%",
			defraged_file_count, total_count, file,
				min(percent, 100));
		fflush(stdout);

		/* End of file */
		if (start >= buf->st_size)
			break;
	}

	/* Count file fragments after defrag and print extents info */
	if (detail_flag) {
		file_frags_end = file_frag_count(fd);
		if (file_frags_end == RETURN_NG) {
			printf("\n");
			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO);
			goto out;
		}

		if (file_frags_end != 1)
			frag_files_after_defrag++;

		extents_after_defrag += file_frags_end;

		if (defrag_fail)
			goto out;

		printf("  extents: %lld -> %lld",
			file_frags_start, file_frags_end);
		fflush(stdout);
	}

	if (defrag_fail)
		goto out;

	printf("\t[ OK ]\n");
	fflush(stdout);
	succeed_cnt++;

out:
	close(fd);
	return FTW_CONT;
}

/*
 * file_check() -       Check file's attributes.
 *
 * @fd:			the file's descriptor.
 * @buf:		a pointer of the struct stat64.
 * @file_name:		the file's name.
 */
int file_check(int fd, const struct stat64 *buf, const char *file_name)
{
	struct flock	lock;

	/* Write-lock check is more reliable */
	lock.l_type = F_WRLCK;
	lock.l_start = 0;
	lock.l_whence = SEEK_SET;
	lock.l_len = 0;

	/* Regular file */
	if (S_ISREG(buf->st_mode) == 0) {
		if (detail_flag) {
			PRINT_FILE_NAME(file_name);
			IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_UNREG);
		}
		return RETURN_NG;
	}

	/* Free space */
	if (check_free_size(fd, file_name, buf) == RETURN_NG)
		return RETURN_NG;

	/* Priority */
	if (getuid() != ROOT_UID &&
		buf->st_uid != getuid()) {
		if (detail_flag) {
			PRINT_FILE_NAME(file_name);
			IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_PRIORITY);
		}
		return RETURN_NG;
	}

	/* Lock status */
	if (fcntl(fd, F_GETLK, &lock) < 0) {
		if (detail_flag) {
			PRINT_FILE_NAME(file_name);
			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_GET_LCKINFO);
		}
		return RETURN_NG;
	} else if (lock.l_type != F_UNLCK) {
		if (detail_flag) {
			PRINT_FILE_NAME(file_name);
			IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_LOCK);
		}
		return RETURN_NG;
	}

	/* Empty file */
	if (buf->st_size == 0) {
		if (detail_flag) {
			PRINT_FILE_NAME(file_name);
			IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_BLANK);
		}
		return RETURN_NG;
	}

	return RETURN_OK;
}

/*
 * is_ext4() -		Whether on an ext4 filesystem.
 *
 * @filename:		the file's name.
 */
int is_ext4(const char *filename)
{
	int 	maxlen, len, ret;
	FILE	*fp = NULL;
	char	*mnt_type = NULL;
	/* Refer to /etc/mtab */
	char	*mtab = MOUNTED;
	char	file_path[PATH_MAX + 1];
	struct mntent	*mnt = NULL;
	struct statfs	buffs;

	/* Get full path */
	if (realpath(filename, file_path) == NULL) {
		perror(NGMSG_REALPATH);
		PRINT_FILE_NAME(filename);
		return RETURN_NG;
	}

	if (statfs(file_path, &buffs) < 0) {
		perror(NGMSG_FS_INFO);
		PRINT_FILE_NAME(filename);
		return RETURN_NG;
	}

	if (buffs.f_type != EXT4_SUPER_MAGIC) {
		PRINT_ERR_MSG(NGMSG_EXT4);
		return RETURN_NG;
	}

	fp = setmntent(mtab, "r");
	if (fp == NULL) {
		perror(NGMSG_MTAB);
		return RETURN_NG;
	}

	maxlen = 0;
	while ((mnt = getmntent(fp)) != NULL) {
		len = strlen(mnt->mnt_dir);
		ret = memcmp(file_path, mnt->mnt_dir, len);
		if (ret != 0)
			continue;

		if (maxlen >= len)
			continue;

		maxlen = len;

		mnt_type = realloc(mnt_type, strlen(mnt->mnt_type) + 1);
		if (!mnt_type) {
			endmntent(fp);
			return RETURN_NG;
		}
		strcpy(mnt_type, mnt->mnt_type);
		strncpy(lost_found_dir, mnt->mnt_dir, PATH_MAX);
	}

	endmntent(fp);
	if (strcmp(mnt_type, FS_EXT4) == 0) {
		FREE(mnt_type);
		return RETURN_OK;
	} else {
		FREE(mnt_type);
		PRINT_ERR_MSG(NGMSG_EXT4);
		return RETURN_NG;
	}
}

/*
 * calc_entry_counts -	calculate file counts
 *
 * @file:		file name.
 * @buf:		file info.
 * @flag:		file type.
 * @ftwbuf:		the pointer of a struct FTW.
 */
int calc_entry_counts(const char *file, const struct stat64 *buf, int flag,
			   struct FTW *ftwbuf)
{

	if (flag == FTW_F)
		regular_count++;

	total_count++;

	return FTW_CONT;
}

/*
 * get_mount_point() -	Get device's mount point.
 *
 * @devname:		the device's name.
 * @mount_point:	the mount point.
 * @dir_path_len:	the length of directory.
 */
int get_mount_point(const char *devname, char *mount_point,
		int dir_path_len)
{
	/* Refer to /etc/mtab */
	char	*mtab = MOUNTED;
	FILE	*fp = NULL;
	struct mntent	*mnt = NULL;

	fp = setmntent(mtab, "r");
	if (fp == NULL) {
		perror(NGMSG_MTAB);
		return RETURN_NG;
	}

	while ((mnt = getmntent(fp)) != NULL) {
		if (strcmp(devname, mnt->mnt_fsname) != 0)
			continue;

		endmntent(fp);
		if (strcmp(mnt->mnt_type, FS_EXT4) == 0) {
			strncpy(mount_point, mnt->mnt_dir,
				dir_path_len);
			return RETURN_OK;
		}
		PRINT_ERR_MSG(NGMSG_EXT4);
		return RETURN_NG;
	}
	endmntent(fp);
	PRINT_ERR_MSG(NGMSG_UNMOUNT);
	return RETURN_NG;
}

/*
 * main() -		ext4 online defrag.
 *
 * @argc:		the number of parameter.
 * @argv[]:		the pointer array of parameter.
 */
int main(int argc, char *argv[])
{
	int	fd;
	int	opt;
	int	i, flags;
	int	arg_type;
	int	detail_tmp;
	int	success_flag;
	char	dir_name[PATH_MAX + 1];
	struct stat64	buf;

	i = 1;
	flags = 0;
	arg_type = -1;
	detail_tmp = -1;
	success_flag = 0;
	/* Do not follow symlink */
	flags |= FTW_PHYS;
	/* Stay within the same filesystem */
	flags |= FTW_MOUNT;

	/* Parse arguments */
	if (argc == 1 || (argc == 2 && argv[1][0] == '-'))
		goto out;

	while ((opt = getopt(argc, argv, "rvf")) != EOF) {
		switch (opt) {
		case 'r':
			regional_flag = 1;
			i = 2;
			break;
		case 'v':
			detail_flag = 1;
			i = 2;
			break;
		case 'f':
			force_flag = DEFRAG_FORCE_TRY;
			i = 2;

			if (argc == 3)
				break;

			if (argc > 4) {
				printf("Illegal argument\n");
				goto out;
			}

			/* Now the agrc must be 4(e4defrag -f file blocknr) */
			int res_strlen;
			res_strlen = strlen(argv[3]);
			for (res_strlen -= 1; res_strlen >= 0; res_strlen--) {
				if (!isdigit(argv[3][res_strlen])) {
					printf("Illegal argument\n");
					goto out;
				}
			}

			fgoal = strtoul(argv[3], NULL, 0);
			if (errno) {
				printf("block num shold be < 4294967295"
					"(max num of unsigned long 32bit)\n");
				exit(1);
			}
			if (!fgoal)
				fgoal = -1;
			break;
		default:
			goto out;
		}
	}

	/* Main process */
	for (; i < argc; i++) {
		succeed_cnt = 0;
		regular_count = 0;
		total_count = 0;
		frag_files_before_defrag = 0;
		frag_files_after_defrag = 0;
		extents_before_defrag = 0;
		extents_after_defrag = 0;
		defraged_file_count = 0;

		memset(dir_name, 0, PATH_MAX + 1);
		memset(lost_found_dir, 0, PATH_MAX + 1);

		if (force_flag && i == 3)
			break;

		#if BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN
			PRINT_ERR_MSG(NGMSG_ENDIAN);
			PRINT_FILE_NAME(argv[i]);
			continue;
		#endif

		if (lstat64(argv[i], &buf) < 0) {
			perror(NGMSG_FILE_INFO);
			PRINT_FILE_NAME(argv[i]);
			continue;
		}

		/* Only regular file is acceptalbe with force defrag mode */
		if (force_flag && !S_ISREG(buf.st_mode)) {
			printf("Inappropriate file type\n");
			goto out;
		}

		if (S_ISBLK(buf.st_mode)) {
			/* Block device */
			if (get_mount_point(argv[i], dir_name, PATH_MAX)
							== RETURN_NG)
				continue;
			arg_type = DEVNAME;
			printf("ext4 defragmentation for device(%s)\n",
				argv[i]);
		} else if (S_ISDIR(buf.st_mode)) {
			/* Directory */
			if (access(argv[i], R_OK) < 0) {
				perror(argv[i]);
				continue;
			}
			arg_type = DIRNAME;
			strcpy(dir_name, argv[i]);
		} else if (S_ISREG(buf.st_mode)) {
			/* Regular file */
			arg_type = FILENAME;
		} else {
			/* Irregular file */
			PRINT_ERR_MSG(NGMSG_FILE_UNREG);
			PRINT_FILE_NAME(argv[i]);
			continue;
		}

		/* For device case,
		 * filesystem type checked in get_mount_point()
		 */
		if (arg_type == FILENAME || arg_type == DIRNAME) {
			if (is_ext4(argv[i]) == RETURN_NG)
				continue;
			if (realpath(argv[i], dir_name) == NULL) {
				perror(NGMSG_REALPATH);
				PRINT_FILE_NAME(argv[i]);
				continue;
			}
		}

		switch (arg_type) {
		case DIRNAME:
			printf("ext4 defragmentation for directory(%s)\n",
				argv[i]);

			int mount_dir_len = 0;
			mount_dir_len = strnlen(lost_found_dir, PATH_MAX);

			strncat(lost_found_dir, "/lost+found",
				PATH_MAX - strnlen(lost_found_dir, PATH_MAX));

			/* Not the case("e4defrag  mount_piont_dir") */
			if (dir_name[mount_dir_len] != '\0') {
				/* "e4defrag mount_piont_dir/lost+found"
				 * or "e4defrag mount_piont_dir/lost+found/"
				 */
				if (strncmp(lost_found_dir, dir_name,
					    strnlen(lost_found_dir,
						    PATH_MAX)) == 0 &&
				    (dir_name[strnlen(lost_found_dir,
						      PATH_MAX)] == '\0' ||
				     dir_name[strnlen(lost_found_dir,
						      PATH_MAX)] == '/')) {
					PRINT_ERR_MSG(NGMSG_LOST_FOUND);
					PRINT_FILE_NAME(argv[i]);
					continue;
				}

				/* "e4defrag mount_piont_dir/else_dir" */
				memset(lost_found_dir, 0, PATH_MAX + 1);
			}
		case DEVNAME:
			if (arg_type == DEVNAME) {
				strncpy(lost_found_dir, dir_name,
					strnlen(dir_name, PATH_MAX));
				strncat(lost_found_dir, "/lost+found/",
					PATH_MAX - strnlen(lost_found_dir,
							   PATH_MAX));
			}

			/* Regional block allocation */
			if (regional_flag) {
				unsigned int bg_num = 0;
				int ret = 0;
				struct ext4_super_block sb;

				printf(MSG_R_OPTION);

				fd = open64(dir_name, O_RDONLY);
				if (fd < 0) {
					if (detail_flag) {
						perror(NGMSG_FILE_OPEN);
						PRINT_FILE_NAME(dir_name);
					}
					continue;
				}

				memset(&sb, 0, sizeof(struct ext4_super_block));

				ret = ioctl(fd, EXT4_IOC_SUPER_INFO, &sb);
				if (ret < 0) {
					perror(NGMSG_FILE_SUPER);
					PRINT_FILE_NAME(dir_name);
					close(fd);
					continue;
				}
				close(fd);

				sb.s_blocks_per_group =
					ext2fs_swab32(sb.s_blocks_per_group);
				sb.s_inodes_per_group =
					ext2fs_swab32(sb.s_inodes_per_group);
				sb.s_first_data_block =
					ext2fs_swab32(sb.s_first_data_block);

				bg_num = (buf.st_ino - 1) /
							sb.s_inodes_per_group;
				goal = bg_num * sb.s_blocks_per_group +
							sb.s_first_data_block;
			}
			nftw64(dir_name, calc_entry_counts, FTW_OPEN_FD, flags);

			/* File tree walk */
			nftw64(dir_name, ftw_fn, FTW_OPEN_FD, flags);
			printf("\n\tSuccess:\t\t\t[ %lu/%lu ]\n", succeed_cnt,
				total_count);
			printf("\tFailure:\t\t\t[ %lu/%lu ]\n",
				total_count - succeed_cnt, total_count);
			if (detail_flag) {
				printf("\tTotal extents:\t\t\t%4d->%d\n",
					extents_before_defrag,
					extents_after_defrag);
				printf("\tFragmented percentage:\t\t"
					"%3llu%%->%llu%%\n",
					!regular_count ? 0 :
					((unsigned long long)
					frag_files_before_defrag * 100) /
					regular_count,
					!regular_count ? 0 :
					((unsigned long long)
					frag_files_after_defrag * 100) /
					regular_count);
			}
			break;
		case FILENAME:
			total_count = 1;
			strncat(lost_found_dir, "/lost+found/",
				PATH_MAX - strnlen(lost_found_dir,
						   PATH_MAX));
			if (strncmp(lost_found_dir, dir_name,
				    strnlen(lost_found_dir,
					    PATH_MAX)) == 0) {
				PRINT_ERR_MSG(NGMSG_LOST_FOUND);
				PRINT_FILE_NAME(argv[i]);
				continue;
			}

			if (regional_flag) {
				fprintf(stderr, NGMSG_TYPE, argv[i]);
				continue;
			}
			printf("ext4 defragmentation for %s\n", argv[i]);
			/* Defrag single file process */
			ftw_fn(argv[i], &buf, FTW_F, NULL);
			if (succeed_cnt != 0)
				printf(" Success:\t\t\t[1/1]\n");
			else
				printf(" Success:\t\t\t[0/1]\n");

			break;
		}

		if (succeed_cnt != 0)
			success_flag = 1;
	}

	if (success_flag)
		return RETURN_OK;

	exit(1);

out:
	printf(MSG_USAGE);
	exit(1);
}

/*
 * insert_extent() -	Sequentially insert extent by physical block number.
 *
 * @extlist_head:	the head of an extent list.
 * @ext:		the extent element which will be inserted.
 */
int insert_extent(extent_t **extlist_head, extent_t *ext)
{
	extent_t	*ext_tmp = *extlist_head;

	if (ext == NULL)
		return RETURN_NG;

	/* First element */
	if (*extlist_head == NULL) {
		(*extlist_head) = ext;
		(*extlist_head)->prev = *extlist_head;
		(*extlist_head)->next = *extlist_head;
		return RETURN_OK;
	}

	if (ext->data.start <= ext_tmp->data.start) {
		/* Insert before head */
		if (ext_tmp->data.start < ext->data.start + ext->data.len)
			/* Overlap */
			return RETURN_NG;
		/* Adjust head */
		*extlist_head = ext;
	} else {
		/* Insert into the middle or last of the list */
		do {
			if (ext->data.start < ext_tmp->data.start)
				break;
			ext_tmp = ext_tmp->next;
		} while (ext_tmp != (*extlist_head));
		if (ext->data.start <
		    ext_tmp->prev->data.start + ext_tmp->prev->data.len)
			/* Overlap */
			return RETURN_NG;

		if (ext_tmp != *extlist_head &&
		    ext_tmp->data.start < ext->data.start + ext->data.len)
			/* Overlap */
			return RETURN_NG;
	}
	ext_tmp = ext_tmp->prev;
	/* Insert "ext" after "ext_tmp" */
	insert(ext_tmp, ext);
	return RETURN_OK;
}

/*
 * insert_exts_group() -	Insert a exts_group in decreasing order
 *				of length.
 *
 * @exts_group_list_head:	the head of a exts_group list.
 * @exts_group:			the exts_group element which will be inserted.
 */
int insert_exts_group(exts_group_t **exts_group_list_head,
		      exts_group_t *exts_group)
{
	exts_group_t	*exts_group_tmp = NULL;

	if (exts_group == NULL)
		return RETURN_NG;

	/* Initialize list */
	if (*exts_group_list_head == NULL) {
		(*exts_group_list_head) = exts_group;
		(*exts_group_list_head)->prev = *exts_group_list_head;
		(*exts_group_list_head)->next = *exts_group_list_head;
		return RETURN_OK;
	}

	if (exts_group->len >= (*exts_group_list_head)->len) {
		/* Insert before exts_group_list_head */
		exts_group_tmp = (*exts_group_list_head)->prev;
		insert(exts_group_tmp, exts_group);
		*exts_group_list_head = exts_group;
		return RETURN_OK;
	}

	/* Find insertion positon */
	for (exts_group_tmp = (*exts_group_list_head)->next;
	     exts_group_tmp != *exts_group_list_head;
	     exts_group_tmp = exts_group_tmp->next) {
		if (exts_group_tmp->len < exts_group->len)
			break;
	}
	exts_group_tmp = exts_group_tmp->prev;
	insert(exts_group_tmp, exts_group);

	return RETURN_OK;
}

/*
 * get_exts_group() -		Get element from the exts_group list.
 *
 * @exts_group_list_head:	the head of a exts_group list.
 * @exts_group:			the exts_group element which will be got.
 */
exts_group_t *get_exts_group(exts_group_t **exts_group_list_head,
			      exts_group_t *exts_group)
{
	if (exts_group == NULL || *exts_group_list_head == NULL)
		return NULL;

	/* Keep "exts_group_list_head" point to the largest extent group */
	if (exts_group == *exts_group_list_head)
		*exts_group_list_head = exts_group->next;

	if (*exts_group_list_head == (*exts_group_list_head)->next &&
	    exts_group == *exts_group_list_head)
		/* Delete the last element in the list */
		*exts_group_list_head = NULL;

	exts_group->prev->next = exts_group->next;
	exts_group->next->prev = exts_group->prev;
	return exts_group;
}

/*
 * free_exts_group() -		Free the exts_group.
 *
 * @*exts_group_list_head:	the exts_group list head which will be free.
 */
 void free_exts_group(exts_group_t *exts_group_list_head)
{
	exts_group_t *exts_group_tmp = NULL;

	if (exts_group_list_head == NULL)
		return;

	while (exts_group_list_head->next != exts_group_list_head) {
		exts_group_tmp = exts_group_list_head;
		exts_group_list_head->prev->next = exts_group_list_head->next;
		exts_group_list_head->next->prev = exts_group_list_head->prev;
		exts_group_list_head = exts_group_list_head->next;
		free(exts_group_tmp);
	}
	free(exts_group_list_head);
}

/*
 * free_ext() -		Free the extent list.
 *
 * @extent_list_head:	the extent list head of which will be free.
 */
void free_ext(extent_t *extent_list_head)
{
	extent_t *extent_tmp = NULL;

	if (extent_list_head == NULL)
		return;

	while (extent_list_head->next != extent_list_head) {
		extent_tmp = extent_list_head;
		extent_list_head->prev->next = extent_list_head->next;
		extent_list_head->next->prev = extent_list_head->prev;
		extent_list_head = extent_list_head->next;
		free(extent_tmp);
	}
	free(extent_list_head);
}

/*
 * do_defrag() -	Execute the defrag program in force mode.
 *
 * @fd:			the file's descriptor.
 * @exts_group:		the exts_group which will be defraged.
 * @defrag_data:	the data which will be defraged.
 */
int do_defrag(int fd, exts_group_t *exts_group,
	      struct ext4_ext_defrag_data defrag_data)
{
	int defraged_size = 0;
	int fadvise_ret = 0;
	unsigned long page_num;
	unsigned char *vec = NULL;

	/* Init defrag_data for defrag */
	defrag_data.ext.start = exts_group->start->data.start;
	defrag_data.ext.len = exts_group->len;
	defrag_data.ext.block = 0;
	defrag_data.defrag_size = exts_group->len;
	/* Move victim extents to make sufficient space */
	defrag_data.flag = DEFRAG_FORCE_GATHER;
	defrag_data.goal = exts_group->start->data.start;

	if (page_in_core(fd, defrag_data, &vec, &page_num) == RETURN_NG)
		return RETURN_NG;

	defraged_size = ioctl(fd, EXT4_IOC_DEFRAG, &defrag_data);

	/* Free pages */
	fadvise_ret = defrag_fadvise(fd, defrag_data, vec, page_num);
	FREE(vec);

	if (fadvise_ret == RETURN_NG || defraged_size < 0)
		return RETURN_NG;

	return defraged_size;
}

/*
 * get_used_extent() -	Get used extent in the block group.
 *
 * @fd:			the file's descriptor.
 * @ext_list_head:	the head of the extent list.
 * @istart:		start inode in the block group.
 * @iend:		end inode in the block group.
 * @bstart:		start block in the block group.
 * @bend:		end block in the block group.
 */
int get_used_extent(int fd, extent_t **ext_list_head,
		    unsigned long long istart, unsigned long long iend,
		    ext4_fsblk_t bstart, ext4_fsblk_t bend)
{
	int	ret = 0;
	int	blocksize;
	int	extents_buffer_size, fiemap_ino_size;
	__u64	pos = 0;
	__u64   group_start, group_end;
	unsigned long long	ino;
	struct	fiemap_extent *extents_buf = NULL;
	struct	fiemap_ino *fiemap_ino_buf = NULL;

	ret = ioctl(fd, FIGETBSZ, &blocksize);
	if (ret < 0) {
		if (detail_flag)
			perror(NGMSG_FILE_BLOCKSIZE);

		return RETURN_NG;
	}

	/* Convert units, in bytes.
	 * Be careful : now, physical block number in extent is 48bit,
	 * and the maximum blocksize for ext4 is 4K(12bit),
	 * so there is no overflow, but in future it may be changed.
	 */
	group_start = bstart * blocksize;
	group_end = bend * blocksize;

	/* Alloc space for fiemap_ino */
	fiemap_ino_size = sizeof(struct fiemap_ino);
	extents_buffer_size = EXTENT_MAX_COUNT * sizeof(struct fiemap_extent);

	fiemap_ino_buf = malloc(fiemap_ino_size + extents_buffer_size);
	if (!fiemap_ino_buf)
		return RETURN_NG;

	extents_buf = fiemap_ino_buf->fiemap.fm_extents;
	memset(fiemap_ino_buf, 0, fiemap_ino_size);
	fiemap_ino_buf->fiemap.fm_length = FIEMAP_MAX_OFFSET;
	fiemap_ino_buf->fiemap.fm_flags |= FIEMAP_FLAG_SYNC;
	fiemap_ino_buf->fiemap.fm_extent_count = EXTENT_MAX_COUNT;

	for (ino = istart; ino <= iend; ino++) {
		fiemap_ino_buf->ino = ino;
		pos = 0;
		do {
			int i;
			fiemap_ino_buf->fiemap.fm_start = pos;
			memset(extents_buf, 0, extents_buffer_size);

			ret = ioctl(fd, EXT4_IOC_FIEMAP_INO, fiemap_ino_buf);
			if (ret < 0) {
				/* Skip non-regular file and unused inode */
				if (errno == EOPNOTSUPP || errno == ESTALE
					|| errno == ENOENT || errno == EACCES)
					continue;
				goto out;
			}

			for (i = 0; i < fiemap_ino_buf->
					fiemap.fm_mapped_extents; i++) {
				extent_t        *extent = NULL;

				/* Is this extent in current block group ? */
				if (extents_buf[i].fe_physical < group_start ||
					extents_buf[i].fe_physical > group_end)
					continue;

				extent = malloc(sizeof(extent_t));
				if (extent == NULL)
					goto out;

				extent->data.start = extents_buf[i].fe_physical
							/ blocksize;
				extent->data.block = extents_buf[i].fe_logical
							/ blocksize;
				extent->data.len = extents_buf[i].fe_length
							/ blocksize;
				extent->ino = ino;
				extent->tag = EXT4_EXT_USE;

				if (insert_extent(ext_list_head, extent) < 0) {
					FREE(extent);
					goto out;
				}
			}

			/* Record file's logical offset this time */
			pos = extents_buf[EXTENT_MAX_COUNT-1].fe_logical +
				extents_buf[EXTENT_MAX_COUNT-1].fe_length;
		/* If fm_extents array has been filled and
		 * there are extents left, continue to cycle.
		 */
		} while (fiemap_ino_buf->fiemap.fm_mapped_extents
						== EXTENT_MAX_COUNT &&
			!(extents_buf[EXTENT_MAX_COUNT-1].fe_flags
						& FIEMAP_EXTENT_LAST));
	}

	FREE(fiemap_ino_buf);
	return RETURN_OK;
out:
	FREE(fiemap_ino_buf);
	return RETURN_NG;
}

/*
 * get_free_extent() -	Get used extent in the block group.
 *
 * @fd:			the file's descriptor.
 * @ino:		inode number from struct stat64.
 * @blocks_per_group:	the block number of each block group.
 * @ext_list_head:	the head of the extent list.
 */
int get_free_extent(int fd, unsigned long long ino,
		    int blocks_per_group, extent_t **ext_list_head)
{
	ext4_grpblk_t	pos = 0;
	struct ext4_extents_info	extents_info;

	memset(&extents_info, 0, sizeof(struct ext4_extents_info));
	extents_info.ino = ino;
	extents_info.max_entries = DEFRAG_MAX_ENT;
	while (pos < blocks_per_group) {
		int	i;
		if (ioctl(fd, EXT4_IOC_FREE_BLOCKS_INFO, &extents_info) < 0)
			return RETURN_NG;

		for (i = 0;
		     extents_info.ext[i].len != 0 && i < DEFRAG_MAX_ENT; i++) {
			/* Alloc space for extent */
			extent_t	*extent = NULL;

			extent = malloc(sizeof(extent_t));
			if (extent == NULL)
				return RETURN_NG;
			memset(extent, 0, sizeof(extent_t));
			memcpy(&(extent->data), &(extents_info.ext[i]),
			       sizeof(struct ext4_extent_data));
			/* Free extent */
			extent->tag = EXT4_EXT_FREE;
			if (insert_extent(ext_list_head, extent) < 0) {
				FREE(extent);
				return RETURN_NG;
			}
		}

		/* No free extent after the logical block number "pos".
		 * In other word, offset this time equals to prev recursion.
		 */
		if (pos == extents_info.g_offset)
			break;
		if (i < DEFRAG_MAX_ENT)
			break;
		/* Record the offset of logical block number this time */
		pos = extents_info.g_offset;
		memset(extents_info.ext, 0,
		       sizeof(struct ext4_extent_data) * DEFRAG_MAX_ENT);
	}

	return RETURN_OK;
}

/*
 * join_extents() -		Find continuous region(exts_group).
 *
 * @ext_list_head:		the head of the extent list.
 * @target_exts_group_list_head:the head of the target exts_group list.
 * @exts_group_list_head:	the head of the original exts_group list.
 * @filesize:			the file's descriptor.
 * @max:			the max size of free space.
 */
int join_extents(extent_t *ext_list_head,
		 exts_group_t **target_exts_group_list_head,
		 exts_group_t **exts_group_list_head,
		 unsigned long filesize, int *max)
{
	int len;
	extent_t *ext_start, *extent_tmp;

	ext_start = ext_list_head;
	extent_tmp = ext_list_head;
	*max = 0;
	len = ext_list_head->data.len;
	extent_tmp = extent_tmp->next;
	do {
		exts_group_t    *exts_group_tmp = NULL;

		if (len >= filesize) {
			/* Hit on the way,
			 * one extent group is enough for defrag, so return.
			 */
			exts_group_tmp = malloc(sizeof(exts_group_t));
			if (!exts_group_tmp)
				return RETURN_NG;

			exts_group_tmp->prev = NULL;
			exts_group_tmp->next = NULL;
			exts_group_tmp->start = ext_start;
			exts_group_tmp->end = extent_tmp->prev;
			exts_group_tmp->len = len;
			if (insert_exts_group(target_exts_group_list_head,
					      exts_group_tmp) < 0) {
				FREE(exts_group_tmp);
				return RETURN_NG;
			}
			return CHECK_FRAG_COUNT;
		}

		/* This extent and previous extent are not continuous,
		 * so, all previous extents are treated as an extent group.
		 */
		if ((extent_tmp->prev->data.start + extent_tmp->prev->data.len)
					    != extent_tmp->data.start) {
			exts_group_tmp = malloc(sizeof(exts_group_t));
			if (exts_group_tmp == NULL)
				return RETURN_NG;

			memset(exts_group_tmp, 0, sizeof(exts_group_t));
			exts_group_tmp->len = len;
			exts_group_tmp->start = ext_start;
			exts_group_tmp->end = extent_tmp->prev;

			if (insert_exts_group(exts_group_list_head,
					      exts_group_tmp) < 0) {
				FREE(exts_group_tmp);
				return RETURN_NG;
			}
			*max += len;
			ext_start = extent_tmp;
			len = extent_tmp->data.len;
			extent_tmp = extent_tmp->next;
			continue;
		}

		/* This extent and previous extent are continuous,
		 * so, they belong to the same extent group, and we check
		 * if the next extent belongs to the same extent group.
		 */
		len += extent_tmp->data.len;
		extent_tmp = extent_tmp->next;
	} while (extent_tmp != ext_list_head->next);

	return RETURN_OK;
}

/*
 * find_exts_group() -			Find target exts_group.
 *
 * @ext_count:				the number of extents.
 * @filesize:				the file's size.
 * @exts_group_list_head:		the head of the original exts_group list
 * @target_exts_group_list_head:	the head of the target exts_group list.
 */
int find_exts_group(int	*ext_count, unsigned long filesize,
		    exts_group_t **exts_group_list_head,
		    exts_group_t **target_exts_group_list_head)
{
	/* Blocks we found for target file */
	int len = 0;

	if (!(*exts_group_list_head))
		return RETURN_NG;

	while (*exts_group_list_head) {
		exts_group_t	*exts_group_tmp = NULL;

		/* Even add the current largest extent group,
		 * the sapce we found is still not enough for the file,
		 * so just add the current largest extent group,
		 * and do the next loop.
		 */
		if ((*exts_group_list_head)->len + len < filesize) {
			len += (*exts_group_list_head)->len;
			exts_group_tmp = get_exts_group(exts_group_list_head,
				*exts_group_list_head);
			if (insert_exts_group(target_exts_group_list_head,
				      exts_group_tmp) < 0) {
				FREE(exts_group_tmp);
				return RETURN_NG;
			}
			(*ext_count)++;
			continue;
		}

		/* Now, if add the current largest extent group,
		 * the space we found is enough,
		 * So, search from the smallest extent group
		 * to avoid waste of space
		 */
		exts_group_tmp = (*exts_group_list_head)->prev;
		do {
			if (exts_group_tmp->len + len >= filesize) {
				len += exts_group_tmp->len;
				exts_group_tmp =
				get_exts_group(exts_group_list_head,
					       exts_group_tmp);
				if (insert_exts_group
					(target_exts_group_list_head,
					 exts_group_tmp) < 0) {
					FREE(exts_group_tmp);
					return RETURN_NG;
				}
				(*ext_count)++;
				/* The only entry go out normally */
				return RETURN_OK;
			}
			exts_group_tmp = exts_group_tmp->prev;
		} while (exts_group_tmp !=
			 (*exts_group_list_head)->prev);

		/* If we come here, there must be something wrong */
		return RETURN_NG;
	}

	/* If we come here, there must be something wrong(space not enough) */
	return RETURN_NG;
}

/*
 * check_frag_count() -		Check file frag.
 *
 * @fd:				the file's discriptor.
 * @extent_count:		the number of extents.
 */
int check_frag_count(int fd, int extent_count)
{
	long long file_extent_count = RETURN_NG;

	file_extent_count = file_frag_count(fd);
	if (file_extent_count == RETURN_NG)
		return RETURN_NG;

	if (extent_count >= file_extent_count) {
		/* No improvment */
		errno = ENOSPC;
		return RETURN_NG;
	}

	return RETURN_OK;
}

/*
 * defrag_proc() -		Reserve extent group and execute
 *				the defrag program.
 *
 * @fd:				the file's discriptor.
 * @file_name			file name.
 * @target_exts_group_head:	the head of the original exts_group list.
 * @ino:			inode number from struct stat64.
 */
int defrag_proc(int fd, const char *file_name,
		exts_group_t *target_exts_group_head, unsigned long long ino)
{
	int ret = 0;
	int flag = 0;
	int count = 0;
	int percent = 0;
	int blocksize = 0;
	struct stat64	buf;
	exts_group_t 			*exts_group = NULL;
	extent_t			*extent = NULL;
	struct ext4_extents_info	extents_info;
	struct ext4_ext_defrag_data	defrag_data;

	if (!target_exts_group_head)
		return RETURN_NG;

	/* Get file size */
	memset(&buf, 0, sizeof(struct stat64));
	ret = fstat64(fd, &buf);
	if (ret < 0) {
		if (detail_flag) {
			printf("\033[79;0H\033[K[%lu/%lu]%s\n",
					defraged_file_count,
					total_count, file_name);
			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO);
		}
		return RETURN_NG;
	}

	/* Get block size */
	ret = ioctl(fd, FIGETBSZ, &blocksize);
	if (ret < 0) {
		if (detail_flag) {
			printf("\033[79;0H\033[K[%lu/%lu]%s\n",
					defraged_file_count,
					total_count, file_name);
			PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_BLOCKSIZE);
		}
		return RETURN_NG;
	}

	memset(&extents_info, 0, sizeof(extents_info));
	memset(&defrag_data, 0, sizeof(struct ext4_ext_defrag_data));
	extents_info.ino = 0;
	exts_group = target_exts_group_head;
	extents_info.max_entries = DEFRAG_MAX_ENT;
	extents_info.ino = ino;
	defrag_data.start_offset = 0;

	do {
		/* Move victim extents to make sufficient space */
		extent = exts_group->start;
		int flush_count = 0;
		do {
			if (extent->tag != EXT4_EXT_USE) {
				extent = extent->next;
				continue;
			}
			extents_info.ino = extent->ino;
			extents_info.goal = fgoal;
			memcpy(extents_info.ext, &extent->data,
			       sizeof(struct ext4_extent_data));

			extent = extent->next;
			extents_info.entries = 1;
			ret = ioctl(fd, EXT4_IOC_MOVE_VICTIM, &extents_info);
			if (ret < 0) {
				if (errno == EACCES && detail_flag) {
					printf("\033[79;0H\033[K[%lu/%lu]%s\n",
					defraged_file_count,
					total_count, file_name);
					PRINT_ERR_MSG_WITH_ERRNO
						(NGMSG_VICTIM_UID);
				}
				return ret;
			}
			count++;
			if (detail_flag) {
				if (!flag) {
					printf(" Move victim extents to");
					printf(" the other block group\n");
					flag = 1;
				}
				/* moved extent info */
				if (!flush_count) {
					/* flush the defrag process info */
					printf("\033[79;0H\033[K ext%-3d",
						count);
					printf(" phys:\t%8llu logical:\t%8u "
						"length:\t%8d\n",
						(extents_info.ext[0]).start,
						(extents_info.ext[0]).block,
						(extents_info.ext[0]).len);
					flush_count = 1;
				} else {
					printf(" ext%-3d phys:\t%8llu logical:"
						"\t%8u length:\t%8d\n", count,
						(extents_info.ext[0]).start,
						(extents_info.ext[0]).block,
						(extents_info.ext[0]).len);
				}
			}
		} while (extent != exts_group->end->next);

		if (fsync(fd) < 0) {
			if (detail_flag) {
				printf("\033[79;0H\033[K[%lu/%lu]%s\n",
					defraged_file_count,
					total_count, file_name);
				PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_SYNC);
			}
			return ret;
		}

		/* Init extents_info for ioctl (RESERVE) */
		extents_info.entries = 1;
		extents_info.ext[0].block = exts_group->start->data.block;
		extents_info.ext[0].start = exts_group->start->data.start;
		extents_info.ext[0].len = exts_group->len;

		/* Defrag */
		ret = do_defrag(fd, exts_group, defrag_data);
		if (ret < 0) {
			if (detail_flag) {
				printf("\033[79;0H\033[K[%lu/%lu]%s\n",
					defraged_file_count,
					total_count, file_name);
				printf("\tDEFRAG_ERROR ret = %d\t[ NG ]\n",
					ret);
			}
			return ret;
		}
		/* Defrag success, so update offset */
		defrag_data.start_offset += ret;
		ret = defrag_data.start_offset;

		/* Print process progress */
		{
			percent = ((long long)ret * blocksize * 100) /
				  buf.st_size;
			printf("\033[79;0H\033[K[%lu/%lu]%s:\t%d%%",
				defraged_file_count, total_count,
				file_name, min(percent, 100));
			fflush(stdout);
		}

		exts_group = exts_group->next;
	} while (exts_group != target_exts_group_head);
	return ret;
}

/*
 * force_defrag() -	Execute the defrag program in force defrag mode.
 *
 * @fd:			the file's descriptor.
 * @file_name		file name.
 * @buf:		a pointer of the struct stat64.
 * @blocksize:		block size in byte.
 */
int force_defrag(int fd, const char *file_name,
		const struct stat64 *buf, int blocksize)
{
	int     ret = 0;
	int     exts = 0;
	int     maxlen = 0;
	unsigned int    gnumber;
	unsigned long   filesize;
	unsigned long long	istart, iend;
	ext4_fsblk_t	bstart, bend;
	extent_t	*extlist_head = NULL;
	exts_group_t	*exts_group_list_head, *exts_group_list_target_head;
	struct ext4_super_block sb;

	exts_group_list_head = NULL;
	exts_group_list_target_head = NULL;

	/* Get super block info */
	memset(&sb, 0, sizeof(struct ext4_super_block));

	if (ioctl(fd, EXT4_IOC_SUPER_INFO, &sb) < 0)
		return RETURN_NG;

	sb.s_blocks_per_group = ext2fs_swab32(sb.s_blocks_per_group);
	sb.s_inodes_per_group = ext2fs_swab32(sb.s_inodes_per_group);

	gnumber = (buf->st_ino - 1) / sb.s_inodes_per_group;
	istart = gnumber * sb.s_inodes_per_group;
	iend = istart + sb.s_inodes_per_group - 1;
	bstart = gnumber * sb.s_blocks_per_group;
	bend = bstart + sb.s_blocks_per_group - 1;

	/* Compute filesize in block */
	filesize = (buf->st_size + blocksize - 1) / blocksize;

	/* Get used extents in the block group */
	ret = get_used_extent(fd, &extlist_head, istart, iend, bstart, bend);
	if (ret == RETURN_NG)
		goto freelist;

	/* Get free extents in the group */
	ret = get_free_extent(fd, buf->st_ino,
			     sb.s_blocks_per_group, &extlist_head);
	if (ret == RETURN_NG)
		goto freelist;

	/* All space in this group is used by other groups' inodes */
	if (extlist_head == NULL) {
		ret = RETURN_NG;
		goto freelist;
	}

	/* Get continuous region(extents group) */
	ret = join_extents(extlist_head, &exts_group_list_target_head,
				  &exts_group_list_head, filesize, &maxlen);
	if (ret == RETURN_NG)
		goto freelist;
	if (ret == CHECK_FRAG_COUNT) {
		exts = 1;
		goto frag_check;
	}

	if (maxlen < filesize) {
		/* No enough space */
		errno = ENOSPC;
		ret = RETURN_NG;
		goto freelist;
	}

	if (!exts_group_list_head) {
		ret = RETURN_NG;
		goto freelist;
	}

	/* Find target extents group */
	ret = find_exts_group(&exts, filesize, &exts_group_list_head,
				      &exts_group_list_target_head);
	if (ret == RETURN_NG)
		goto freelist;

frag_check:
	/* Check file extent count */
	ret = check_frag_count(fd, exts);
	if (ret == RETURN_NG)
		goto freelist;

	/* Reserve extent group and execute the defrag program */
	ret = defrag_proc(fd, file_name,
			exts_group_list_target_head, buf->st_ino);

freelist:
	free_exts_group(exts_group_list_target_head);
	free_exts_group(exts_group_list_head);
	free_ext(extlist_head);
	return ret;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ