lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-Id: <200712271112.AA00290@TNESG9526.rs.jp.nec.com>
Date:	Thu, 27 Dec 2007 20:12:02 +0900
From:	Akira Fujita <a-fujita@...jp.nec.com>
To:	linux-ext4@...r.kernel.org, linux-fsdevel@...r.kernel.org
Subject: [RFC][PATCH 5/5] ext4 online defrag command

- The defrag command. Usage is as follows:
o Put the multiple files closer together.
  # e4defrag -r directory-name
o Defrag for free space fragmentation.
  # e4defrag -f file-name
o Defrag for a single file.
  # e4defrag file-name
o Defrag for all files on ext4.
  # e4defrag device-name

Signed-off-by: Takashi Sato <t-sato@...jp.nec.com>
Signed-off-by: Akira Fujita <a-fujita@...jp.nec.com>
---
/*
 * e4defrag.c - ext4 filesystem defragmenter
 */

#ifndef _LARGEFILE_SOURCE
#define _LARGEFILE_SOURCE
#endif

#ifndef _LARGEFILE64_SOURCE
#define _LARGEFILE64_SOURCE
#endif

#define _XOPEN_SOURCE	500
#define _GNU_SOURCE
#include <ftw.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <dirent.h>
#include <limits.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/statfs.h>
#include <sys/vfs.h>
#include <sys/ioctl.h>
#include <mntent.h>
#include <linux/fs.h>
#include <ctype.h>
#include <sys/syscall.h>
#include <sys/mman.h>

#define EXT4_IOC_DEFRAG		_IOW('f', 10, struct ext4_ext_defrag_data)
#define EXT4_IOC_GROUP_INFO	_IOW('f', 11, struct ext4_group_data_info)
#define EXT4_IOC_FREE_BLOCKS_INFO _IOW('f', 12, struct ext4_extents_info)
#define EXT4_IOC_EXTENTS_INFO	_IOW('f', 13, struct ext4_extents_info)
#define EXT4_IOC_RESERVE_BLOCK	_IOW('f', 14, struct ext4_extents_info)
#define EXT4_IOC_MOVE_VICTIM	_IOW('f', 15, struct ext4_extents_info)
#define EXT4_IOC_BLOCK_RELEASE	_IO('f', 16)

#define DEFRAG_MAX_ENT	32

/* Extent status which are used in extent_t */
#define EXT4_EXT_USE		0
#define EXT4_EXT_FREE		1
#define EXT4_EXT_RESERVE	2

/*
 * Insert list2 immediately after list1 in a doubly-linked list.
 *
 * Both arguments are pointers to nodes that have "prev" and "next" members.
 * Each argument is evaluated more than once, so it must be free of side
 * effects.  The arguments are parenthesized so that expressions such as
 * "&node" can be passed safely.
 */
#define insert(list1, list2)				\
	do {						\
		(list2)->next = (list1)->next;		\
		(list1)->next->prev = (list2);		\
		(list2)->prev = (list1);		\
		(list1)->next = (list2);		\
	} while (0)

#define DEFRAG_FORCE_VICTIM	2

/* Magic number for ext4 */
#define EXT4_SUPER_MAGIC	0xEF53

/*
 * Force defrag mode: Max file size in bytes (128MB).
 * The expansion is fully parenthesized so it can be combined with any
 * operator without precedence surprises.
 */
#define	MAX_FILE_SIZE		((unsigned long)1 << 27)

/* Data type for filesystem-wide blocks number */
#define  ext4_fsblk_t unsigned long long

/* Ioctl command */
#define EXT4_IOC_FIBMAP		_IOW('f', 9, ext4_fsblk_t)

#ifndef __NR_fadvise64
#define __NR_fadvise64		250
#endif

#ifndef __NR_sync_file_range
#define __NR_sync_file_range	314
#endif

#ifndef POSIX_FADV_DONTNEED
#if defined(__s390x__)
#define POSIX_FADV_DONTNEED	6 /* Don't need these pages.  */
#else
#define POSIX_FADV_DONTNEED	4 /* Don't need these pages.  */
#endif
#endif

#ifndef SYNC_FILE_RANGE_WAIT_BEFORE
#define SYNC_FILE_RANGE_WAIT_BEFORE	1
#endif
#ifndef SYNC_FILE_RANGE_WRITE
#define SYNC_FILE_RANGE_WRITE		2
#endif
#ifndef SYNC_FILE_RANGE_WAIT_AFTER
#define SYNC_FILE_RANGE_WAIT_AFTER	4
#endif

#define DEVNAME			0
#define DIRNAME			1
#define FILENAME		2

#define RETURN_OK		0
#define RETURN_NG		-1
#define FTW_CONT		0
#define FTW_OPEN_FD		2000
#define FILE_CHK_OK		0
#define FILE_CHK_NG		-1
#define FS_EXT4			"ext4dev"
#define ROOT_UID		0
#define CHECK_FRAG_COUNT	1

/* Defrag block size, in bytes */
#define DEFRAG_SIZE		67108864

#define min(x, y) (((x) > (y)) ? (y) : (x))

#define PRINT_ERR_MSG(msg)	fprintf(stderr, "%s\n", (msg));
#define PRINT_FILE_NAME(file)	fprintf(stderr, "\t\t    \"%s\"\n", (file));

#define MSG_USAGE		\
"Usage : e4defrag [-v] file...| directory...| device...\n\
      : e4defrag -f file [blocknr] \n\
      : e4defrag -r directory... | device... \n"

#define MSG_R_OPTION		" with regional block allocation mode.\n"
#define NGMSG_MTAB		"\te4defrag  : Can not access /etc/mtab."
#define NGMSG_UNMOUNT		"\te4defrag  : FS is not mounted."
#define NGMSG_EXT4		"\te4defrag  : FS is not ext4 File System."
#define NGMSG_FS_INFO		"\te4defrag  : get FSInfo fail."
#define NGMSG_FILE_INFO		"\te4defrag  : get FileInfo fail."
#define NGMSG_FILE_OPEN		"\te4defrag  : open fail."
#define NGMSG_FILE_SYNC		"\te4defrag  : sync(fsync) fail."
#define NGMSG_FILE_DEFRAG	"\te4defrag  : defrag fail."
#define NGMSG_FILE_BLOCKSIZE	"\te4defrag  : can't get blocksize."
#define NGMSG_FILE_FIBMAP	"\te4defrag  : can't get block number."
#define NGMSG_FILE_UNREG	"\te4defrag  : File is not regular file."

#define NGMSG_FILE_LARGE	\
	"\te4defrag  : Defrag size is larger than FileSystem's free space."

#define NGMSG_FILE_PRIORITY	\
"\te4defrag  : File is not current user's file or current user is not root."

#define NGMSG_FILE_LOCK		"\te4defrag  : File is locked."
#define NGMSG_FILE_BLANK	"\te4defrag  : File size is 0."
#define NGMSG_GET_LCKINFO	"\te4defrag  : get LockInfo fail."
#define NGMSG_TYPE		\
	"e4defrag  : Can not process %s in regional mode\n."
#define NGMSG_REALPATH		"\te4defrag  : Can not get full path."
#define NGMSG_FILE_MAP		"\te4defrag  : get file map fail."
#define NGMSG_FILE_DROP_BUFFER	"\te4defrag  : free page fail."
#define NGMSG_FADVISE_SYSCALL	"\tfadvise fail."

/*
 * Describes one extent by its starting logical block, its starting
 * physical block and its length in blocks.
 */
struct ext4_extent_data {
	unsigned long long block;	/* start logical block number */
	ext4_fsblk_t start;		/* start physical block number */
	int len;			/* blocks count */
};

/*
 * Used for defrag: this is the argument type of the EXT4_IOC_DEFRAG ioctl.
 */
struct ext4_ext_defrag_data {
	ext4_fsblk_t start_offset;	/* start offset to defrag in blocks */
	ext4_fsblk_t defrag_size;	/* size of defrag in blocks */
	ext4_fsblk_t goal;		/* block offset for allocation */
	int flag;			/* free space mode flag; ftw_fn() sets
					 * it to 1 in force mode, do_defrag()
					 * sets it to DEFRAG_FORCE_VICTIM */
	struct ext4_extent_data ext;	/* extent passed along with the request;
					 * do_defrag() fills it from the target
					 * extent group */
};

/* Argument type of the EXT4_IOC_GROUP_INFO ioctl. */
struct ext4_group_data_info {
	int s_blocks_per_group;		/* blocks per group */
	int s_inodes_per_group;		/* inodes per group */
};

/*
 * Argument type shared by the EXT4_IOC_FREE_BLOCKS_INFO,
 * EXT4_IOC_EXTENTS_INFO, EXT4_IOC_RESERVE_BLOCK and EXT4_IOC_MOVE_VICTIM
 * ioctls.
 */
struct ext4_extents_info {
	unsigned long long ino;		/* inode number */
	int max_entries;		/* maximum extents count */
	int entries;			/* extent number/count: callers of
					 * EXT4_IOC_EXTENTS_INFO store the
					 * starting position here before the
					 * call and read back the number of
					 * entries filled into ext[] */
	unsigned long offset;		/* NOTE(review): not used in the visible
					 * part of this file - confirm meaning */
	ext4_fsblk_t goal;		/* NOTE(review): not used in the visible
					 * part of this file - confirm meaning */
	struct ext4_extent_data ext[DEFRAG_MAX_ENT];	/* returned extents */
};

/*
 * Node of a circular doubly-linked extent list (see insert_extent(), which
 * keeps the list ordered by physical start block).
 */
typedef struct extent {
	struct extent *prev;		/* previous node in the ring */
	unsigned long tag;		/* Extent status (EXT4_EXT_* values) */
	unsigned long ino;		/* File's inode number */
	struct ext4_extent_data data;	/* Extent belong to file */
	struct extent *next;		/* next node in the ring */
} extent_t;

/*
 * Node of a circular doubly-linked list of extent groups.  A group spans
 * the extents from "start" to "end" inclusive (do_defrag() walks that
 * range); insert_exts_group() keeps the list in decreasing order of "len".
 */
typedef struct exts_group {
	struct exts_group *prev;	/* previous group in the ring */
	extent_t *start;		/* Start ext */
	extent_t *end;			/* End ext */
	int len;			/* Length of this continuous region */
	struct exts_group *next;	/* next group in the ring */
} exts_group_t;

/*
 * Node of a circular doubly-linked list that refers to an extent without
 * owning it: free_wrap_list() frees only the wrapper nodes, and mark_wrap()
 * updates the status tag of the extent each wrapper points to.
 */
typedef struct extent_wrap {
	struct extent_wrap *prev, *next;	/* ring links */
	struct extent *group_ext;		/* the extent referred to */
} extent_wrap_t;

/* Program-wide state shared between main() and the tree-walk callback. */
int	force_flag;		/* set to 1 by the -f option */
int	detail_flag;		/* set to 1 by the -v option; enables
				 * per-file diagnostics */
int	regional_flag;		/* set to 1 by the -r option */
int	amount_cnt;		/* regular files visited by ftw_fn() for the
				 * current argument */
int	succeed_cnt;		/* files ftw_fn() finished processing for the
				 * current argument */
ext4_fsblk_t	goal;		/* allocation goal handed to EXT4_IOC_DEFRAG;
				 * in -r mode main() obtains it through
				 * EXT4_IOC_FIBMAP */
ext4_fsblk_t	fgoal = -1;	/* optional block number given with -f;
				 * all-ones when none (or 0) was given */

/**
 * fadvise() -		advise the operating system how to treat the page
 *			cache of a file region.
 *
 * @fd:			file descriptor.
 * @offset:		file offset.
 * @len:		area length.
 * @advise:		process flag.
 *
 * Invokes the fadvise64 system call directly through syscall() and hands
 * its result back to the caller.
 */
int fadvise(int fd, loff_t offset, size_t len, int advise)
{
	long	rc;

	rc = syscall(__NR_fadvise64, fd, offset, len, advise);

	return rc;
}

/**
 * sync_file_range() -	sync file region.
 *
 * @fd:			file descriptor.
 * @offset:		file offset.
 * @length:		area length.
 * @flag:		process flag (callers in this file pass a
 *			combination of SYNC_FILE_RANGE_* values).
 *
 * Invokes the sync_file_range system call directly through syscall() and
 * hands its result back to the caller.
 */
int sync_file_range(int fd, loff_t offset, loff_t length, unsigned int flag)
{
	long	rc;

	rc = syscall(__NR_sync_file_range, fd, offset, length, flag);

	return rc;
}

/**
 * page_in_core() -	get information on whether pages are in core.
 *
 * @fd:			file descriptor.
 * @defrag_data:	data used for defrag; start_offset and defrag_size
 *			(both in blocks) select the region to inspect.
 * @vec:		page state array.  *vec must be NULL on entry; on
 *			success it receives a newly allocated array with one
 *			byte per page, which the caller must free().
 * @page_num:		receives the number of pages in the inspected region.
 *
 * The region is mapped read-only, examined with mincore() and unmapped
 * again.  Returns RETURN_OK on success and RETURN_NG on failure.  On
 * failure the mapping and the array are released, *vec is left NULL and
 * errno describes the call that failed.
 */
int page_in_core(int fd, struct ext4_ext_defrag_data defrag_data,
		 unsigned char **vec, unsigned long *page_num)
{
	int blocksize;
	int pagesize = getpagesize();
	int saved_errno;
	void *page = NULL;
	unsigned char *state = NULL;
	loff_t offset, end_offset, length;

	if (vec == NULL || *vec != NULL || page_num == NULL) {
		return RETURN_NG;
	}

	if (ioctl(fd, FIGETBSZ, &blocksize) < 0) {
		return RETURN_NG;
	}

	/*in mmap, offset should be a multiple of the page size */
	offset = defrag_data.start_offset * blocksize;
	length = defrag_data.defrag_size * blocksize;
	end_offset = offset + length;
	/* round the offset down to the nearest multiple of pagesize */
	offset = (offset / pagesize) * pagesize;
	length = end_offset - offset;

	page = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, offset);
	if (page == MAP_FAILED) {
		return RETURN_NG;
	}

	*page_num = (length + pagesize - 1) / pagesize;
	state = calloc(*page_num, 1);

	/* get information on whether pages are in core */
	if (state == NULL || mincore(page, (size_t)length, state) == -1) {
		/*
		 * Drop the mapping before bailing out, and keep the errno
		 * of the call that failed for the caller's perror().
		 */
		saved_errno = errno;
		munmap(page, length);
		free(state);
		errno = saved_errno;
		return RETURN_NG;
	}

	if (munmap(page, length) == -1) {
		saved_errno = errno;
		free(state);
		errno = saved_errno;
		return RETURN_NG;
	}

	/* Hand the array over only once everything has succeeded */
	*vec = state;

	return RETURN_OK;
}

/**
 * defrag_fadvise() -	predeclare an access pattern for file data.
 *
 * @fd:			file descriptor.
 * @defrag_data:	data used for defrag; only start_offset is read.
 * @vec:		page state array, one byte per page.
 * @page_num:		number of entries in @vec.
 *
 * Syncs the page range that starts at the page containing start_offset,
 * then issues POSIX_FADV_DONTNEED for every page whose @vec entry has its
 * lowest bit set.  A failing fadvise() is not fatal; it is reported once
 * when detail_flag is set.
 *
 * Returns RETURN_OK, or RETURN_NG when the block size cannot be obtained
 * or the range cannot be synced.
 */
int defrag_fadvise(int fd, struct ext4_ext_defrag_data defrag_data,
		   unsigned char *vec, unsigned long page_num)
{
	const int pagesize = getpagesize();
	int blocksize;
	int reported = 0;
	unsigned long idx;
	loff_t pos;

	if (ioctl(fd, FIGETBSZ, &blocksize) < 0)
		return RETURN_NG;

	/* Byte offset of the region, rounded down to a page boundary */
	pos = (loff_t)defrag_data.start_offset * blocksize;
	pos = (pos / pagesize) * pagesize;

	/* sync file for fadvise process */
	if (sync_file_range(fd, pos, (loff_t)pagesize*page_num,
			    SYNC_FILE_RANGE_WAIT_BEFORE |
			    SYNC_FILE_RANGE_WRITE |
			    SYNC_FILE_RANGE_WAIT_AFTER) != 0)
		return RETURN_NG;

	/* try to release buffer cache this process used,
	 * then other process can use the released buffer */
	for (idx = 0; idx < page_num; idx++, pos += pagesize) {
		if (!(vec[idx] & 0x1))
			continue;
		if (fadvise(fd, pos, pagesize, POSIX_FADV_DONTNEED) == 0)
			continue;
		/* Report only the first failure */
		if (detail_flag && !reported) {
			perror(NGMSG_FADVISE_SYSCALL);
			reported = 1;
		}
	}

	return RETURN_OK;
}
/**
 * check_free_size() -	Check if there's enough disk space.
 *
 * @fd:			the file's descriptor.
 * @buf:		the pointer of the struct stat64.
 */
int check_free_size(int fd, const struct stat64 *buf)
{
	off64_t	size = 0;
	off64_t	free_size = 0;
	struct statfs	fsbuf;

	/* target file size */
	size = buf->st_size;

	if (fstatfs(fd, &fsbuf) < 0) {
		if (detail_flag) {
			perror(NGMSG_FS_INFO);
		}
		return RETURN_NG;
	}

	/* Compute free space for root and normal user separately */
	if (getuid() == ROOT_UID) {
		free_size = (off64_t)fsbuf.f_bsize * fsbuf.f_bfree;
	} else {
		free_size = (off64_t)fsbuf.f_bsize * fsbuf.f_bavail;
	}

	if (free_size >= size) {
		return RETURN_OK;
	}

	return RETURN_NG;
}

int file_check(int fd, const struct stat64 *buf, const char *file_name);
int force_defrag(int fd, const struct stat64 *buf, int blocksize);

/**
 * ftw_fn() -           Check file attributes and ioctl call to avoid
 * 			illegal operations.
 *
 * @file:		the file's name.
 * @buf:		the pointer of the struct stat64.
 * @flag:		file type.
 * @ftwbuf:		the pointer of a struct FTW.
 */
int
ftw_fn(const char *file, const struct stat64 *buf, int flag, struct FTW *ftwbuf)
{
	int	fd;
	int	blocksize;
	int	percent = 0;
	int	defraged_size = 0;
	int	ret = RETURN_NG;
	int	pos, file_frags_start, file_frags_end;
	unsigned long	page_num;
	unsigned char	*vec = NULL;
	loff_t	start = 0;
	struct ext4_ext_defrag_data	df_data;
	struct ext4_extents_info	extents_info;

	if (flag == FTW_F) {
		amount_cnt++;
		if ((fd = open64(file, O_RDONLY)) < 0) {
			if (detail_flag) {
				perror(NGMSG_FILE_OPEN);
				PRINT_FILE_NAME(file);
			}
			return FTW_CONT;
		}

		if (file_check(fd, buf, file) == FILE_CHK_NG) {
			close(fd);
			return FTW_CONT;
		}

		if (fsync(fd) < 0) {
			if (detail_flag) {
				perror(NGMSG_FILE_SYNC);
				PRINT_FILE_NAME(file);
			}
			close(fd);
			return FTW_CONT;
		}
		/* Get blocksize */
		if (ioctl(fd, FIGETBSZ, &blocksize) < 0) {
			if (detail_flag) {
				perror(NGMSG_FILE_BLOCKSIZE);
				PRINT_FILE_NAME(file);
			}
			close(fd);
			return FTW_CONT;
		}
		/* Ioctl call does the actual defragment job. */
		df_data.start_offset = 0;
		df_data.goal = goal;
		df_data.ext.len = 0;

		if (force_flag)
			df_data.flag = 1;

		/* count file frags before defrag if detail_flag set */
		if (detail_flag) {
			pos = 0;
			file_frags_start = 0;
			memset(&extents_info, 0,
				sizeof(struct ext4_extents_info));
			extents_info.ino = buf->st_ino;
			extents_info.max_entries = DEFRAG_MAX_ENT;
			extents_info.entries = 0;

			do {
				extents_info.entries += pos;
				pos = extents_info.entries;
				ret = ioctl(fd, EXT4_IOC_EXTENTS_INFO,
					    &extents_info);
				if (ret < 0) {
					perror(NGMSG_FILE_INFO);
					PRINT_FILE_NAME(file);
					close(fd);
					return FTW_CONT;
				}
				file_frags_start += extents_info.entries;
			} while (extents_info.entries == DEFRAG_MAX_ENT &&
				 ret == 0);
		}

		/* print process progress */
		printf("\tprocessing -------> %s:\n", file);
		percent = (start * 100) / buf->st_size;
		printf("\033[79;16H\033[K progressing ====> %d%%", percent);
		fflush(stdout);

		while (1) {
			df_data.defrag_size =
					(min((buf->st_size - start),
					     DEFRAG_SIZE) + blocksize - 1) /
					blocksize;

			ret = page_in_core(fd, df_data, &vec, &page_num);
			if (ret == RETURN_NG) {
				if (detail_flag) {
					perror(NGMSG_FILE_MAP);
					PRINT_FILE_NAME(file);
				} else {
					printf("\n");
				}
				close(fd);
				return FTW_CONT;
			}

			/* EXT4_IOC_DEFRAG */
			defraged_size = ioctl(fd, EXT4_IOC_DEFRAG, &df_data);

			/* free page */
			ret = defrag_fadvise(fd, df_data, vec, page_num);
			if (vec) {
				free(vec);
				vec = NULL;
			}
			if (ret == RETURN_NG) {
				if (detail_flag) {
					perror(NGMSG_FILE_DROP_BUFFER);
					PRINT_FILE_NAME(file);
				} else {
					printf("\n");
				}
				close(fd);
				return FTW_CONT;
			}

			if ((defraged_size < 0) && (force_flag == 1) &&
			 (errno == ENOSPC) && buf->st_size <= MAX_FILE_SIZE) {
				defraged_size = force_defrag(fd, buf,
							     blocksize);
				if (defraged_size * blocksize >= buf->st_size) {
					/* Whole file is enforcedly defraged */
					break;
				} else {
					defraged_size = RETURN_NG;
				}
			}
			if (defraged_size < 0) {
				if (detail_flag) {
					perror(NGMSG_FILE_DEFRAG);
					PRINT_FILE_NAME(file);
				} else {
					printf("\n");
				}
				close(fd);
				return FTW_CONT;
			}
			df_data.start_offset += defraged_size;
			start = df_data.start_offset * blocksize;

			/* print process progress */
			if (start > ((long long)1 << 56)) {
			/* consider overflow("start * 100" beyond 64bits) */
				start = start >> 8;
				percent = (start * 100) / (buf->st_size >> 8);
			} else {
				percent = (start * 100) / buf->st_size;
			}

			/* disk space file used is bigger than logical size */
			if (percent > 100) {
				percent = 100;
			}
			printf("\033[79;16H\033[K progressing ====> %d%%",
				percent);
			fflush(stdout);

			/* End of file */
			if (start >= buf->st_size) {
				break;
			}
		}

		/* count file frags after defrag and print extents info */
		if (detail_flag) {
			pos = 0;
			ret = RETURN_NG;
			file_frags_end = 0;
			extents_info.entries = 0;

			do {
				extents_info.entries += pos;
				pos = extents_info.entries;
				ret = ioctl(fd, EXT4_IOC_EXTENTS_INFO,
					    &extents_info);
				if (ret < 0) {
					printf("\n");
					perror(NGMSG_FILE_INFO);
					PRINT_FILE_NAME(file);
					close(fd);
					return FTW_CONT;
				}
				file_frags_end += extents_info.entries;
			} while (extents_info.entries == DEFRAG_MAX_ENT &&
				 ret == 0);

			printf("\n\t\textents: %d ==> %d",
				file_frags_start, file_frags_end);
		}
		printf("\n");
		close(fd);
		succeed_cnt++;
	} else {
		if (detail_flag) {
			PRINT_ERR_MSG(NGMSG_FILE_UNREG);
			PRINT_FILE_NAME(file);
		}
	}

	return FTW_CONT;
}

/**
 * file_check() -       Check file's attributes.
 *
 * @fd:			the file's descriptor.
 * @buf:		a pointer of the struct stat64.
 * @file_name:		the file's name.
 *
 * The checks run in this order and stop at the first failure: regular
 * file, enough free space, ownership (root or the file's owner), no
 * conflicting lock, non-empty.  When detail_flag is set the reason and
 * the file name are written to stderr.
 *
 * Returns FILE_CHK_OK when every check passes, FILE_CHK_NG otherwise.
 */
int file_check(int fd, const struct stat64 *buf, const char *file_name)
{
	const char	*reason = NULL;
	/* Write-lock check is more reliable. */
	struct flock	probe = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,
	};

	if (!S_ISREG(buf->st_mode)) {
		/* Regular file */
		reason = NGMSG_FILE_UNREG;
	} else if (check_free_size(fd, buf) == RETURN_NG) {
		/* Free space */
		reason = NGMSG_FILE_LARGE;
	} else if (getuid() != ROOT_UID && buf->st_uid != getuid()) {
		/* Priority */
		reason = NGMSG_FILE_PRIORITY;
	} else if (fcntl(fd, F_GETLK, &probe) < 0) {
		/* Lock status could not be queried: report with errno */
		if (detail_flag) {
			perror(NGMSG_GET_LCKINFO);
			PRINT_FILE_NAME(file_name);
		}
		return FILE_CHK_NG;
	} else if (probe.l_type != F_UNLCK) {
		/* Lock status */
		reason = NGMSG_FILE_LOCK;
	} else if (buf->st_size == 0) {
		/* Empty file */
		reason = NGMSG_FILE_BLANK;
	}

	if (reason == NULL) {
		return FILE_CHK_OK;
	}

	if (detail_flag) {
		PRINT_ERR_MSG(reason);
		PRINT_FILE_NAME(file_name);
	}
	return FILE_CHK_NG;
}

/**
 * is_ext4() -		Whether on an ext4 filesystem.
 *
 * @filename:		the file's name.
 *
 * The path is resolved with realpath(), the filesystem magic is checked
 * with statfs(), and the type recorded in the mount table for the longest
 * mount point that contains the path is compared with FS_EXT4.
 *
 * Returns RETURN_OK when the file is on an ext4 filesystem, RETURN_NG
 * otherwise (including when no mount table entry matches).
 */
int is_ext4(const char *filename)
{
	int 	maxlen, len;
	int	ret = RETURN_NG;
	FILE	*fp = NULL;
	char	*mnt_type = NULL;
	char	*new_type = NULL;
	char	*mtab = MOUNTED;	/* Refer to /etc/mtab */
	char	file_path[PATH_MAX];
	struct mntent	*mnt = NULL;
	struct statfs	buffs;

	/* Get full path */
	if (realpath(filename, file_path) == NULL) {
		perror(NGMSG_REALPATH);
		PRINT_FILE_NAME(filename);
		return RETURN_NG;
	}

	if (statfs(file_path, &buffs) < 0) {
		perror(NGMSG_FS_INFO);
		PRINT_FILE_NAME(filename);
		return RETURN_NG;
	}

	if (buffs.f_type != EXT4_SUPER_MAGIC) {
		PRINT_ERR_MSG(NGMSG_EXT4);
		return RETURN_NG;
	}

	if ((fp = setmntent(mtab, "r")) == NULL) {
		perror(NGMSG_MTAB);
		return RETURN_NG;
	}

	maxlen = 0;
	while ((mnt = getmntent(fp)) != NULL) {
		len = strlen(mnt->mnt_dir);
		/* Only a longer mount point can replace the current match */
		if (len <= maxlen) {
			continue;
		}
		/* strncmp stops at the end of file_path, unlike memcmp */
		if (strncmp(file_path, mnt->mnt_dir, len) != 0) {
			continue;
		}
		/*
		 * The match must end on a path component boundary, so that
		 * a mount point "/mnt/a" does not claim "/mnt/ab/file".
		 */
		if (mnt->mnt_dir[len - 1] != '/' &&
		    file_path[len] != '/' && file_path[len] != '\0') {
			continue;
		}

		/* Keep the old buffer reachable in case realloc fails */
		new_type = realloc(mnt_type, strlen(mnt->mnt_type) + 1);
		if (!new_type) {
			free(mnt_type);
			endmntent(fp);
			return RETURN_NG;
		}
		mnt_type = new_type;
		strcpy(mnt_type, mnt->mnt_type);
		maxlen = len;
	}
	endmntent(fp);

	/* mnt_type stays NULL when no mount table entry matched */
	if (mnt_type != NULL && strcmp(mnt_type, FS_EXT4) == 0) {
		ret = RETURN_OK;
	} else {
		PRINT_ERR_MSG(NGMSG_EXT4);
	}
	free(mnt_type);

	return ret;
}

/**
 * get_mount_point() -	Get device's mount point.
 *
 * @devname:		the device's name.
 * @mount_point:	the mount point (output buffer).
 * @dir_path_len:	the size of @mount_point in bytes.
 *
 * Looks up the first mount table entry whose device name equals @devname.
 * When that entry is of type FS_EXT4 its mount directory is copied into
 * @mount_point, always NUL-terminated.
 *
 * Returns RETURN_OK on success.  Returns RETURN_NG when the mount table
 * cannot be read, the device is not mounted, it is not ext4, or the mount
 * directory does not fit into @mount_point.
 */
int get_mount_point(const char *devname, char *mount_point, int dir_path_len)
{
	int	ret = RETURN_NG;
	char	*mtab = MOUNTED;	/* Refer to /etc/mtab */
	FILE	*fp = NULL;
	struct mntent	*mnt = NULL;

	if ((fp = setmntent(mtab, "r")) == NULL) {
		perror(NGMSG_MTAB);
		return RETURN_NG;
	}

	while ((mnt = getmntent(fp)) != NULL) {
		if (strcmp(devname, mnt->mnt_fsname) != 0) {
			continue;
		}

		/* Use the entry before the mount table is closed */
		if (strcmp(mnt->mnt_type, FS_EXT4) != 0) {
			PRINT_ERR_MSG(NGMSG_EXT4);
		} else if (dir_path_len <= 0 ||
			   strlen(mnt->mnt_dir) >= (size_t)dir_path_len) {
			/* Would not fit together with its terminator */
			errno = ENAMETOOLONG;
			perror(mnt->mnt_dir);
		} else {
			/* Fits, so strncpy also writes the terminator */
			strncpy(mount_point, mnt->mnt_dir, dir_path_len);
			ret = RETURN_OK;
		}
		endmntent(fp);
		return ret;
	}
	endmntent(fp);
	PRINT_ERR_MSG(NGMSG_UNMOUNT);
	return RETURN_NG;
}

/**
 * main() -		ext4 online defrag.
 *
 * @argc:		the number of parameter.
 * @argv[]:		the pointer array of parameter.
 *
 * Parses the -r, -v and -f options, then handles every remaining argument
 * according to its type: a block device is mapped to its mount point and
 * walked, a directory is walked, and a regular file is defragmented on
 * its own.  In -f mode only one regular file (plus an optional block
 * number) is accepted.
 *
 * Returns RETURN_OK when at least one file was processed successfully;
 * otherwise the program exits with status 1.
 */
int main(int argc, char *argv[])
{
	int	fd;
	int	ret;
	int	opt;
	int	i, flags;
	int	arg_type;
	int	detail_tmp;
	int	success_flag;
	char	dir_name[PATH_MAX];
	struct stat64	buf;

	i = 1;
	flags = 0;
	arg_type = -1;
	detail_tmp = -1;
	success_flag = 0;
	flags |= FTW_PHYS;	/* Do not follow symlink */
	flags |= FTW_MOUNT;	/* Stay within the same filesystem */
	/* Parse arguments */
	if (argc == 1 || (argc == 2 && argv[1][0] == '-')) {
		printf(MSG_USAGE);
		exit(1);
	}

	while ((opt = getopt(argc, argv, "rvf")) != EOF) {
		switch (opt) {
		case 'r':
			regional_flag = 1;
			i = 2;
			break;
		case 'v':
			detail_flag = 1;
			i = 2;
			break;
		case 'f':
			force_flag = 1;
			i = 2;

			if (argc > 4) {
				printf("Illegal argument\n\n");
				printf(MSG_USAGE);
				exit(1);
			}

			if (argc == 4) {
				const char *digit;

				/* The block number must be digits only */
				for (digit = argv[3]; *digit != '\0';
				     digit++) {
					if (!isdigit((unsigned char)*digit)) {
						printf("Illegal argument\n\n");
						printf(MSG_USAGE);
						exit(1);
					}
				}

				/*
				 * strtoul() reports a range error only
				 * through errno, so errno has to be cleared
				 * first.
				 */
				errno = 0;
				fgoal = strtoul(argv[3], NULL, 0);
				if (errno) {
					printf("block num shold be < 32bit\n");
					exit(1);
				}
			}
			if (!fgoal)
				fgoal = -1;
			break;
		default:
			printf(MSG_USAGE);
			exit(1);
		}
	}

	/* Main process */
	for (; i < argc; i++) {
		amount_cnt = 0;
		succeed_cnt = 0;
		memset(dir_name, 0, PATH_MAX);

		/* In force mode argv[3] is the block number, not a file */
		if (force_flag && i == 3)
			break;

		if (lstat64(argv[i], &buf) < 0) {
			perror(NGMSG_FILE_INFO);
			PRINT_FILE_NAME(argv[i]);
			continue;
		}

		/* Regular file is acceptalbe with force mode */
		if (force_flag && !S_ISREG(buf.st_mode)) {
			printf("Inappropriate file type \n\n");
			printf(MSG_USAGE);
			exit(1);
		}

		/* Block device */
		if (S_ISBLK(buf.st_mode)) {
			arg_type = DEVNAME;
			if (get_mount_point(argv[i], dir_name, PATH_MAX) ==
				RETURN_NG) {
				continue;
			}
			printf("Start defragment for device(%s)\n", argv[i]);
		} else if (S_ISDIR(buf.st_mode)) {
			/* Directory */
			arg_type = DIRNAME;
			if (access(argv[i], R_OK) < 0) {
				perror(argv[i]);
				continue;
			}
			/* Never write past the end of dir_name */
			if (strlen(argv[i]) >= sizeof(dir_name)) {
				errno = ENAMETOOLONG;
				perror(argv[i]);
				continue;
			}
			strcpy(dir_name, argv[i]);
		} else if (S_ISREG(buf.st_mode)) {
			/* Regular file */
			arg_type = FILENAME;
		} else {
			/* Irregular file */
			PRINT_ERR_MSG(NGMSG_FILE_UNREG);
			PRINT_FILE_NAME(argv[i]);
			continue;
		}

		/* Device's ext4 check is in get_mount_point() */
		if (arg_type == FILENAME || arg_type == DIRNAME) {
			if (is_ext4(argv[i]) == RETURN_NG) {
				continue;
			}
		}

		switch (arg_type) {
		case DIRNAME:
			printf("Start defragment for directory(%s)\n",
				argv[i]);
			/* fallthrough: a directory is walked like a device */
		case DEVNAME:
			/* Regional block allocation */
			if (regional_flag) {
				printf(MSG_R_OPTION);

				if ((fd = open64(dir_name, O_RDONLY)) < 0) {
					if (detail_flag) {
						perror(NGMSG_FILE_OPEN);
						PRINT_FILE_NAME(dir_name);
					}
					continue;
				}

				goal = 0;
				if ((ret = ioctl(fd, EXT4_IOC_FIBMAP,
							 &goal)) != 0) {
					perror(NGMSG_FILE_FIBMAP);
					PRINT_FILE_NAME(dir_name);
					close(fd);
					continue;
				}
				close(fd);
			}

			/* File tree walk */
			nftw64(dir_name, ftw_fn, FTW_OPEN_FD, flags);
			printf("\tTotal:\t\t%12d\n", amount_cnt);
			printf("\tSuccess:\t%12d\n", succeed_cnt);
			printf("\tFailure:\t%12d\n",
				amount_cnt - succeed_cnt);
			break;
		case FILENAME:
			if (regional_flag) {
				fprintf(stderr, NGMSG_TYPE, argv[i]);
				continue;
			}
			/* A single file is always processed verbosely */
			detail_tmp = detail_flag;
			detail_flag = 1;
			printf("Start defragment for %s\n", argv[i]);
			/* Single file process */
			ftw_fn(argv[i], &buf, FTW_F, NULL);
			if (succeed_cnt != 0) {
				printf(
				"\tSUCCESS\t:file defrag success.\n"
				);
			}
			detail_flag = detail_tmp;
			break;
		}

		if (succeed_cnt != 0)
			success_flag = 1;
	}

	if (success_flag)
		return RETURN_OK;

	exit(1);
}
/**
 * insert_extent() -	Sequentially insert extent by physical block number.
 *
 * @extlist_head:	the head of an extent list.
 * @ext:		the extent element which will be inserted.
 *
 * The list is a ring ordered by data.start, and *@extlist_head refers to
 * the element with the smallest start.  @ext is linked in at its sorted
 * position; *@extlist_head is updated when @ext becomes the new first
 * element.
 *
 * Returns RETURN_OK on success.  Returns RETURN_NG when @ext is NULL or
 * its block range overlaps a neighbouring element; the list is then left
 * unchanged.
 */
int insert_extent(extent_t **extlist_head, extent_t *ext)
{
	extent_t	*succ;		/* element that will follow "ext" */
	extent_t	*pred;		/* element that will precede "ext" */
	ext4_fsblk_t	ext_end;	/* first block after "ext" */

	if (ext == NULL) {
		return RETURN_NG;
	}

	/* First element */
	if (*extlist_head == NULL) {
		ext->prev = ext;
		ext->next = ext;
		*extlist_head = ext;
		return RETURN_OK;
	}

	succ = *extlist_head;
	ext_end = ext->data.start + ext->data.len;

	if (ext->data.start <= succ->data.start) {
		/* Insert before head, unless the two ranges overlap */
		if (succ->data.start < ext_end) {
			return RETURN_NG;
		}
		/* Adjust head */
		*extlist_head = ext;
	} else {
		/*
		 * Insert into the middle or last of the list: stop at the
		 * first element that starts after "ext", or at the head
		 * again when no such element exists.
		 */
		succ = succ->next;
		while (succ != *extlist_head &&
		       ext->data.start >= succ->data.start) {
			succ = succ->next;
		}

		pred = succ->prev;
		if (ext->data.start < pred->data.start + pred->data.len) {
			/* Overlap with the preceding element */
			return RETURN_NG;
		}
		if (succ != *extlist_head && succ->data.start < ext_end) {
			/* Overlap with the following element */
			return RETURN_NG;
		}
	}

	/* Insert "ext" between "pred" and "succ" */
	pred = succ->prev;
	insert(pred, ext);

	return RETURN_OK;
}

/**
 * insert_exts_group() -	Insert a exts_group in decreasing order of length.
 *
 * @exts_group_list_head:	the head of a exts_group list.
 * @exts_group:			the exts_group element which will be inserted.
 *
 * The list is a ring whose head is the group with the largest "len".  A
 * group at least as long as the current head becomes the new head;
 * otherwise it is placed in front of the first group that is strictly
 * shorter, or at the end when there is none.
 *
 * Returns RETURN_OK on success, RETURN_NG when @exts_group is NULL.
 */
int insert_exts_group(exts_group_t **exts_group_list_head,
		      exts_group_t *exts_group)
{
	exts_group_t	*succ;	/* group that will follow the new one */
	exts_group_t	*pred;	/* group that will precede the new one */

	if (exts_group == NULL) {
		return RETURN_NG;
	}

	/* Initialize list */
	if (*exts_group_list_head == NULL) {
		exts_group->prev = exts_group;
		exts_group->next = exts_group;
		*exts_group_list_head = exts_group;
		return RETURN_OK;
	}

	succ = *exts_group_list_head;
	if (exts_group->len >= succ->len) {
		/* Insert before the old head and become the new head */
		*exts_group_list_head = exts_group;
	} else {
		/* Find insertion positon */
		succ = succ->next;
		while (succ != *exts_group_list_head &&
		       succ->len >= exts_group->len) {
			succ = succ->next;
		}
	}

	pred = succ->prev;
	insert(pred, exts_group);

	return RETURN_OK;
}

/**
 * get_exts_group() -		Get element from the exts_group list.
 *
 * @exts_group_list_head:	the head of a exts_group list.
 * @exts_group:			the exts_group element which will be geted.
 *
 * Unlinks @exts_group from the ring.  When it was the head, the head
 * moves to the following group, or becomes NULL when @exts_group was the
 * only element.  The links stored in @exts_group itself are not reset.
 *
 * Returns @exts_group, or NULL when @exts_group is NULL or the list is
 * empty.
 */
exts_group_t *get_exts_group(exts_group_t **exts_group_list_head,
			      exts_group_t *exts_group)
{
	exts_group_t	*before;
	exts_group_t	*after;

	if (*exts_group_list_head == NULL || exts_group == NULL) {
		return NULL;
	}

	after = exts_group->next;
	if (exts_group == *exts_group_list_head) {
		/*
		 * Keep "exts_group_list_head" point to the largest extent
		 * group, or empty the list when the last element is deleted.
		 */
		*exts_group_list_head = (after == exts_group) ? NULL : after;
	}

	before = exts_group->prev;
	before->next = after;
	after->prev = before;

	return exts_group;
}

/**
 * free_exts_group() -		Free the exts_group.
 *
 * @exts_group_list_head:	the exts_group list head which will be free.
 *
 * Releases every node of the ring.  A NULL head is accepted and ignored.
 */
void free_exts_group(exts_group_t *exts_group_list_head)
{
	exts_group_t *node;
	exts_group_t *following;

	if (exts_group_list_head == NULL) {
		return;
	}

	/* Release everything behind the head first, then the head itself */
	for (node = exts_group_list_head->next;
	     node != exts_group_list_head;
	     node = following) {
		following = node->next;
		free(node);
	}
	free(exts_group_list_head);
}

/**
 * free_ext() -		Free the extent list.
 *
 * @extent_list_head:	the extent list head of which will be free.
 *
 * Releases every node of the ring.  A NULL head is accepted and ignored.
 */
void free_ext(extent_t *extent_list_head)
{
	extent_t *node;
	extent_t *following;

	if (extent_list_head == NULL) {
		return;
	}

	/* Release everything behind the head first, then the head itself */
	for (node = extent_list_head->next;
	     node != extent_list_head;
	     node = following) {
		following = node->next;
		free(node);
	}
	free(extent_list_head);
}

/**
 * move_wrap() -	Move a ext_wrap from one list to another.
 *
 * @from:		the list which will be moved from (may be NULL).
 * @to:			the list which will be moved to.
 * @entry:		the ext_wrap which will be moved.
 *
 * @entry is unlinked from its current ring and appended just before the
 * head of *@to, i.e. at the tail.  When @entry is the head of *@from the
 * head moves to the next element, or becomes NULL when @entry was alone.
 *
 * Returns RETURN_OK on success, RETURN_NG when @to or @entry is NULL.
 */
int move_wrap(extent_wrap_t **from, extent_wrap_t **to,
		   extent_wrap_t *entry)
{
	extent_wrap_t *tail;

	if (entry == NULL || to == NULL) {
		return RETURN_NG;
	}

	/* Keep the source head valid once "entry" is gone */
	if (from != NULL && *from == entry) {
		*from = (entry->next != entry) ? entry->next : NULL;
	}

	/* Unlink "entry" from its current ring */
	entry->next->prev = entry->prev;
	entry->prev->next = entry->next;

	if (*to == NULL) {
		/* "entry" becomes the only element of the destination */
		entry->next = entry;
		entry->prev = entry;
		*to = entry;
		return RETURN_OK;
	}

	/* Link "entry" in between the current tail and the head */
	tail = (*to)->prev;
	entry->next = *to;
	entry->prev = tail;
	tail->next = entry;
	(*to)->prev = entry;

	return RETURN_OK;
}

/**
 * mark_wrap() -	Mark extent status as "EXT4_EXT_RESERVE".
 *
 * @ext_wrap_list:	the ext_wrap list which will be marked.
 *
 * Sets the EXT4_EXT_RESERVE bit in the tag of the extent referred to by
 * every wrapper of the ring.  A NULL list is accepted and ignored.
 */
void mark_wrap(extent_wrap_t *ext_wrap_list)
{
	extent_wrap_t *cur;

	if (ext_wrap_list == NULL) {
		return;
	}

	cur = ext_wrap_list;
	for (;;) {
		cur->group_ext->tag |= EXT4_EXT_RESERVE;
		cur = cur->next;
		/* Back at the first wrapper: the whole ring is marked */
		if (cur == ext_wrap_list) {
			break;
		}
	}
}

/**
 * free_wrap_list() -	Free the ext_wrap list.
 *
 * @ext_wrap_head:	the ext_wrap list head which will be free.
 *
 * Releases every wrapper of the ring and resets *@ext_wrap_head to NULL.
 * The extents the wrappers refer to are not freed.  A NULL pointer or an
 * empty list is accepted and ignored.
 */
void free_wrap_list(extent_wrap_t **ext_wrap_head)
{
	extent_wrap_t *first, *cur, *following;

	if (ext_wrap_head == NULL || *ext_wrap_head == NULL) {
		return;
	}

	first = *ext_wrap_head;
	/* Release everything behind the head first, then the head itself */
	for (cur = first->next; cur != first; cur = following) {
		following = cur->next;
		free(cur);
	}
	free(first);

	*ext_wrap_head = NULL;
}

/**
 * do_defrag() -	Execute the defrag program.
 *
 * @fd:			the file's descriptor.
 * @exts_group:		the exts_group which will be defraged.
 * @defrag_data:	the data which will be defraged (passed by value; the
 *			extent, size, flag and goal fields are overwritten
 *			here from @exts_group).
 *
 * Issues EXT4_IOC_DEFRAG with the DEFRAG_FORCE_VICTIM flag for the region
 * described by @exts_group, releases the page cache of the region and, on
 * success, clears the EXT4_EXT_RESERVE bit on every extent of the group.
 *
 * Returns the value of the ioctl on success, RETURN_NG on failure.
 */
static inline int do_defrag(int fd, exts_group_t *exts_group,
			    struct ext4_ext_defrag_data defrag_data)
{
	int moved;
	int drop_ret;
	unsigned long page_num;
	unsigned char *vec = NULL;
	extent_t *cur;
	extent_t *stop;

	/* Defrag */
	defrag_data.flag = DEFRAG_FORCE_VICTIM;
	defrag_data.defrag_size = exts_group->len;
	defrag_data.goal = exts_group->start->data.start;
	defrag_data.ext.block = 0;
	defrag_data.ext.start = exts_group->start->data.start;
	defrag_data.ext.len = exts_group->len;

	if (page_in_core(fd, defrag_data, &vec, &page_num) == RETURN_NG)
		return RETURN_NG;

	moved = ioctl(fd, EXT4_IOC_DEFRAG, &defrag_data);

	/* free pages */
	drop_ret = defrag_fadvise(fd, defrag_data, vec, page_num);
	free(vec);

	if (moved < 0 || drop_ret == RETURN_NG)
		return RETURN_NG;

	/* Release reserved sign on every extent from start to end */
	stop = exts_group->end->next;
	cur = exts_group->start;
	do {
		cur->tag &= ~EXT4_EXT_RESERVE;
		cur = cur->next;
	} while (cur != stop);

	return moved;
}

/**
 * get_used_extent() -	Get used extent in the block group.
 *
 * @fd:			the file's descriptor.
 * @ext_list_head:	the head of the extent list.
 * @istart:		the start of the inode.
 * @iend:		the end of the inode.
 * @bstart:		the start of the block.
 * @bend:		the end of the block.
 *
 * Queries EXT4_IOC_EXTENTS_INFO for every inode number from @istart to
 * @iend inclusive, DEFRAG_MAX_ENT extents at a time.  Every extent whose
 * physical start block lies within [@bstart, @bend] is copied into a newly
 * allocated node and inserted into *@ext_list_head with insert_extent().
 * An inode for which the ioctl fails with ENOENT is skipped.
 *
 * Returns RETURN_NG on any other ioctl error, on allocation failure or
 * when insert_extent() rejects an extent; nodes inserted up to that point
 * stay in the list.  Otherwise returns RETURN_OK if the last query ended
 * with ENOENT, or else the result of the last ioctl.
 */
int get_used_extent(int fd, extent_t **ext_list_head,
		    unsigned long long istart, unsigned long long iend,
		    unsigned long long bstart, unsigned long long bend)
{
	int	idx;
	int	next_pos;
	int	ret = 0;
	unsigned long long	ino;
	extent_t	*node;
	struct ext4_extents_info	info;

	memset(&info, 0, sizeof(info));
	info.max_entries = DEFRAG_MAX_ENT;

	for (ino = istart; ino <= iend; ino++) {
		info.ino = ino;
		info.entries = 0;
		next_pos = 0;

		for (;;) {
			/* Get extents info, continuing after the last batch */
			info.entries += next_pos;	/* Offset */
			next_pos = info.entries;
			memset(info.ext, 0, sizeof(info.ext));

			ret = ioctl(fd, EXT4_IOC_EXTENTS_INFO, &info);
			if (ret < 0) {
				if (errno != ENOENT) {
					/* Without ENOENT case*/
					return RETURN_NG;
				}
				/* ENOENT: go on with the next inode */
				break;
			}

			for (idx = 0; idx < info.entries; idx++) {
				/* Is this extent in current block group? */
				if (info.ext[idx].start < bstart ||
				    info.ext[idx].start > bend) {
					continue;
				}

				node = calloc(1, sizeof(*node));
				if (node == NULL) {
					return RETURN_NG;
				}
				node->data = info.ext[idx];
				node->ino = ino;

				if (insert_extent(ext_list_head, node) < 0) {
					free(node);
					return RETURN_NG;
				}
			}

			/* A batch that is not full was the last one */
			if (info.entries != DEFRAG_MAX_ENT || ret != 0) {
				break;
			}
		}
	}

	if (ret < 0 && errno == ENOENT) {
		return RETURN_OK;
	}
	return ret;
}

/**
 * get_free_extent() -	Get free extents in the block group.
 *
 * @fd:			the file's descriptor.
 * @inode:		inode number from struct stat64.
 * @blocks_per_group:	the block number of each block group; the scan stops
 *			once the offset reported by the ioctl reaches it.
 * @ext_list_head:	the head of the extent list the free extents are
 *			inserted into (tagged EXT4_EXT_FREE).
 *
 * Repeatedly calls EXT4_IOC_FREE_BLOCKS_INFO and inserts every returned
 * entry until the ioctl reports no further progress or returns a partially
 * filled batch.
 *
 * Returns RETURN_OK on success, RETURN_NG when the ioctl, the allocation of
 * a list node or insert_extent() fails.
 */
int get_free_extent(int fd, unsigned long long inode,
		    int blocks_per_group, extent_t **ext_list_head)
{
	int	pos = 0;
	struct ext4_extents_info	extents_info;

	memset(&extents_info, 0, sizeof(struct ext4_extents_info));
	extents_info.ino = inode;
	extents_info.max_entries = DEFRAG_MAX_ENT;
	while (pos < blocks_per_group) {
		int	i;
		if (ioctl(fd, EXT4_IOC_FREE_BLOCKS_INFO, &extents_info) < 0) {
			return RETURN_NG;
		}
		/*
		 * A zero-length entry terminates the batch.  The index is
		 * bounds-checked BEFORE ext[i] is read, so a completely
		 * filled batch never reads ext[DEFRAG_MAX_ENT].
		 */
		for (i = 0;
		     i < DEFRAG_MAX_ENT && extents_info.ext[i].len != 0; i++) {
			/* Alloc list node store extent */
			extent_t	*extent = calloc(1, sizeof(*extent));

			if (extent == NULL) {
				return RETURN_NG;
			}
			memcpy(&(extent->data), &(extents_info.ext[i]),
			       sizeof(struct ext4_extent_data));
			extent->tag = EXT4_EXT_FREE;/* Free extent */
			if (insert_extent(ext_list_head, extent) < 0) {
				free(extent);
				return RETURN_NG;
			}
		}
		/*
		 * No free extent after the logical block number "pos":
		 * the offset reported this time equals the previous one.
		 */
		if (pos == extents_info.offset) {
			break;
		}
		/* A partially filled batch was the last one */
		if (i < DEFRAG_MAX_ENT) {
			break;
		}
		/* Record the offset of logical block number this time */
		pos = extents_info.offset;
		memset(extents_info.ext, 0,
		       sizeof(struct ext4_extent_data) * DEFRAG_MAX_ENT);
	}

	return RETURN_OK;
}

/**
 * join_extents() -		Find continuous regions (exts_groups).
 *
 * @ext_list_head:		the head of the (circular) extent list.
 * @target_exts_group_list_head:the head of the target exts_group list.
 * @exts_group_list_head:	the head of the original exts_group list.
 * @filesize:			the size wanted for the file, compared
 *				directly against summed extent lengths
 *				(force_defrag() passes a block count).
 * @max:			out: sum of the lengths of all groups put on
 *				@exts_group_list_head.
 *
 * Walks the list once, merging extents for which the previous extent's
 * start + len equals the current extent's start into one run.
 *
 * Returns CHECK_FRAG_COUNT as soon as one run alone reaches @filesize (that
 * run is inserted into the target list), RETURN_OK after a full pass with
 * every closed run inserted into @exts_group_list_head, or RETURN_NG on an
 * allocation or insert_exts_group() failure.
 */
int join_extents(extent_t *ext_list_head,
		 exts_group_t **target_exts_group_list_head,
		 exts_group_t **exts_group_list_head,
		 unsigned long filesize, int *max)
{
	extent_t *run_first = ext_list_head;
	extent_t *cur = ext_list_head->next;
	int run_len = ext_list_head->data.len;
	exts_group_t *group;

	*max = 0;
	do {
		if (run_len >= filesize) {
			/*
			 * The current run alone is large enough: hand it to
			 * the target list and stop searching.
			 */
			group = malloc(sizeof(*group));
			if (group == NULL) {
				return RETURN_NG;
			}
			group->start = run_first;
			group->end = cur->prev;
			group->len = run_len;
			group->prev = NULL;
			group->next = NULL;
			if (insert_exts_group(target_exts_group_list_head,
					      group) < 0) {
				free(group);
				return RETURN_NG;
			}
			return CHECK_FRAG_COUNT;
		}

		if (cur->prev->data.start + cur->prev->data.len ==
		    cur->data.start) {
			/* Contiguous with its predecessor: extend the run */
			run_len += cur->data.len;
		} else {
			/*
			 * Gap before this extent: everything collected so
			 * far becomes one group, and a new run starts here.
			 */
			group = calloc(1, sizeof(*group));
			if (group == NULL) {
				return RETURN_NG;
			}
			group->start = run_first;
			group->end = cur->prev;
			group->len = run_len;
			if (insert_exts_group(exts_group_list_head,
					      group) < 0) {
				free(group);
				return RETURN_NG;
			}
			*max += run_len;
			run_first = cur;
			run_len = cur->data.len;
		}
		cur = cur->next;
	} while (cur != ext_list_head->next);

	return RETURN_OK;
}

/**
 * find_exts_group() -			Find target exts_groups.
 *
 * @ext_count:				incremented once for every group
 *					moved to the target list.
 * @filesize:				the file's size, in the same unit as
 *					the groups' len.
 * @exts_group_list_head:		the head of the original exts_group list
 * @target_exts_group_list_head:	the head of the target exts_group list.
 *
 * Groups are taken from the head of the original list until the head would
 * complete @filesize; at that point the list is searched backwards from
 * head->prev (treated as the smallest group) for the first group that
 * completes it, to avoid wasting space.
 *
 * Returns RETURN_OK once the collected length reaches @filesize, RETURN_NG
 * if the list runs out first (or is empty) or insert_exts_group() fails.
 */
int find_exts_group(int	*ext_count, unsigned long filesize,
		    exts_group_t **exts_group_list_head,
		    exts_group_t **target_exts_group_list_head)
{
	int found = 0;	/* Blocks we found for target file */

	while (*exts_group_list_head) {
		exts_group_t	*pick = *exts_group_list_head;
		int	done = 0;

		if (pick->len + found >= filesize) {
			exts_group_t	*tail = pick->prev;
			exts_group_t	*cand = tail;

			/* Walk backwards for the first group that suffices */
			do {
				if (cand->len + found >= filesize) {
					pick = cand;
					done = 1;
					break;
				}
				cand = cand->prev;
			} while (cand != tail);
		}

		/* Move the chosen group from the original to the target list */
		found += pick->len;
		pick = get_exts_group(exts_group_list_head, pick);
		if (insert_exts_group(target_exts_group_list_head, pick) < 0) {
			free(pick);
			return RETURN_NG;
		}
		(*ext_count)++;

		if (done) {
			/* The only entry go out normally*/
			return RETURN_OK;
		}
	}

	return RETURN_NG;
}

/**
 * check_frag_count() -		Check file frag.
 *
 * @fd:				the file's discriptor.
 * @inode:			inode number from struct stat64.
 * @extent_count:		the number of extents.
 */
int check_frag_count(int fd, unsigned long long inode, int extent_count)
{
	int ret, pos, file_extent_count;
	struct ext4_extents_info	extents_info;

	/* Count file exts */
	memset(&extents_info, 0, sizeof(struct ext4_extents_info));
	file_extent_count = 0;/* Extents count of file */
	extents_info.ino = inode;
	extents_info.max_entries = DEFRAG_MAX_ENT;
	extents_info.entries = 0;
	pos = 0;
	ret = 0;

	do {
		extents_info.entries += pos;
		pos = extents_info.entries;
		ret = ioctl(fd, EXT4_IOC_EXTENTS_INFO, &extents_info);
		if (ret < 0) {
			return RETURN_NG;
		}
		file_extent_count += extents_info.entries;
	} while (extents_info.entries == DEFRAG_MAX_ENT && ret == 0);

	if (extent_count >= file_extent_count) {
		/* No improvment */
		errno = ENOSPC;
		return RETURN_NG;
	}

	return RETURN_OK;
}

/**
 * defrag_proc() -		Reserve extent group and execute the defrag program
 *
 * @fd:				the file's descriptor.
 * @target_exts_group_head:	the head of the (circular) target exts_group
 *				list to defrag into.
 * @inode:			inode number from struct stat64.
 *
 * For every group on the target list: extents tagged EXT4_EXT_USE are moved
 * away with EXT4_IOC_MOVE_VICTIM, all other extents are tagged
 * EXT4_EXT_RESERVE, the file is fsync()ed, the group's range is reserved
 * with EXT4_IOC_RESERVE_BLOCK and do_defrag() is run on it.
 *
 * On the normal path the return value is defrag_data.start_offset, i.e. the
 * running sum of the do_defrag() results.  RETURN_NG is returned for a NULL
 * list head, or when fstat64()/FIGETBSZ or the final release ioctl fail.
 * See the notes at the fsync() check and at release_blocks for the value
 * returned on the remaining failure paths.
 */
int defrag_proc(int fd, exts_group_t *target_exts_group_head,
		unsigned long long inode)
{
	int ret = 0;
	int percent = 0;
	int blocksize = 0;
	int data_len = 0;
	struct stat64	buf;
	exts_group_t 			*exts_group;
	extent_t			*extent;
	struct ext4_extents_info	extents_info;
	struct ext4_ext_defrag_data	defrag_data;
	extent_wrap_t *wrap_list = NULL;

	/* Nothing to defrag into without a target list */
	if (!target_exts_group_head) {
		/* Fault */
		return RETURN_NG;
	}

	/* get file size (only used for the progress percentage below) */
	memset(&buf, 0, sizeof(struct stat64));
	ret = fstat64(fd, &buf);
	if (ret < 0) {
		perror(NGMSG_FILE_INFO);
		return RETURN_NG;
	}
	/* get block size */
	ret = ioctl(fd, FIGETBSZ, &blocksize);
	if (ret < 0) {
		perror(NGMSG_FILE_BLOCKSIZE);
		return RETURN_NG;
	}
	memset(&extents_info, 0, sizeof(extents_info));
	memset(&defrag_data, 0, sizeof(struct ext4_ext_defrag_data));

	extents_info.ino = 0;
	exts_group = target_exts_group_head;
	extents_info.max_entries = DEFRAG_MAX_ENT;
	extents_info.ino = inode;
	ext4_fsblk_t data_block = 0;
	ext4_fsblk_t data_start = 0;
	defrag_data.start_offset = 0;

	/* One pass per target group; the group list is circular */
	do {
		extent_wrap_t	*wrap;
		extent = exts_group->start;
		/* data_len is accumulated below but never read in here */
		data_len = 0;
		/* Remember where the group begins for the reserve request */
		data_start = extent->data.start;
		data_block = extent->data.block;
		do {
			data_len += extent->data.len;
			if (extent->tag != EXT4_EXT_USE) {
				/* Not in use: just mark it reserved */
				extent->tag = EXT4_EXT_RESERVE;
				extent = extent->next;
				continue;
			}
			/*
			 * Extent in use by extent->ino: ask the kernel to
			 * move it away.  fgoal is defined outside this
			 * function.
			 */
			extents_info.ino = extent->ino;
			extents_info.goal = fgoal;
			memcpy(extents_info.ext, &extent->data,
			       sizeof(struct ext4_extent_data));
			wrap = malloc(sizeof(extent_wrap_t));
			if (!wrap) {
				goto release_blocks;
			}
			wrap->group_ext = extent;
			wrap->next = wrap->prev = wrap;
			/* presumably links wrap into wrap_list - see move_wrap() */
			if (move_wrap(NULL, &wrap_list, wrap) < 0) {
				if (wrap) {
					free(wrap);
				}
				goto release_blocks;
			}
			extent = extent->next;
			extents_info.entries = 1;
			ret = ioctl(fd, EXT4_IOC_MOVE_VICTIM, &extents_info);
			if (ret < 0) {
				goto release_blocks;
			}
			/* The wrap list is consumed right after each move */
			mark_wrap(wrap_list);
			free_wrap_list(&wrap_list);
		} while (extent != exts_group->end->next);

		if (fsync(fd) < 0) {
			if (detail_flag) {
				perror(NGMSG_FILE_SYNC);
			}
			/*
			 * NOTE(review): ret is not negative here (it holds the
			 * last successful ioctl result or the progress of the
			 * previous pass), and this path skips release_blocks -
			 * confirm callers do not take this for success.
			 */
			return ret;
		}

		/* Reserve the whole group as one range */
		extents_info.entries = 1;
		extents_info.ext[0].block = data_block;
		extents_info.ext[0].start = data_start;
		extents_info.ext[0].len = exts_group->len;
		ret = ioctl(fd, EXT4_IOC_RESERVE_BLOCK, &extents_info);
		if (ret < 0) {
			printf("RESERVE_ERROR ret = %d\n", ret);
			printf("block is already used\n");
			goto release_blocks;
		}
		ret = do_defrag(fd, exts_group, defrag_data);
		if (ret < 0) {
			printf("DEFRAG_ERROR ret = %d\n", ret);
			goto release_blocks;
		}
		/* Accumulate progress; it is also the next start offset */
		defrag_data.start_offset += ret;
		ret = defrag_data.start_offset;

		/* print process progress */
		if (detail_flag) {
			/* NOTE(review): divides by buf.st_size - assumes a non-empty file */
			percent = ((long long)ret * blocksize * 100) /
				  buf.st_size;
			if (percent > 100) {
				percent = 100;
			}
			printf("\033[79;16H\033[K progressing ====> %d%%",
			percent);
			fflush(stdout);
		}

		exts_group = exts_group->next;
	} while (exts_group != target_exts_group_head);
	return ret;

release_blocks:
	/*
	 * Failure path: drop any pending wrap nodes and issue the release
	 * ioctl.
	 * NOTE(review): when the release ioctl succeeds, its return value is
	 * what gets returned, not RETURN_NG, although an earlier step
	 * failed - confirm this is what callers expect.
	 */
	free_wrap_list(&wrap_list);
	ret = ioctl(fd, EXT4_IOC_BLOCK_RELEASE);
	if (ret < 0) {
		return RETURN_NG;
	}

	return ret;
}

/**
 * force_defrag() -	Execute the defrag program in force mode.
 *
 * @fd:			the file's descriptor.
 * @buf:		a pointer of the struct stat64.
 * @blocksize:		block size in byte.
 *
 * Collects the used and free extents of the block group the file's inode
 * belongs to, joins them into continuous regions, picks enough regions to
 * hold the file, checks that this lowers the file's extent count and then
 * runs defrag_proc() on the chosen regions.
 *
 * Returns RETURN_NG on any failure (errno is set to ENOSPC when there is
 * not enough space, EINVAL for a non-positive @blocksize or a zero
 * inodes-per-group value), otherwise the value returned by defrag_proc().
 */
int force_defrag(int fd, const struct stat64 *buf, int blocksize)
{
	int     ret = 0;
	int     exts = 0;
	int     maxlen = 0;
	unsigned int    gnumber;
	unsigned long   filesize;
	unsigned long long      istart, iend;
	ext4_fsblk_t    bstart, bend;
	extent_t	*extlist_head = NULL;
	exts_group_t	*exts_group_list_head, *exts_group_list_target_head;
	struct ext4_group_data_info	ext4_group_data;

	exts_group_list_head = exts_group_list_target_head = NULL;

	/* Get group info */
	memset(&ext4_group_data, 0, sizeof(struct ext4_group_data_info));
	if (ioctl(fd, EXT4_IOC_GROUP_INFO, &ext4_group_data) < 0) {
		return RETURN_NG;
	}

	/* Both values are used as divisors below */
	if (blocksize <= 0 || ext4_group_data.s_inodes_per_group == 0) {
		errno = EINVAL;
		return RETURN_NG;
	}

	/*
	 * Inode numbers are 1-based: group g holds inodes
	 * g * inodes_per_group + 1 .. (g + 1) * inodes_per_group.
	 * The products are computed in 64 bits so that they cannot wrap on
	 * large filesystems before being stored in the 64-bit variables.
	 */
	gnumber = (buf->st_ino - 1) / ext4_group_data.s_inodes_per_group;
	istart = (unsigned long long)gnumber *
		 ext4_group_data.s_inodes_per_group + 1;
	iend = istart + ext4_group_data.s_inodes_per_group - 1;
	bstart = (unsigned long long)gnumber *
		 ext4_group_data.s_blocks_per_group;
	bend = bstart + ext4_group_data.s_blocks_per_group - 1;

	/* Compute filesize in block */
	filesize = (buf->st_size + blocksize - 1) / blocksize;

	/* Get used extents in the block group */
	ret = get_used_extent(fd, &extlist_head, istart, iend, bstart, bend);
	if (ret == RETURN_NG) {
		goto freelist;
	}

	/* Get free extents in the group */
	ret = get_free_extent(fd, buf->st_ino,
			     ext4_group_data.s_blocks_per_group, &extlist_head);
	if (ret == RETURN_NG) {
		goto freelist;
	}

	/* All space in this group is used by other groups' inodes */
	if (extlist_head == NULL) {
		ret = RETURN_NG;
		goto freelist;
	}

	/* Get continuous region(extents group) */
	ret = join_extents(extlist_head, &exts_group_list_target_head,
				  &exts_group_list_head, filesize, &maxlen);
	if (ret == RETURN_NG) {
		goto freelist;
	}
	if (ret == CHECK_FRAG_COUNT) {
		/* One region alone is big enough: the file gets one extent */
		exts = 1;
		goto frag_check;
	}

	if (maxlen < filesize) {
		/* No enough space */
		errno = ENOSPC;
		ret = RETURN_NG;
		goto freelist;
	}

	if (!exts_group_list_head) {
		ret = RETURN_NG;
		goto freelist;
	}

	/* Find target extents group */
	ret = find_exts_group(&exts, filesize, &exts_group_list_head,
				      &exts_group_list_target_head);
	if (ret == RETURN_NG) {
		goto freelist;
	}

frag_check:
	/* Check file extent count*/
	ret = check_frag_count(fd, buf->st_ino, exts);
	if (ret == RETURN_NG) {
		goto freelist;
	}

	/* Reserve extent group and execute the defrag program */
	ret = defrag_proc(fd, exts_group_list_target_head, buf->st_ino);

freelist:
	/* Every list is released on both the success and failure paths */
	free_exts_group(exts_group_list_target_head);
	free_exts_group(exts_group_list_head);
	free_ext(extlist_head);
	return ret;
}
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ