lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1204942396.14884.131.camel@localhost.localdomain>
Date:	Fri, 07 Mar 2008 18:13:16 -0800
From:	Mingming Cao <cmm@...ibm.com>
To:	Akira Fujita <a-fujita@...jp.nec.com>
Cc:	tytso@....edu, linux-ext4@...r.kernel.org,
	linux-fsdevel@...r.kernel.org
Subject: Re:[PATCH 3/5] ext4: online defrag -- Move the file data to the
	new blocks


ext4: online defrag -- Move the file data to the new blocks
> 
> From: Akira Fujita <a-fujita@...jp.nec.com>
> 
> Move the blocks on the temporary inode to the original inode
> by a page.
> 1. Read the file data from the old blocks to the page
> 2. Move the block on the temporary inode to the original inode
> 3. Write the file data on the page into the new blocks
> 
> Signed-off-by: Akira Fujita <a-fujita@...jp.nec.com>
> Signed-off-by: Takashi Sato <t-sato@...jp.nec.com>
> 
This patch is a bit too big to review, will do it later....it would be helpful to add more
comments as always, and small patches are preferred...

Mingming

> --
>  fs/ext4/defrag.c  |  738 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/ext4/extents.c |    2 +-
>  fs/ext4/inode.c   |    3 +-
>  3 files changed, 740 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
> index 6121705..19be87a 100644
> --- a/fs/ext4/defrag.c
> +++ b/fs/ext4/defrag.c
> @@ -127,6 +127,623 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
>  }
> 
>  /**
> + * ext4_defrag_merge_across_blocks - Merge extents across leaf block
> + *
> + * @handle	journal handle
> + * @inode	target file's inode
> + * @o_start	first original extent to be defraged
> + * @o_end	last original extent to be defraged
> + * @start_ext	first new extent to be merged
> + * @new_ext	middle of new extent to be merged
> + * @end_ext	last new extent to be merged
> + * @flag	defrag mode (e.g. -f)
> + *
> + * This function returns 0 if succeed, otherwise returns error value.
> + */
> +static int
> +ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *inode,
> +		struct ext4_extent *o_start,
> +		struct ext4_extent *o_end, struct ext4_extent *start_ext,
> +		struct ext4_extent *new_ext, struct ext4_extent *end_ext,
> +		int flag)
> +{
> +	struct ext4_ext_path *org_path = NULL;
> +	ext4_lblk_t eblock = 0;
> +	int err = 0;
> +	int new_flag = 0;
> +	int end_flag = 0;
> +	int defrag_flag;
> +
> +	if (flag == DEFRAG_FORCE_VICTIM)
> +		defrag_flag = 1;
> +	else
> +		defrag_flag = 0;
> +
> +	if (le16_to_cpu(start_ext->ee_len) &&
> +		le16_to_cpu(new_ext->ee_len) &&
> +		le16_to_cpu(end_ext->ee_len)) {
> +
> +		if ((o_start) == (o_end)) {
> +
> +			/*       start_ext   new_ext    end_ext
> +			 * dest |---------|-----------|--------|
> +			 * org  |------------------------------|
> +			 */
> +
> +			end_flag = 1;
> +		} else {
> +
> +			/*       start_ext   new_ext   end_ext
> +			 * dest |---------|----------|---------|
> +			 * org  |---------------|--------------|
> +			 */
> +
> +			o_end->ee_block = end_ext->ee_block;
> +			o_end->ee_len = end_ext->ee_len;
> +			ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
> +		}
> +
> +		o_start->ee_len = start_ext->ee_len;
> +		new_flag = 1;
> +
> +	} else if ((le16_to_cpu(start_ext->ee_len)) &&
> +			(le16_to_cpu(new_ext->ee_len)) &&
> +			(!le16_to_cpu(end_ext->ee_len)) &&
> +			((o_start) == (o_end))) {
> +
> +		/*	 start_ext	new_ext
> +		 * dest |--------------|---------------|
> +		 * org  |------------------------------|
> +		 */
> +
> +		o_start->ee_len = start_ext->ee_len;
> +		new_flag = 1;
> +
> +	} else if ((!le16_to_cpu(start_ext->ee_len)) &&
> +			(le16_to_cpu(new_ext->ee_len)) &&
> +			(le16_to_cpu(end_ext->ee_len)) &&
> +			((o_start) == (o_end))) {
> +
> +		/*	  new_ext	end_ext
> +		 * dest |--------------|---------------|
> +		 * org  |------------------------------|
> +		 */
> +
> +		o_end->ee_block = end_ext->ee_block;
> +		o_end->ee_len = end_ext->ee_len;
> +		ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
> +
> +		/*
> +		 * Set 0 to the extent block if new_ext was
> +		 * the first block.
> +		 */
> +		if (!new_ext->ee_block)
> +			eblock = 0;
> +		else
> +			eblock = le32_to_cpu(new_ext->ee_block);
> +
> +		new_flag = 1;
> +	} else {
> +		printk(KERN_ERR "ext4 defrag: Unexpected merge case\n");
> +		return -EIO;
> +	}
> +
> +	if (new_flag) {
> +		org_path = ext4_ext_find_extent(inode, eblock, NULL);
> +		if (IS_ERR(org_path)) {
> +			err = PTR_ERR(org_path);
> +			org_path = NULL;
> +			goto out;
> +		}
> +		err = ext4_ext_insert_extent_defrag(handle, inode,
> +					org_path, new_ext, defrag_flag);
> +		if (err)
> +			goto out;
> +	}
> +
> +	if (end_flag) {
> +		org_path = ext4_ext_find_extent(inode,
> +				le32_to_cpu(end_ext->ee_block) - 1, org_path);
> +		if (IS_ERR(org_path)) {
> +			err = PTR_ERR(org_path);
> +			org_path = NULL;
> +			goto out;
> +		}
> +		err = ext4_ext_insert_extent_defrag(handle, inode,
> +					org_path, end_ext, defrag_flag);
> +		if (err)
> +			goto out;
> +	}
> +out:
> +	if (org_path) {
> +		ext4_ext_drop_refs(org_path);
> +		kfree(org_path);
> +	}
> +
> +	return err;
> +
> +}
> +
> +/**
> + * ext4_defrag_merge_inside_block - Merge new extent to the extent block
> + *
> + * @handle		journal handle
> + * @inode		target file's inode
> + * @o_start		first original extent to be defraged
> + * @o_end		last original extent to be merged
> + * @start_ext		first new extent to be merged
> + * @new_ext		middle of new extent to be merged
> + * @end_ext		last new extent to be merged
> + * @eh			extent header of target leaf block
> + * @replaced		the number of blocks which will be replaced with new_ext
> + * @range_to_move	used to decide how to merge
> + *
> + * This function always returns 0.
> + */
> +static int
> +ext4_defrag_merge_inside_block(handle_t *handle, struct inode *inode,
> +		struct ext4_extent *o_start, struct ext4_extent *o_end,
> +		struct ext4_extent *start_ext, struct ext4_extent *new_ext,
> +		struct ext4_extent *end_ext, struct ext4_extent_header *eh,
> +		ext4_fsblk_t replaced, int range_to_move)
> +{
> +	int i = 0;
> +	unsigned len;
> +
> +	/* Move the existing extents */
> +	if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
> +		len = EXT_LAST_EXTENT(eh) - (o_end + 1) + 1;
> +		len = len * sizeof(struct ext4_extent);
> +		memmove(o_end + 1 + range_to_move, o_end + 1, len);
> +	}
> +
> +	/* Insert start entry */
> +	if (le16_to_cpu(start_ext->ee_len))
> +		o_start[i++].ee_len = start_ext->ee_len;
> +
> +	/* Insert new entry */
> +	if (le16_to_cpu(new_ext->ee_len)) {
> +		o_start[i].ee_block = new_ext->ee_block;
> +		o_start[i].ee_len = cpu_to_le16(replaced);
> +		ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
> +	}
> +
> +	/* Insert end entry */
> +	if (end_ext->ee_len)
> +		o_start[i] = *end_ext;
> +
> +	/* Increment the total entries counter on the extent block */
> +	eh->eh_entries
> +		= cpu_to_le16(le16_to_cpu(eh->eh_entries) + range_to_move);
> +
> +	return 0;
> +}
> +
> +/**
> + * ext4_defrag_merge_extents - Merge new extent
> + *
> + * @handle	journal handle
> + * @inode	target file's inode
> + * @org_path	path indicates first extent to be defraged
> + * @o_start	first original extent to be defraged
> + * @o_end	last original extent to be defraged
> + * @start_ext	first new extent to be merged
> + * @new_ext	middle of new extent to be merged
> + * @end_ext	last new extent to be merged
> + * @replaced	the number of blocks which will be replaced with new_ext
> + * @flag	defrag mode (e.g. -f)
> + *
> + * This function returns 0 if succeed, otherwise returns error value.
> + */
> +static int
> +ext4_defrag_merge_extents(handle_t *handle, struct inode *inode,
> +		struct ext4_ext_path *org_path,
> +		struct ext4_extent *o_start, struct ext4_extent *o_end,
> +		struct ext4_extent *start_ext, struct ext4_extent *new_ext,
> +		struct ext4_extent *end_ext, ext4_fsblk_t replaced, int flag)
> +{
> +	struct  ext4_extent_header *eh;
> +	unsigned need_slots, slots_range;
> +	int	range_to_move, depth, ret;
> +
> +	/*
> +	 * The extents need to be inserted
> +	 * start_extent + new_extent + end_extent.
> +	 */
> +	need_slots = (le16_to_cpu(start_ext->ee_len) ? 1 : 0) +
> +			(le16_to_cpu(end_ext->ee_len) ? 1 : 0) +
> +			(le16_to_cpu(new_ext->ee_len) ? 1 : 0);
> +
> +	/* The number of slots between start and end */
> +	slots_range = o_end - o_start + 1;
> +
> +	/* Range to move the end of extent */
> +	range_to_move = need_slots - slots_range;
> +	depth = org_path->p_depth;
> +	org_path += depth;
> +	eh = org_path->p_hdr;
> +
> +	if (depth) {
> +		/* Register to journal */
> +		ret = ext4_journal_get_write_access(handle, org_path->p_bh);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	/* Expansion */
> +	if ((range_to_move > 0) &&
> +		(range_to_move > le16_to_cpu(eh->eh_max)
> +			- le16_to_cpu(eh->eh_entries))) {
> +
> +		ret = ext4_defrag_merge_across_blocks(handle, inode, o_start,
> +						o_end, start_ext, new_ext,
> +						end_ext, flag);
> +		if (ret < 0)
> +			return ret;
> +	} else {
> +		ret = ext4_defrag_merge_inside_block(handle, inode, o_start,
> +					o_end, start_ext, new_ext, end_ext,
> +					eh, replaced, range_to_move);
> +		if (ret < 0)
> +			return ret;
> +	}
> +
> +	if (depth) {
> +		ret = ext4_journal_dirty_metadata(handle, org_path->p_bh);
> +		if (ret)
> +			return ret;
> +	} else {
> +		ret = ext4_mark_inode_dirty(handle, inode);
> +		if (ret < 0)
> +			return ret;
> +	}
> +
> +	return 0;
> +
> +}
> +
> +/**
> + * ext4_defrag_leaf_block - Defragmentation for one leaf extent block
> + *
> + * @handle	journal handle
> + * @org_inode	target inode
> + * @org_path	path indicates first extent to be defraged
> + * @dext	destination extent
> + * @from	start offset on the target file
> + * @flag	defrag mode (e.g. -f)
> + *
> + * This function returns 0 if succeed, otherwise returns error value.
> + */
> +static int
> +ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
> +		struct ext4_ext_path *org_path, struct ext4_extent *dext,
> +		ext4_lblk_t *from, int flag)
> +{
> +	unsigned long depth;
> +	ext4_fsblk_t replaced = 0;
> +	struct ext4_extent *oext, *o_start = NULL, *o_end = NULL, *prev_ext;
> +	struct ext4_extent new_ext, start_ext, end_ext;
> +	ext4_lblk_t new_end, lblock;
> +	unsigned short len;
> +	ext4_fsblk_t new_phys_end;
> +	int	ret;
> +
> +	depth = ext_depth(org_inode);
> +	start_ext.ee_len = end_ext.ee_len = 0;
> +	o_start = o_end = oext = org_path[depth].p_ext;
> +	ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
> +	new_ext.ee_len = dext->ee_len;
> +	len = le16_to_cpu(new_ext.ee_len);
> +	new_ext.ee_block = cpu_to_le32(*from);
> +	lblock = le32_to_cpu(oext->ee_block);
> +	new_end = le32_to_cpu(new_ext.ee_block)
> +		+ le16_to_cpu(new_ext.ee_len) - 1;
> +	new_phys_end = ext_pblock(&new_ext)
> +		+ le16_to_cpu(new_ext.ee_len) - 1;
> +
> +	/*
> +	 * First original extent
> +	 * dest	 |---------------|
> +	 * org  |---------------|
> +	 */
> +	if (le32_to_cpu(new_ext.ee_block) >
> +		le32_to_cpu(oext->ee_block) &&
> +		le32_to_cpu(new_ext.ee_block) <
> +		le32_to_cpu(oext->ee_block)
> +		+ le16_to_cpu(oext->ee_len)) {
> +		start_ext.ee_len = cpu_to_le32(le32_to_cpu(new_ext.ee_block)
> +					- le32_to_cpu(oext->ee_block));
> +		replaced += le16_to_cpu(oext->ee_len)
> +					- le16_to_cpu(start_ext.ee_len);
> +	} else if (oext > EXT_FIRST_EXTENT(org_path[depth].p_hdr)) {
> +		/* We can merge previous extent. */
> +		prev_ext = oext - 1;
> +		if (((ext_pblock(prev_ext) + le32_to_cpu(prev_ext->ee_len))
> +				 == ext_pblock(&new_ext))
> +		 && (le32_to_cpu(prev_ext->ee_block)
> +			+ le32_to_cpu(prev_ext->ee_len)
> +				 == le32_to_cpu(new_ext.ee_block))) {
> +			o_start = prev_ext;
> +			start_ext.ee_len = cpu_to_le32(
> +					le16_to_cpu(prev_ext->ee_len)
> +					+ le16_to_cpu(new_ext.ee_len));
> +			new_ext.ee_len = 0;
> +		}
> +	}
> +
> +	for (;;) {
> +		/* The extent for destination must be found. */
> +		BUG_ON(!oext || lblock != le32_to_cpu(oext->ee_block));
> +		lblock += le16_to_cpu(oext->ee_len);
> +
> +		/*
> +		 * Middle of original extent
> +		 * dest |-------------------|
> +		 * org   |-----------------|
> +		 */
> +		if (le32_to_cpu(new_ext.ee_block) <=
> +			le32_to_cpu(oext->ee_block) &&
> +			new_end >= le32_to_cpu(oext->ee_block)
> +			+ le16_to_cpu(oext->ee_len) - 1)
> +			replaced += le16_to_cpu(oext->ee_len);
> +
> +		/*
> +		 * Last original extent
> +		 * dest |----------------|
> +		 * org	  |---------------|
> +		 */
> +		if (new_end >= le32_to_cpu(oext->ee_block) &&
> +			new_end < le32_to_cpu(oext->ee_block)
> +				+ le16_to_cpu(oext->ee_len) - 1) {
> +			end_ext.ee_len
> +				= cpu_to_le16(le32_to_cpu(oext->ee_block)
> +				+ le16_to_cpu(oext->ee_len) - 1 - new_end);
> +			ext4_ext_store_pblock(&end_ext, (ext_pblock(o_end)
> +				+ cpu_to_le16(oext->ee_len)
> +				- cpu_to_le16(end_ext.ee_len)));
> +			end_ext.ee_block
> +				= cpu_to_le32(le32_to_cpu(o_end->ee_block)
> +				+ le16_to_cpu(oext->ee_len)
> +				- le16_to_cpu(end_ext.ee_len));
> +			replaced += le16_to_cpu(oext->ee_len)
> +				- le16_to_cpu(end_ext.ee_len);
> +		}
> +
> +		/*
> +		 * Detected the block end, reached the number of replaced
> +		 * blocks to dext->ee_len. Then merge the extent.
> +		 */
> +		if (oext == EXT_LAST_EXTENT(org_path[depth].p_hdr) ||
> +			new_end <= le32_to_cpu(oext->ee_block)
> +				+ le16_to_cpu(oext->ee_len) - 1) {
> +			ret = ext4_defrag_merge_extents(handle, org_inode,
> +					org_path, o_start, o_end, &start_ext,
> +					&new_ext, &end_ext, replaced, flag);
> +			if (ret < 0)
> +				return ret;
> +
> +			/* All expected blocks are replaced */
> +			if (le16_to_cpu(new_ext.ee_len) <= 0) {
> +				if (DQUOT_ALLOC_BLOCK(org_inode, len))
> +					return -EDQUOT;
> +				return 0;
> +			}
> +
> +			/* Re-calculate new_ext */
> +			new_ext.ee_len = cpu_to_le32(le16_to_cpu(new_ext.ee_len)
> +				- replaced);
> +			new_ext.ee_block =
> +				cpu_to_le32(le32_to_cpu(new_ext.ee_block)
> +				+ replaced);
> +			ext4_ext_store_pblock(&new_ext, ext_pblock(&new_ext)
> +					+ replaced);
> +			replaced = 0;
> +			start_ext.ee_len = end_ext.ee_len = 0;
> +			o_start = NULL;
> +
> +			/* All expected blocks are replaced. */
> +			if (le16_to_cpu(new_ext.ee_len) <= 0) {
> +				if (DQUOT_ALLOC_BLOCK(org_inode, len))
> +					return -EDQUOT;
> +				return 0;
> +			}
> +		}
> +
> +		/* Get the next extent for original. */
> +		if (org_path)
> +			ext4_ext_drop_refs(org_path);
> +		org_path = ext4_ext_find_extent(org_inode, lblock, org_path);
> +		if (IS_ERR(org_path)) {
> +			ret = PTR_ERR(org_path);
> +			org_path = NULL;
> +			return ret;
> +		}
> +		depth = ext_depth(org_inode);
> +		oext = org_path[depth].p_ext;
> +		if (oext->ee_block + oext->ee_len <= lblock)
> +			return -ENOENT;
> +
> +		o_end = oext;
> +		if (!o_start)
> +			o_start = oext;
> +	}
> +}
> +
> +/**
> + * ext4_defrag_replace_branches - Replace original extents with new extents
> + *
> + * @handle		journal handle
> + * @org_inode		original inode
> + * @dest_inode		temporary inode
> + * @from_page		page offset of org_inode
> + * @dest_from_page	page offset of dest_inode
> + * @count_page		page count to be replaced
> + * @flag		defrag mode (e.g. -f)
> + *
> + * This function returns 0 if succeed, otherwise returns error value.
> + * Replace extents for blocks from "from" to "from + count - 1".
> + */
> +static int
> +ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
> +			struct inode *dest_inode, pgoff_t from_page,
> +			pgoff_t dest_from_page, pgoff_t count_page, int flag)
> +{
> +	struct ext4_ext_path *org_path = NULL;
> +	struct ext4_ext_path *dest_path = NULL;
> +	struct ext4_extent *oext, *dext, *swap_ext;
> +	struct ext4_extent tmp_ext, tmp_ext2;
> +	ext4_lblk_t from, count, dest_off, diff, org_diff;
> +	int err = 0;
> +	int depth;
> +	int replaced_count = 0;
> +
> +	from = (ext4_lblk_t)from_page <<
> +			(PAGE_CACHE_SHIFT - dest_inode->i_blkbits);
> +	count = (ext4_lblk_t)count_page <<
> +			(PAGE_CACHE_SHIFT - dest_inode->i_blkbits);
> +	dest_off = (ext4_lblk_t)dest_from_page <<
> +			(PAGE_CACHE_SHIFT - dest_inode->i_blkbits);
> +
> +	/* Get the original extent for the block "from" */
> +	org_path = ext4_ext_find_extent(org_inode, from, NULL);
> +	if (IS_ERR(org_path)) {
> +		err = PTR_ERR(org_path);
> +		org_path = NULL;
> +		goto out;
> +	}
> +
> +	/* Get the destination extent for the head */
> +	dest_path = ext4_ext_find_extent(dest_inode, dest_off, NULL);
> +	if (IS_ERR(dest_path)) {
> +		err = PTR_ERR(dest_path);
> +		dest_path = NULL;
> +		goto out;
> +	}
> +	depth = ext_depth(dest_inode);
> +	dext = dest_path[depth].p_ext;
> +	/* When dext is too large, pick up the target range. */
> +	diff = dest_off - le32_to_cpu(dext->ee_block);
> +	ext4_ext_store_pblock(&tmp_ext, ext_pblock(dext) + diff);
> +	tmp_ext.ee_block = cpu_to_le32(le32_to_cpu(dext->ee_block) + diff);
> +	tmp_ext.ee_len = cpu_to_le16(le16_to_cpu(dext->ee_len) - diff);
> +	if (count < le16_to_cpu(tmp_ext.ee_len))
> +		tmp_ext.ee_len = cpu_to_le16(count);
> +	dext = &tmp_ext;
> +
> +	depth = ext_depth(org_inode);
> +	oext = org_path[depth].p_ext;
> +	org_diff = from - le32_to_cpu(oext->ee_block);
> +	ext4_ext_store_pblock(&tmp_ext2, ext_pblock(oext) + org_diff);
> +	tmp_ext2.ee_block = tmp_ext.ee_block;
> +
> +	/* Adjust extent length when blocksize != pagesize */
> +	if (tmp_ext.ee_len <= (oext->ee_len - org_diff)) {
> +		tmp_ext2.ee_len = tmp_ext.ee_len;
> +	} else {
> +		tmp_ext2.ee_len = oext->ee_len - org_diff;
> +		tmp_ext.ee_len = tmp_ext2.ee_len;
> +	}
> +	swap_ext = &tmp_ext2;
> +
> +	/* Loop for the destination extents */
> +	while (1) {
> +		/* The extent for destination must be found. */
> +		BUG_ON(!dext || dest_off != le32_to_cpu(dext->ee_block));
> +
> +		/* Loop for the original extent blocks */
> +		err = ext4_defrag_leaf_block(handle, org_inode,
> +						org_path, dext, &from, flag);
> +		if (err < 0)
> +			goto out;
> +
> +		/*
> +		 * We need the function which fixes extent information for
> +		 * inserting.
> +		 * e.g. ext4_defrag_merge_extents()
> +		 */
> +		err = ext4_defrag_leaf_block(handle, dest_inode,
> +					dest_path, swap_ext, &dest_off, -1);
> +		if (err < 0)
> +			goto out;
> +
> +		replaced_count += le16_to_cpu(dext->ee_len);
> +		dest_off += le16_to_cpu(dext->ee_len);
> +		from += le16_to_cpu(dext->ee_len);
> +
> +		/* Already moved the expected blocks */
> +		if (replaced_count >= count)
> +			break;
> +
> +		if (org_path)
> +			ext4_ext_drop_refs(org_path);
> +		org_path = ext4_ext_find_extent(org_inode, from, NULL);
> +		if (IS_ERR(org_path)) {
> +			err = PTR_ERR(org_path);
> +			org_path = NULL;
> +			goto out;
> +		}
> +		depth = ext_depth(org_inode);
> +		oext = org_path[depth].p_ext;
> +		if (oext->ee_block + oext->ee_len <= from) {
> +			err = 0;
> +			goto out;
> +		}
> +
> +		if (dest_path)
> +			ext4_ext_drop_refs(dest_path);
> +		dest_path = ext4_ext_find_extent(dest_inode, dest_off, NULL);
> +		if (IS_ERR(dest_path)) {
> +			err = PTR_ERR(dest_path);
> +			dest_path = NULL;
> +			goto out;
> +		}
> +		depth = ext_depth(dest_inode);
> +		dext = dest_path[depth].p_ext;
> +		if (dext->ee_block + dext->ee_len <= dest_off) {
> +			err = 0;
> +			goto out;
> +		}
> +
> +		/* When dext is too large, pick up the target range. */
> +		diff = dest_off - le32_to_cpu(dext->ee_block);
> +		ext4_ext_store_pblock(&tmp_ext, ext_pblock(dext) + diff);
> +		tmp_ext.ee_block =
> +			cpu_to_le32(le32_to_cpu(dext->ee_block) + diff);
> +		tmp_ext.ee_len = cpu_to_le16(le16_to_cpu(dext->ee_len) - diff);
> +
> +		if ((count - replaced_count) < le16_to_cpu(tmp_ext.ee_len))
> +			tmp_ext.ee_len = count - replaced_count ;
> +
> +		dext = &tmp_ext;
> +
> +		org_diff = from - le32_to_cpu(oext->ee_block);
> +		ext4_ext_store_pblock(&tmp_ext2, ext_pblock(oext) + org_diff);
> +		tmp_ext2.ee_block = tmp_ext.ee_block;
> +
> +		/* Adjust extent length when blocksize != pagesize */
> +		if (tmp_ext.ee_len <= (oext->ee_len - org_diff)) {
> +			tmp_ext2.ee_len = tmp_ext.ee_len;
> +		} else {
> +			tmp_ext2.ee_len = oext->ee_len - org_diff;
> +			tmp_ext.ee_len = tmp_ext2.ee_len;
> +		}
> +		swap_ext = &tmp_ext2;
> +	}
> +
> +out:
> +	if (org_path) {
> +		ext4_ext_drop_refs(org_path);
> +		kfree(org_path);
> +	}
> +	if (dest_path) {
> +		ext4_ext_drop_refs(dest_path);
> +		kfree(dest_path);
> +	}
> +
> +	return err;
> +}
> +
> +/**
>   * ext4_defrag_alloc_blocks - Allocate contiguous blocks to temporary inode
>   *
>   * @dest_inode		temporary inode for multiple block allocation
> @@ -336,6 +953,127 @@ out2:
>  }
> 
>  /**
> + * ext4_defrag_partial - Defrag a file per page
> + *
> + * @tmp_inode:		the inode which has blocks to swap with original
> + * @filp:		pointer to file
> + * @org_offset:		page index on original file
> + * @dest_offset:	page index on temporary file
> + * @flag:		defrag mode (e.g. -f)
> + *
> + * This function returns 0 if succeeded, otherwise returns error value.
> + */
> +static int
> +ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
> +			pgoff_t org_offset, pgoff_t dest_offset, int flag)
> +{
> +	struct inode *inode = filp->f_dentry->d_inode;
> +	struct address_space *mapping = inode->i_mapping;
> +	struct buffer_head *bh;
> +	struct page *page;
> +	const struct address_space_operations *a_ops = mapping->a_ops;
> +	handle_t *handle;
> +	pgoff_t offset_in_page = PAGE_SIZE;
> +	int jblocks;
> +	int ret = 0;
> +	int blocksize = inode->i_sb->s_blocksize;
> +	int blocks_per_page = 0;
> +	int i = 0;
> +	long long offs = org_offset << PAGE_CACHE_SHIFT;
> +	unsigned long blk_off = 0;
> +	unsigned int w_flags = 0;
> +	void *fsdata;
> +
> +	/*
> +	 * It needs twice the amount of ordinary journal buffers because
> +	 * inode and tmp_inode may change each different metadata blocks.
> +	 */
> +	jblocks = ext4_writepage_trans_blocks(inode) * 2;
> +	handle = ext4_journal_start(inode, jblocks);
> +	if (IS_ERR(handle)) {
> +		ret = PTR_ERR(handle);
> +		return ret;
> +	}
> +
> +	if (segment_eq(get_fs(), KERNEL_DS))
> +		w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
> +
> +	if (org_offset == ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
> +		offset_in_page = (inode->i_size & (PAGE_CACHE_SIZE - 1));
> +		/*
> +		 * Set PAGE_CACHE_SIZE to offset_in_page not be 0
> +		 * if org_offset is the last page and i_size is
> +		 * multiples of PAGE_CACHE_SIZE.
> +		 */
> +		if (offset_in_page == 0)
> +			offset_in_page = PAGE_CACHE_SIZE;
> +	}
> +
> +	up_write(&EXT4_I(inode)->i_data_sem);
> +	ret = a_ops->write_begin(filp, mapping, offs,
> +				offset_in_page, w_flags, &page, &fsdata);
> +	down_write(&EXT4_I(inode)->i_data_sem);
> +
> +	if (unlikely(ret < 0))
> +		goto out;
> +
> +	if (!PageUptodate(page)) {
> +		mapping->a_ops->readpage(filp, page);
> +		lock_page(page);
> +	}
> +
> +	/*
> +	 * try_to_release_page() doesn't call relasepage in writeback mode.
> +	 * We should care about the order of writing to the same file
> +	 * by multiple defrag processes.
> +	 * It needs to call wait_on_page_writeback() to wait for the
> +	 * writeback of the page.
> +	 */
> +	if (PageWriteback(page))
> +		wait_on_page_writeback(page);
> +
> +	/* Release old bh and drop refs */
> +	try_to_release_page(page, 0);
> +	ret = ext4_defrag_replace_branches(handle, inode, tmp_inode,
> +					org_offset, dest_offset, 1, flag);
> +
> +	if (ret < 0)
> +		goto out;
> +
> +	/* Clear the inode cache not to refer to the old data */
> +	ext4_ext_invalidate_cache(inode);
> +
> +	if (!page_has_buffers(page))
> +		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
> +
> +	blocks_per_page = PAGE_SIZE / blocksize;
> +	blk_off = org_offset * blocks_per_page;
> +
> +	bh = page_buffers(page);
> +	for (i = 0; i < blocks_per_page; i++) {
> +		up_write(&EXT4_I(inode)->i_data_sem);
> +		ret = ext4_get_block(inode, blk_off++, bh, 0);
> +		down_write(&EXT4_I(inode)->i_data_sem);
> +
> +		if (ret < 0)
> +			goto out;
> +
> +		if (bh->b_this_page != NULL)
> +			bh = bh->b_this_page;
> +	}
> +
> +	ret = a_ops->write_end(filp, mapping, offs, offset_in_page,
> +				offset_in_page, page, fsdata);
> +
> +	if (unlikely(ret < 0))
> +		goto out;
> +out:
> +	ext4_journal_stop(handle);
> +
> +	return (ret < 0 ? ret : 0);
> +}
> +
> +/**
>   * ext4_defrag_new_extent_tree -  Allocate contiguous blocks
>   *
>   * @inode:		inode of the original file
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index f8828ff..dd8dc46 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -1171,7 +1171,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
>   * allocated block. Thus, index entries have to be consistent
>   * with leaves.
>   */
> -static ext4_lblk_t
> +ext4_lblk_t
>  ext4_ext_next_allocated_block(struct ext4_ext_path *path)
>  {
>  	int depth;
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 0f252db..695877e 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -991,8 +991,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
>  	up_write((&EXT4_I(inode)->i_data_sem));
>  	return retval;
>  }
> -
> -static int ext4_get_block(struct inode *inode, sector_t iblock,
> +int ext4_get_block(struct inode *inode, sector_t iblock,
>  			struct buffer_head *bh_result, int create)
>  {
>  	handle_t *handle = ext4_journal_current_handle();
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ