lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <878v219ntd.fsf@openvz.org>
Date:	Sun, 23 Jun 2013 13:30:22 +0400
From:	Dmitry Monakhov <dmonakhov@...nvz.org>
To:	Namjae Jeon <linkinjeon@...il.com>, tytso@....edu,
	adilger.kernel@...ger.ca
Cc:	linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
	linux-ext4@...r.kernel.org, a.sangwan@...sung.com,
	Namjae Jeon <linkinjeon@...il.com>,
	Namjae Jeon <namjae.jeon@...sung.com>
Subject: Re: [PATCH 1/3] ext4: Add EXT4_IOC_TRUNCATE_BLOCK_RANGE ioctl

On Sun, 23 Jun 2013 15:07:36 +0900, Namjae Jeon <linkinjeon@...il.com> wrote:
> From: Namjae Jeon <namjae.jeon@...sung.com>
What is the difference between this ioctl and generic punch_hole?
> 
> The EXT4_IOC_TRUNCATE_BLOCK_RANGE ioctl removes the data blocks lying
> in the range [start, start + length) and updates the logical block numbers
> of the data blocks from block "start + length" to the last block of the file.
> This keeps the logical block numbers contiguous
> after block removal.
> Both the inode's disksize and logical size are updated after block
> removal.
> 
> Signed-off-by: Namjae Jeon <namjae.jeon@...sung.com>
> Signed-off-by: Ashish Sangwan <a.sangwan@...sung.com>
> ---
>  fs/ext4/ext4.h         |    8 ++
>  fs/ext4/ext4_extents.h |    3 +
>  fs/ext4/extents.c      |  245 ++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/ext4/ioctl.c        |   62 ++++++++++++
>  4 files changed, 318 insertions(+)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 6ed348d..df2c411 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -590,6 +590,7 @@ enum {
>  #define EXT4_IOC_MOVE_EXT		_IOWR('f', 15, struct move_extent)
>  #define EXT4_IOC_RESIZE_FS		_IOW('f', 16, __u64)
>  #define EXT4_IOC_SWAP_BOOT		_IO('f', 17)
> +#define EXT4_IOC_TRUNCATE_BLOCK_RANGE	_IOW('f', 18, struct truncate_range)
>  
>  #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
>  /*
> @@ -682,6 +683,11 @@ struct move_extent {
>  	__u64 moved_len;	/* moved block length */
>  };
>  
> +struct truncate_range {
> +	__u32 start_block;
> +	__u32 length;
> +};
> +
>  #define EXT4_EPOCH_BITS 2
>  #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
>  #define EXT4_NSEC_MASK  (~0UL << EXT4_EPOCH_BITS)
> @@ -2692,6 +2698,8 @@ extern int ext4_find_delalloc_range(struct inode *inode,
>  extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
>  extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>  			__u64 start, __u64 len);
> +extern int ext4_ext_truncate_range(struct inode *inode, ext4_lblk_t start,
> +				   ext4_lblk_t end, ext4_lblk_t last_block);
>  
>  
>  /* move_extent.c */
> diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
> index 51bc821..cc113cc 100644
> --- a/fs/ext4/ext4_extents.h
> +++ b/fs/ext4/ext4_extents.h
> @@ -178,6 +178,9 @@ struct ext4_ext_path {
>  #define EXT_MAX_INDEX(__hdr__) \
>  	(EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
>  
> +#define EXTENT_START_FLAG	0x1
> +#define INDEX_START_FLAG	0x2
> +
>  static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
>  {
>  	return (struct ext4_extent_header *) EXT4_I(inode)->i_data;
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 937593e..ed85e34 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -4757,3 +4757,248 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>  
>  	return error;
>  }
> +
> +/*
> + * ext4_trange_dirty_path: Function to mark the path buffer dirty.
> + * It also checks if there are sufficient credits left in the
> + * journal to update metadata. If too few credits remain, the
> + * handle is restarted with additional credits.
> + *
> + * @handle: journal handle
> + * @inode: file inode
> + * @path: pointer to path
> + * @num: number of inodes to be updated
> + *
> + * Returns: 0 on success or negative value on error
> + */
> +int ext4_trange_dirty_path(handle_t *handle, struct inode *inode,
> +			   struct ext4_ext_path *path,
> +			   int num, ...)
> +{
> +	int credits, err, i;
> +	struct inode *iptr;
> +	va_list args;
> +
> +	/*
> +	 * Check if need to extend journal credits
> +	 * 3 for leaf, sb, and inode plus 2 (bmap and group
> +	 * descriptor) for each block group; assume two block
> +	 * groups
> +	 */
> +	if (handle->h_buffer_credits < 7*(num + 1)) {
> +		credits = ext4_writepage_trans_blocks(inode);
> +		va_start(args, num);
> +		for (i = 1; i <= num; i++) {
> +			iptr = va_arg(args, struct inode *);
> +			credits += ext4_writepage_trans_blocks(iptr);
> +		}
> +		va_end(args);
> +		err = ext4_ext_truncate_extend_restart(handle, inode, credits);
> +		/* EAGAIN is success */
> +		if (err && err != -EAGAIN)
> +			return err;
> +	}
> +	err = ext4_ext_get_access(handle, inode, path);
> +	return err;
> +}
> +
> +/*
> + * ext4_ext_update_path: update the extents of a path structure
> + * lying between path[depth].p_ext and EXT_LAST_EXTENT(path[depth].p_hdr)
> + * by subtracting shift from the starting block of each extent.
> + *
> + * @path: path for which extents are updated
> + * @shift: Number of blocks to be subtracted from first logical block
> + * that extent covers for each extent.
> + * @inode: file inode
> + * @handle: journal handle
> + * @start_block: Points to the starting block of next extent which is
> + * to be updated.
> + *
> + * Returns: 0 on success or negative on error.
> + */
> +int ext4_ext_update_path(struct ext4_ext_path *path, ext4_lblk_t shift,
> +			 struct inode *inode, handle_t *handle,
> +			 ext4_lblk_t *start_block)
> +{
> +	int depth, err = 0, flag = 0;
> +	struct ext4_extent *ex_start, *ex_last;
> +
> +	depth = path->p_depth;
> +	while (depth >= 0) {
> +		if (depth == path->p_depth) {
> +			ex_start = path[depth].p_ext;
> +			if (!ex_start)
> +				return -EIO;
> +
> +			err = ext4_trange_dirty_path(handle, inode,
> +						     path + depth, 0);
> +			if (err)
> +				goto out;
> +
> +			if (path[depth].p_ext ==
> +				EXT_FIRST_EXTENT(path[depth].p_hdr))
> +				flag |= EXTENT_START_FLAG;
> +
> +			ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
> +			while (ex_start <= ex_last) {
> +				*start_block = ex_start->ee_block +
> +					ext4_ext_get_actual_len(ex_start);
> +				ex_start->ee_block -= shift;
> +				ex_start++;
> +			}
> +			err = ext4_ext_dirty(handle, inode, path + depth);
> +			if (err)
> +				goto out;
> +		} else {
> +			/* If encountered starting extent, update index too */
> +			if (path->p_depth - depth == 1) {
> +				if (flag & EXTENT_START_FLAG) {
> +					/* Update index too */
> +					err = ext4_trange_dirty_path(handle,
> +						    inode, path + depth, 0);
> +					if (err)
> +						goto out;
> +					path[depth].p_idx->ei_block -= shift;
> +					err = ext4_ext_dirty(handle, inode,
> +							     path + depth);
> +					if (err)
> +						goto out;
> +					flag &= ~EXTENT_START_FLAG;
> +				} else
> +					/* No need to update any extent index */
> +					break;
> +			}
> +			/* Check, if earlier encountered starting index */
> +			if (flag & INDEX_START_FLAG) {
> +				err = ext4_trange_dirty_path(handle, inode,
> +							path + (depth), 0);
> +				if (err)
> +					goto out;
> +				path[depth].p_idx->ei_block -= shift;
> +				err = ext4_ext_dirty(handle, inode,
> +						     path + depth);
> +				if (err)
> +					goto out;
> +				flag &= ~INDEX_START_FLAG;
> +			}
> +			/* Check if this is a starting index */
> +			if (path[depth].p_idx ==
> +			    EXT_FIRST_INDEX(path[depth].p_hdr)) {
> +				/* starting of a block */
> +				flag |= INDEX_START_FLAG;
> +			} else
> +				break;
> +		}
> +		depth--;
> +	}
> +out:
> +	return err;
> +}
> +
> +/*
> + * ext4_ext_update_logical: update logical blocks ranging from start
> + * to the end block for inode by moving them shift blocks to the left
> + *
> + * @inode: file inode
> + * @handle: journal handle
> + * @start_block: starting block for the block update
> + * @shift: number of blocks to be shifted
> + * @end_block: last block to be updated
> + *
> + * Returns: 0 on success or negative on failure
> + */
> +static int ext4_ext_update_logical(struct inode *inode, handle_t *handle,
> +				   ext4_lblk_t start_block, ext4_lblk_t shift,
> +				   ext4_lblk_t end_block)
> +{
> +	struct ext4_ext_path *path;
> +	int err = 0;
> +
> +	while (start_block < end_block) {
> +		path = ext4_ext_find_extent(inode, start_block, NULL);
> +		if (IS_ERR(path)) {
> +			err = PTR_ERR(path);
> +			break;
> +		}
> +		err = ext4_ext_update_path(path, shift, inode,
> +					   handle, &start_block);
> +		ext4_ext_drop_refs(path);
> +		kfree(path);
> +		if (err)
> +			break;
> +	}
> +	return err;
> +}
> +
> +/*
> + * ext4_ext_truncate_range: remove the block range from start
> + * block to end block (inclusive) from the inode.
> + *
> + * @inode: file inode
> + * @start: start block
> + * @end: end block
> + * @last_block: last block number of the inode
> + *
> + * Returns: 0 on success or negative on error
> + */
> +int ext4_ext_truncate_range(struct inode *inode, ext4_lblk_t start,
> +			    ext4_lblk_t end, ext4_lblk_t last_block)
> +{
> +	int ret, credits;
> +	ext4_lblk_t shift = end - start + 1;
> +	handle_t *handle;
> +	loff_t isize_reduced;
> +	int blkbits = inode->i_blkbits;
> +	struct address_space *mapping = inode->i_mapping;
> +
> +	/* sync dirty pages for transfer */
> +	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
> +		ret = filemap_write_and_wait_range(mapping,
> +				(loff_t)start << blkbits,
> +				((loff_t)(last_block + 1) << blkbits) - 1);
> +		if (ret)
> +			return ret;
> +	}
> +	truncate_inode_pages_range(inode->i_mapping,
> +				   start << inode->i_blkbits, -1);
> +	ext4_inode_block_unlocked_dio(inode);
> +	inode_dio_wait(inode);
> +	down_write(&EXT4_I(inode)->i_data_sem);
> +	ext4_discard_preallocations(inode);
> +	ret = ext4_es_remove_extent(inode, start, end - start + 1);
> +	if (ret)
> +		goto out;
> +
> +	credits = ext4_writepage_trans_blocks(inode);
> +	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
> +	if (IS_ERR(handle)) {
> +		ret = PTR_ERR(handle);
> +		goto out;
> +	}
> +
> +	ret = ext4_ext_remove_space(inode, start, end);
> +	if (ret)
> +		goto journal_stop;
> +
> +	ext4_discard_preallocations(inode);
> +
> +	if (end < last_block) {
> +		ret = ext4_ext_update_logical(inode, handle, end + 1,
> +					      shift, last_block + 1);
> +		if (ret)
> +			goto journal_stop;
> +	}
> +	isize_reduced = (loff_t)shift << blkbits;
> +	i_size_write(inode, inode->i_size - isize_reduced);
> +	EXT4_I(inode)->i_disksize -= isize_reduced;
> +	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
> +	ext4_mark_inode_dirty(handle, inode);
> +journal_stop:
> +	ext4_journal_stop(handle);
> +out:
> +	ext4_inode_resume_unlocked_dio(inode);
> +	up_write(&EXT4_I(inode)->i_data_sem);
> +	return ret;
> +}
> +
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 9491ac0..0530daf 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -622,6 +622,68 @@ resizefs_out:
>  
>  		return 0;
>  	}
> +	case EXT4_IOC_TRUNCATE_BLOCK_RANGE:
> +	{
> +		struct truncate_range tr;
> +		ext4_lblk_t last_block, end_block;
> +		int error;
> +		loff_t i_size = i_size_read(inode);
> +
> +		if (!i_size)
> +			return 0;
> +
> +		if (!(filp->f_mode & FMODE_WRITE))
> +			return -EBADF;
> +
> +		if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
> +			return -EPERM;
> +
> +		if (!S_ISREG(inode->i_mode))
> +			return -EOPNOTSUPP;
> +
> +		if (IS_SWAPFILE(inode))
> +			return -EOPNOTSUPP;
> +
> +		if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
> +			return -EOPNOTSUPP;
> +
> +		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
> +		    EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
> +			ext4_msg(sb, KERN_ERR,
> +			 "Truncate block range not supported with bigalloc");
> +			return -EOPNOTSUPP;
> +		}
> +
> +		if (copy_from_user(&tr, (const void *) arg,
> +				sizeof(struct truncate_range)))
> +			return -EFAULT;
> +
> +		if (!tr.length)
> +			return -EINVAL;
> +
> +		end_block = tr.start_block + tr.length - 1;
> +
> +		last_block = ((round_up(i_size,
> +					EXT4_BLOCK_SIZE(inode->i_sb)))
> +			      >> inode->i_blkbits) - 1;
> +		if (tr.start_block > end_block ||
> +		    tr.start_block > last_block)
> +			return -EINVAL;
> +
> +		if (end_block > last_block)
> +			end_block = last_block;
> +
> +		error = mnt_want_write_file(filp);
> +		if (error)
> +			return error;
> +
> +		mutex_lock(&inode->i_mutex);
> +		error = ext4_ext_truncate_range(inode, tr.start_block,
> +						end_block, last_block);
> +		mutex_unlock(&inode->i_mutex);
> +		mnt_drop_write_file(filp);
> +		return error;
> +	}
>  
>  	default:
>  		return -ENOTTY;
> -- 
> 1.7.9.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists