lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87sk8dsj35.fsf@linux.vnet.ibm.com>
Date:	Sat, 06 Mar 2010 18:28:06 +0530
From:	"Aneesh Kumar K. V" <aneesh.kumar@...ux.vnet.ibm.com>
To:	"Theodore Ts'o" <tytso@....edu>,
	Ext4 Developers List <linux-ext4@...r.kernel.org>
Cc:	"Theodore Ts'o" <tytso@....edu>,
	Jiaying Zhang <jiayingz@...gle.com>
Subject: Re: [PATCH 24/28] ext4: mechanical change on dio get_block code in
 prepare for it to be used by buffer write

On Tue,  2 Mar 2010 13:18:41 -0500, "Theodore Ts'o" <tytso@....edu> wrote:
> Renaming the dio block allocation flags, variables, and functions
> introduced in Mingming's "Direct IO for holes and fallocate"
> patches so that they can be used by ext4 buffer write as well.
> Also changed the related function comments accordingly to cover
> both direct write and buffer wirte cases.
> 
> Signed-off-by: Jiaying Zhang <jiayingz@...gle.com>
> Signed-off-by: "Theodore Ts'o" <tytso@....edu>
> ---
>  fs/ext4/ext4.h    |   18 ++++++------
>  fs/ext4/extents.c |   24 +++++++-------
>  fs/ext4/fsync.c   |    2 +-
>  fs/ext4/inode.c   |   84 ++++++++++++++++++++++++-----------------------------
>  fs/ext4/super.c   |    2 +-
>  5 files changed, 61 insertions(+), 69 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 74664ca..c831a58 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -133,7 +133,7 @@ struct mpage_da_data {
>  	int pages_written;
>  	int retval;
>  };
> -#define	DIO_AIO_UNWRITTEN	0x1
> +#define	EXT4_IO_UNWRITTEN	0x1
>  typedef struct ext4_io_end {
>  	struct list_head	list;		/* per-file finished AIO list */
>  	struct inode		*inode;		/* file being written to */
> @@ -355,13 +355,13 @@ struct ext4_new_group_data {
>  	/* caller is from the direct IO path, request to creation of an
>  	unitialized extents if not allocated, split the uninitialized
>  	extent if blocks has been preallocated already*/
> -#define EXT4_GET_BLOCKS_DIO			0x0008
> +#define EXT4_GET_BLOCKS_PRE_IO			0x0008
>  #define EXT4_GET_BLOCKS_CONVERT			0x0010
> -#define EXT4_GET_BLOCKS_DIO_CREATE_EXT		(EXT4_GET_BLOCKS_DIO|\
> +#define EXT4_GET_BLOCKS_IO_CREATE_EXT		(EXT4_GET_BLOCKS_PRE_IO|\
>  					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
> -	/* Convert extent to initialized after direct IO complete */
> -#define EXT4_GET_BLOCKS_DIO_CONVERT_EXT		(EXT4_GET_BLOCKS_CONVERT|\
> -					 EXT4_GET_BLOCKS_DIO_CREATE_EXT)
> +	/* Convert extent to initialized after IO complete */
> +#define EXT4_GET_BLOCKS_IO_CONVERT_EXT		(EXT4_GET_BLOCKS_CONVERT|\
> +					 EXT4_GET_BLOCKS_IO_CREATE_EXT)
> 

Last time reviewed we said this flags need to documented further. I also
had a question regarding the last flag. The related message id is
message-id:87y6jw23yn.fsf@...ux.vnet.ibm.com


>  /*
>   * Flags used by ext4_free_blocks
> @@ -700,8 +700,8 @@ struct ext4_inode_info {
>  	qsize_t i_reserved_quota;
>  #endif
> 
> -	/* completed async DIOs that might need unwritten extents handling */
> -	struct list_head i_aio_dio_complete_list;
> +	/* completed IOs that might need unwritten extents handling */
> +	struct list_head i_completed_io_list;
>  	/* current io_end structure for async DIO write*/
>  	ext4_io_end_t *cur_aio_dio;
> 
> @@ -1461,7 +1461,7 @@ extern int ext4_block_truncate_page(handle_t *handle,
>  		struct address_space *mapping, loff_t from);
>  extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
>  extern qsize_t *ext4_get_reserved_space(struct inode *inode);
> -extern int flush_aio_dio_completed_IO(struct inode *inode);
> +extern int flush_completed_IO(struct inode *inode);
>  extern void ext4_da_update_reserve_space(struct inode *inode,
>  					int used, int quota_claim);
>  /* ioctl.c */
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index e0e2009..f6ea4a2 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -1619,7 +1619,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
>  	BUG_ON(path[depth].p_hdr == NULL);
> 
>  	/* try to insert block into found extent and return */
> -	if (ex && (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT)
> +	if (ex && (flag != EXT4_GET_BLOCKS_PRE_IO)
>  		&& ext4_can_extents_be_merged(inode, ex, newext)) {
>  		ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
>  				ext4_ext_is_uninitialized(newext),
> @@ -1740,7 +1740,7 @@ has_space:
> 
>  merge:
>  	/* try to merge extents to the right */
> -	if (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT)
> +	if (flag != EXT4_GET_BLOCKS_PRE_IO)
>  		ext4_ext_try_to_merge(inode, path, nearex);
> 
>  	/* try to merge extents to the left */
> @@ -2984,7 +2984,7 @@ fix_extent_len:
>  	ext4_ext_dirty(handle, inode, path + depth);
>  	return err;
>  }
> -static int ext4_convert_unwritten_extents_dio(handle_t *handle,
> +static int ext4_convert_unwritten_extents_endio(handle_t *handle,
>  					      struct inode *inode,
>  					      struct ext4_ext_path *path)
>  {
> @@ -3064,8 +3064,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
>  		  flags, allocated);
>  	ext4_ext_show_leaf(inode, path);
> 
> -	/* DIO get_block() before submit the IO, split the extent */
> -	if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
> +	/* get_block() before submit the IO, split the extent */
> +	if (flags == EXT4_GET_BLOCKS_PRE_IO) {
>  		ret = ext4_split_unwritten_extents(handle,
>  						inode, path, iblock,
>  						max_blocks, flags);
> @@ -3075,14 +3075,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
>  		 * completed
>  		 */
>  		if (io)
> -			io->flag = DIO_AIO_UNWRITTEN;
> +			io->flag = EXT4_IO_UNWRITTEN;
>  		else
>  			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
>  		goto out;
>  	}
> -	/* async DIO end_io complete, convert the filled extent to written */
> -	if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
> -		ret = ext4_convert_unwritten_extents_dio(handle, inode,
> +	/* IO end_io complete, convert the filled extent to written */
> +	if (flags == EXT4_GET_BLOCKS_CONVERT) {
> +		ret = ext4_convert_unwritten_extents_endio(handle, inode,
>  							path);
>  		if (ret >= 0)
>  			ext4_update_inode_fsync_trans(handle, inode, 1);
> @@ -3359,9 +3359,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>  		 * For non asycn direct IO case, flag the inode state
>  		 * that we need to perform convertion when IO is done.
>  		 */
> -		if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
> +		if (flags == EXT4_GET_BLOCKS_PRE_IO) {
>  			if (io)
> -				io->flag = DIO_AIO_UNWRITTEN;
> +				io->flag = EXT4_IO_UNWRITTEN;
>  			else
>  				ext4_set_inode_state(inode,
>  						     EXT4_STATE_DIO_UNWRITTEN);
> @@ -3656,7 +3656,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
>  		map_bh.b_state = 0;
>  		ret = ext4_get_blocks(handle, inode, block,
>  				      max_blocks, &map_bh,
> -				      EXT4_GET_BLOCKS_DIO_CONVERT_EXT);
> +				      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
>  		if (ret <= 0) {
>  			WARN_ON(ret <= 0);
>  			printk(KERN_ERR "%s: ext4_ext_get_blocks "
> diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
> index 98bd140..0d0c323 100644
> --- a/fs/ext4/fsync.c
> +++ b/fs/ext4/fsync.c
> @@ -63,7 +63,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
>  	if (inode->i_sb->s_flags & MS_RDONLY)
>  		return 0;
> 
> -	ret = flush_aio_dio_completed_IO(inode);
> +	ret = flush_completed_IO(inode);
>  	if (ret < 0)
>  		return ret;
>  	
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 427f469..28f116b 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3468,7 +3468,7 @@ out:
>  	return ret;
>  }
> 
> -static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock,
> +static int ext4_get_block_write(struct inode *inode, sector_t iblock,
>  		   struct buffer_head *bh_result, int create)
>  {
>  	handle_t *handle = NULL;
> @@ -3476,28 +3476,14 @@ static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock,
>  	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
>  	int dio_credits;
> 
> -	ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n",
> +	ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
>  		   inode->i_ino, create);
>  	/*
> -	 * DIO VFS code passes create = 0 flag for write to
> -	 * the middle of file. It does this to avoid block
> -	 * allocation for holes, to prevent expose stale data
> -	 * out when there is parallel buffered read (which does
> -	 * not hold the i_mutex lock) while direct IO write has
> -	 * not completed. DIO request on holes finally falls back
> -	 * to buffered IO for this reason.
> -	 *
> -	 * For ext4 extent based file, since we support fallocate,
> -	 * new allocated extent as uninitialized, for holes, we
> -	 * could fallocate blocks for holes, thus parallel
> -	 * buffered IO read will zero out the page when read on
> -	 * a hole while parallel DIO write to the hole has not completed.
> -	 *
> -	 * when we come here, we know it's a direct IO write to
> -	 * to the middle of file (<i_size)
> -	 * so it's safe to override the create flag from VFS.
> +	 * ext4_get_block in prepare for a DIO write or buffer write.
> +	 * We allocate an uinitialized extent if blocks haven't been allocated.
> +	 * The extent will be converted to initialized after IO complete.
>  	 */
> -	create = EXT4_GET_BLOCKS_DIO_CREATE_EXT;
> +	create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
> 
>  	if (max_blocks > DIO_MAX_BLOCKS)
>  		max_blocks = DIO_MAX_BLOCKS;
> @@ -3524,19 +3510,20 @@ static void ext4_free_io_end(ext4_io_end_t *io)
>  	iput(io->inode);
>  	kfree(io);
>  }
> -static void dump_aio_dio_list(struct inode * inode)
> +
> +static void dump_completed_IO(struct inode * inode)
>  {
>  #ifdef	EXT4_DEBUG
>  	struct list_head *cur, *before, *after;
>  	ext4_io_end_t *io, *io0, *io1;
> 
> -	if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){
> -		ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino);
> +	if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
> +		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
>  		return;
>  	}
> 
> -	ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino);
> -	list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){
> +	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
> +	list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
>  		cur = &io->list;
>  		before = cur->prev;
>  		io0 = container_of(before, ext4_io_end_t, list);
> @@ -3552,21 +3539,21 @@ static void dump_aio_dio_list(struct inode * inode)
>  /*
>   * check a range of space and convert unwritten extents to written.
>   */
> -static int ext4_end_aio_dio_nolock(ext4_io_end_t *io)
> +static int ext4_end_io_nolock(ext4_io_end_t *io)
>  {
>  	struct inode *inode = io->inode;
>  	loff_t offset = io->offset;
>  	ssize_t size = io->size;
>  	int ret = 0;
> 
> -	ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p,"
> +	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
>  		   "list->prev 0x%p\n",
>  	           io, inode->i_ino, io->list.next, io->list.prev);
> 
>  	if (list_empty(&io->list))
>  		return ret;
> 
> -	if (io->flag != DIO_AIO_UNWRITTEN)
> +	if (io->flag != EXT4_IO_UNWRITTEN)
>  		return ret;
> 
>  	if (offset + size <= i_size_read(inode))
> @@ -3584,17 +3571,18 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io)
>  	io->flag = 0;
>  	return ret;
>  }
> +
>  /*
>   * work on completed aio dio IO, to convert unwritten extents to extents
>   */
> -static void ext4_end_aio_dio_work(struct work_struct *work)
> +static void ext4_end_io_work(struct work_struct *work)
>  {
>  	ext4_io_end_t *io  = container_of(work, ext4_io_end_t, work);
>  	struct inode *inode = io->inode;
>  	int ret = 0;
> 
>  	mutex_lock(&inode->i_mutex);
> -	ret = ext4_end_aio_dio_nolock(io);
> +	ret = ext4_end_io_nolock(io);
>  	if (ret >= 0) {
>  		if (!list_empty(&io->list))
>  			list_del_init(&io->list);
> @@ -3602,32 +3590,35 @@ static void ext4_end_aio_dio_work(struct work_struct *work)
>  	}
>  	mutex_unlock(&inode->i_mutex);
>  }
> +
>  /*
>   * This function is called from ext4_sync_file().
>   *
> - * When AIO DIO IO is completed, the work to convert unwritten
> - * extents to written is queued on workqueue but may not get immediately
> + * When IO is completed, the work to convert unwritten extents to
> + * written is queued on workqueue but may not get immediately
>   * scheduled. When fsync is called, we need to ensure the
>   * conversion is complete before fsync returns.
> - * The inode keeps track of a list of completed AIO from DIO path
> - * that might needs to do the conversion. This function walks through
> - * the list and convert the related unwritten extents to written.
> + * The inode keeps track of a list of pending/completed IO that
> + * might needs to do the conversion. This function walks through
> + * the list and convert the related unwritten extents for completed IO
> + * to written.
> + * The function return the number of pending IOs on success.
>   */
> -int flush_aio_dio_completed_IO(struct inode *inode)
> +int flush_completed_IO(struct inode *inode)
>  {
>  	ext4_io_end_t *io;
>  	int ret = 0;
>  	int ret2 = 0;
> 
> -	if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list))
> +	if (list_empty(&EXT4_I(inode)->i_completed_io_list))
>  		return ret;
> 
> -	dump_aio_dio_list(inode);
> -	while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){
> -		io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next,
> +	dump_completed_IO(inode);
> +	while (!list_empty(&EXT4_I(inode)->i_completed_io_list)){
> +		io = list_entry(EXT4_I(inode)->i_completed_io_list.next,
>  				ext4_io_end_t, list);
>  		/*
> -		 * Calling ext4_end_aio_dio_nolock() to convert completed
> +		 * Calling ext4_end_io_nolock() to convert completed
>  		 * IO to written.
>  		 *
>  		 * When ext4_sync_file() is called, run_queue() may already
> @@ -3640,7 +3631,7 @@ int flush_aio_dio_completed_IO(struct inode *inode)
>  		 * avoid double converting from both fsync and background work
>  		 * queue work.
>  		 */
> -		ret = ext4_end_aio_dio_nolock(io);
> +		ret = ext4_end_io_nolock(io);
>  		if (ret < 0)
>  			ret2 = ret;
>  		else
> @@ -3662,7 +3653,7 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode)
>  		io->offset = 0;
>  		io->size = 0;
>  		io->error = 0;
> -		INIT_WORK(&io->work, ext4_end_aio_dio_work);
> +		INIT_WORK(&io->work, ext4_end_io_work);
>  		INIT_LIST_HEAD(&io->list);
>  	}
> 
> @@ -3685,7 +3676,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
>  		  size);
> 
>  	/* if not aio dio with unwritten extents, just free io and return */
> -	if (io_end->flag != DIO_AIO_UNWRITTEN){
> +	if (io_end->flag != EXT4_IO_UNWRITTEN){
>  		ext4_free_io_end(io_end);
>  		iocb->private = NULL;
>  		return;
> @@ -3700,9 +3691,10 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
> 
>  	/* Add the io_end to per-inode completed aio dio list*/
>  	list_add_tail(&io_end->list,
> -		 &EXT4_I(io_end->inode)->i_aio_dio_complete_list);
> +		 &EXT4_I(io_end->inode)->i_completed_io_list);
>  	iocb->private = NULL;
>  }
> +
>  /*
>   * For ext4 extent files, ext4 will do direct-io write to holes,
>   * preallocated extents, and those write extend the file, no need to
> @@ -3772,7 +3764,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
>  		ret = blockdev_direct_IO(rw, iocb, inode,
>  					 inode->i_sb->s_bdev, iov,
>  					 offset, nr_segs,
> -					 ext4_get_block_dio_write,
> +					 ext4_get_block_write,
>  					 ext4_end_io_dio);
>  		if (iocb->private)
>  			EXT4_I(inode)->cur_aio_dio = NULL;
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index c832508..dc7a97e 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -708,7 +708,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
>  #ifdef CONFIG_QUOTA
>  	ei->i_reserved_quota = 0;
>  #endif
> -	INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
> +	INIT_LIST_HEAD(&ei->i_completed_io_list);
>  	ei->cur_aio_dio = NULL;
>  	ei->i_sync_tid = 0;
>  	ei->i_datasync_tid = 0;
> -- 
> 1.6.6.1.1.g974db.dirty
> 

-aneesh
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ