lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:	Thu, 29 May 2008 14:52:50 +0200
From:	Jan Kara <jack@...e.cz>
To:	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
Cc:	cmm@...ibm.com, tytso@....edu, sandeen@...hat.com,
	linux-ext4@...r.kernel.org
Subject: Re: [Updated PATCH] ext4: Use page_mkwrite vma_operations to get
	mmap write notification.

On Thu 29-05-08 17:33:45, Aneesh Kumar K.V wrote:
> We would like to get notified when we are doing a write on an mmap section.
> This is needed with respect to the preallocated area. We split the preallocated
> area into an initialized extent and an uninitialized extent in the callback. This
> lets us handle ENOSPC better. Otherwise we get ENOSPC in writepage and
> that would result in data loss. The changes are also needed to handle ENOSPC
> when writing to an mmap section of files with holes.
> 
> Acked-by: Jan Kara <jack@...e.cz>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@...ux.vnet.ibm.com>
> Signed-off-by: Mingming Cao <cmm@...ibm.com>
> Signed-off-by: "Theodore Ts'o" <tytso@....edu>
> ---
>  fs/ext4/ext4.h  |    1 +
>  fs/ext4/file.c  |   19 +++++++++++++-
>  fs/ext4/inode.c |   76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 95 insertions(+), 1 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 6605076..77cbb28 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1053,6 +1053,7 @@ extern void ext4_set_aops(struct inode *inode);
>  extern int ext4_writepage_trans_blocks(struct inode *);
>  extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
>  		struct address_space *mapping, loff_t from);
> +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
>  
>  /* ioctl.c */
>  extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index 4159be6..b9510ba 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -123,6 +123,23 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
>  	return ret;
>  }
>  
> +static struct vm_operations_struct ext4_file_vm_ops = {
> +	.fault		= filemap_fault,
> +	.page_mkwrite   = ext4_page_mkwrite,
> +};
> +
> +static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +	struct address_space *mapping = file->f_mapping;
> +
> +	if (!mapping->a_ops->readpage)
> +		return -ENOEXEC;
> +	file_accessed(file);
> +	vma->vm_ops = &ext4_file_vm_ops;
> +	vma->vm_flags |= VM_CAN_NONLINEAR;
> +	return 0;
> +}
> +
>  const struct file_operations ext4_file_operations = {
>  	.llseek		= generic_file_llseek,
>  	.read		= do_sync_read,
> @@ -133,7 +150,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
>  #ifdef CONFIG_COMPAT
>  	.compat_ioctl	= ext4_compat_ioctl,
>  #endif
> -	.mmap		= generic_file_mmap,
> +	.mmap		= ext4_file_mmap,
>  	.open		= generic_file_open,
>  	.release	= ext4_release_file,
>  	.fsync		= ext4_sync_file,
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 4a7ed29..23e09eb 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3555,3 +3555,79 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
>  
>  	return err;
>  }
> +
> +static int ext4_bh_prepare_fill(handle_t *handle, struct buffer_head *bh)
> +{
> +	if (!buffer_mapped(bh)) {
> +		/*
> +		 * Mark buffer as dirty so that
> +		 * block_write_full_page() writes it
> +		 */
> +		set_buffer_dirty(bh);
> +	}
> +	return 0;
> +}
> +
> +static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
> +{
> +	return !buffer_mapped(bh);
> +}
> +
> +int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
> +{
> +	loff_t size;
> +	unsigned long len;
> +	int ret = -EINVAL;
> +	struct file *file = vma->vm_file;
> +	struct inode *inode = file->f_path.dentry->d_inode;
> +	struct address_space *mapping = inode->i_mapping;
> +	struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE,
> +					 .nr_to_write = 1 };
> +
> +	/*
> +	 * Get i_alloc_sem to stop truncates messing with the inode. We cannot
> +	 * get i_mutex because we are already holding mmap_sem.
> +	 */
> +	down_read(&inode->i_alloc_sem);
> +	size = i_size_read(inode);
> +	if (page->mapping != mapping || size <= page_offset(page)
> +	    || !PageUptodate(page)) {
> +		/* page got truncated from under us? */
> +		goto out_unlock;
> +	}
> +	ret = 0;
> +	if (PageMappedToDisk(page))
> +		goto out_unlock;
> +
> +	if (page->index == size >> PAGE_CACHE_SHIFT)
> +		len = size & ~PAGE_CACHE_MASK;
> +	else
> +		len = PAGE_CACHE_SIZE;
> +
> +	if (page_has_buffers(page)) {
> +		/* return if we have all the buffers mapped */
> +		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
> +				       ext4_bh_unmapped))
> +			goto out_unlock;
> +		/*
> +		 * Now mark all the  buffer head dirty so
> +		 * that writepage can write it
> +		 */
> +		walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE,
> +					NULL, ext4_bh_prepare_fill);
  Just a minor nit - probably use 'len' here instead of PAGE_CACHE_SIZE.
It doesn't sound right to mark buffers dirty beyond end of file...

> +	}
> +	/*
> +	 * OK, we need to fill the hole... Lock the page and do writepage.
> +	 * We can't do write_begin and write_end here because we don't
> +	 * have inode_mutex and that allow parallel write_begin, write_end call.
> +	 * (lock_page prevent this from happening on the same page though)
> +	 */
> +	lock_page(page);
> +	wbc.range_start = page_offset(page);
> +	wbc.range_end = page_offset(page) + len;
> +	ret = mapping->a_ops->writepage(page, &wbc);
> +	/* writepage unlocks the page */
> +out_unlock:
> +	up_read(&inode->i_alloc_sem);
> +	return ret;
> +}
> -- 
> 1.5.5.1.357.g1af8b.dirty

									Honza
-- 
Jan Kara <jack@...e.cz>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ