[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1203703848.3639.9.camel@localhost.localdomain>
Date: Fri, 22 Feb 2008 10:10:48 -0800
From: Mingming Cao <cmm@...ibm.com>
To: "Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
Cc: tytso@....edu, linux-ext4@...r.kernel.org
Subject: Re: [PATCH] ext4: Use page_mkwrite vma_operations to get mmap
write notification.
On Fri, 2008-02-22 at 20:09 +0530, Aneesh Kumar K.V wrote:
> We would like to get notified when we are doing a write on mmap section.
> This is needed with respect to preallocated area. We split the preallocated
> area into initialzed extent and uninitialzed extent in the call back. This
> let us handle ENOSPC better. Otherwise we get ENOSPC in the writepage and
> that would result in data loss. The changes are also needed to handle ENOSPC
> when writing to an mmap section of files with holes.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@...ux.vnet.ibm.com>
> ---
> fs/ext4/file.c | 19 ++++++++++++++-
> fs/ext4/inode.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++
> include/linux/ext4_fs.h | 1 +
> 3 files changed, 79 insertions(+), 1 deletions(-)
>
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index 20507a2..77341c1 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -123,6 +123,23 @@ force_commit:
> return ret;
> }
>
> +static struct vm_operations_struct ext4_file_vm_ops = {
> + .fault = filemap_fault,
> + .page_mkwrite = ext4_page_mkwrite,
> +};
> +
> +static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + struct address_space *mapping = file->f_mapping;
> +
> + if (!mapping->a_ops->readpage)
> + return -ENOEXEC;
> + file_accessed(file);
> + vma->vm_ops = &ext4_file_vm_ops;
> + vma->vm_flags |= VM_CAN_NONLINEAR;
> + return 0;
> +}
> +
> const struct file_operations ext4_file_operations = {
> .llseek = generic_file_llseek,
> .read = do_sync_read,
> @@ -133,7 +150,7 @@ const struct file_operations ext4_file_operations = {
> #ifdef CONFIG_COMPAT
> .compat_ioctl = ext4_compat_ioctl,
> #endif
> - .mmap = generic_file_mmap,
> + .mmap = ext4_file_mmap,
> .open = generic_file_open,
> .release = ext4_release_file,
> .fsync = ext4_sync_file,
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 5b5d63d..00af97d 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3490,3 +3490,63 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
>
> return err;
> }
> +
> +int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
> +{
> + unsigned long end;
> + loff_t size;
> + handle_t *handle;
> + int ret = -EINVAL, needed_blocks;
> + struct file *file = vma->vm_file;
> + struct inode *inode = file->f_path.dentry->d_inode;
> +
> + needed_blocks = ext4_writepage_trans_blocks(inode);
> + /* We need to take inode mutex to prevent parallel write */
> + mutex_lock(&inode->i_mutex);
> + lock_page(page);
> + size = i_size_read(inode);
> + if ((page->mapping != inode->i_mapping) ||
> + (page_offset(page) > size)) {
> + /* page got truncated out from underneath us */
> + goto out_unlock;
> + }
> +
> + /* page is wholly or partially inside EOF */
> + if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
> + end = size & ~PAGE_CACHE_MASK;
> + else
> + end = PAGE_CACHE_SIZE;
> +
> + handle = ext4_journal_start(inode, needed_blocks);
> + if (IS_ERR(handle)) {
> + ret = PTR_ERR(handle);
> + goto out_unlock;
> + }
> + /* Will zero out the pages if buffer is marked new */
> + ret = block_prepare_write(page, 0, end, ext4_get_block);
> +
> + if (!ret && ext4_should_journal_data(inode)) {
> + ret = walk_page_buffers(handle, page_buffers(page),
> + 0, end, NULL, do_journal_get_write_access);
> + if (!ret)
> + ret = walk_page_buffers(handle, page_buffers(page),
> + 0, end, NULL, write_end_fn);
> + /*
> + * we don't want to call block_commit_write in journalled mode
> + */
> + ext4_journal_stop(handle);
> + goto out_unlock;
> + }
> + if (!ret && ext4_should_order_data(inode)) {
> + ret = walk_page_buffers(handle, page_buffers(page),
> + 0, end, NULL, ext4_journal_dirty_data);
> + }
> + if (!ret)
> + ret = block_commit_write(page, 0, end);
> +
Hmm, it seems weird to do commit_write when the page is about to become
writable, but maybe that's the way it needs to be?
Don't we need to update i_size somewhere?
> + ext4_journal_stop(handle);
> +out_unlock:
> + unlock_page(page);
> + mutex_unlock(&inode->i_mutex);
> + return ret;
> +}
It seems this combines the prepare_write() code for all three journalling
modes here :(
Since prepare_write() and commit_write() are going to be sunset, why not
simply call mapping->a_ops->write_begin() and then write_end()? That
should pretty much take care of the journalling and the page operations,
no?
Mingming
> diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
> index 22810b1..8f5a563 100644
> --- a/include/linux/ext4_fs.h
> +++ b/include/linux/ext4_fs.h
> @@ -1059,6 +1059,7 @@ extern void ext4_set_aops(struct inode *inode);
> extern int ext4_writepage_trans_blocks(struct inode *);
> extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
> struct address_space *mapping, loff_t from);
> +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
>
> /* ioctl.c */
> extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists