Message-ID: <CAJfpegskt2zmKyx5sB49em86HqEJH3mZFH5hnuDXPuD_EtYzyw@mail.gmail.com>
Date: Thu, 25 Apr 2013 12:35:13 +0200
From: Miklos Szeredi <miklos@...redi.hu>
To: "Maxim V. Patlasov" <MPatlasov@...allels.com>
Cc: Kirill Korotaev <dev@...allels.com>,
Pavel Emelianov <xemul@...allels.com>,
fuse-devel <fuse-devel@...ts.sourceforge.net>,
Kernel Mailing List <linux-kernel@...r.kernel.org>,
James Bottomley <jbottomley@...allels.com>,
Al Viro <viro@...iv.linux.org.uk>,
Linux-Fsdevel <linux-fsdevel@...r.kernel.org>, devel@...nvz.org
Subject: Re: [PATCH 09/14] fuse: Implement writepages and write_begin/write_end
callbacks - v3
On Mon, Apr 1, 2013 at 12:42 PM, Maxim V. Patlasov
<MPatlasov@...allels.com> wrote:
> The .writepages one is required to make each writeback request carry more than
> one page on it.
I'd split this into two parts:
1) implement ->writepages() and enable it unconditionally for mmapped
writeback (why is it not enabled by this patch?)
2) implement ->write_begin() and ->write_end() and conditionally
enable cached writeback
More comments inline.
>
> Changed in v2:
> - fixed fuse_prepare_write() to avoid reads beyond EOF
> - fixed fuse_prepare_write() to zero uninitialized part of page
>
> Changed in v3:
> - moved common part of fuse_readpage() and fuse_prepare_write() to
> __fuse_readpage().
>
> Original patch by: Pavel Emelyanov <xemul@...nvz.org>
> Signed-off-by: Maxim V. Patlasov <MPatlasov@...allels.com>
> ---
> fs/fuse/file.c | 322 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
> 1 files changed, 303 insertions(+), 19 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 5509c0b..6ceffdf 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -588,20 +588,13 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode,
> }
> }
>
> -static int fuse_readpage(struct file *file, struct page *page)
> +static int __fuse_readpage(struct file *file, struct page *page, size_t count,
> + int *err, struct fuse_req **req_pp, u64 *attr_ver_p)
> {
> struct inode *inode = page->mapping->host;
> struct fuse_conn *fc = get_fuse_conn(inode);
> struct fuse_req *req;
> size_t num_read;
> - loff_t pos = page_offset(page);
> - size_t count = PAGE_CACHE_SIZE;
> - u64 attr_ver;
> - int err;
> -
> - err = -EIO;
> - if (is_bad_inode(inode))
> - goto out;
>
Make this a separate patch too. Restructuring ...readpage() doesn't
have a lot to do with implementing ...writepages().
> /*
> * Page writeback can extend beyond the lifetime of the
> @@ -611,20 +604,45 @@ static int fuse_readpage(struct file *file, struct page *page)
> fuse_wait_on_page_writeback(inode, page->index);
>
> req = fuse_get_req(fc, 1);
> - err = PTR_ERR(req);
> + *err = PTR_ERR(req);
> if (IS_ERR(req))
> - goto out;
> + return 0;
>
> - attr_ver = fuse_get_attr_version(fc);
> + if (attr_ver_p)
> + *attr_ver_p = fuse_get_attr_version(fc);
>
> req->out.page_zeroing = 1;
> req->out.argpages = 1;
> req->num_pages = 1;
> req->pages[0] = page;
> req->page_descs[0].length = count;
> - num_read = fuse_send_read(req, file, pos, count, NULL);
> - err = req->out.h.error;
>
> + num_read = fuse_send_read(req, file, page_offset(page), count, NULL);
> + *err = req->out.h.error;
> +
> + if (*err)
> + fuse_put_request(fc, req);
> + else
> + *req_pp = req;
> +
> + return num_read;
> +}
> +
> +static int fuse_readpage(struct file *file, struct page *page)
> +{
> + struct inode *inode = page->mapping->host;
> + struct fuse_conn *fc = get_fuse_conn(inode);
> + struct fuse_req *req = NULL;
> + size_t num_read;
> + size_t count = PAGE_CACHE_SIZE;
> + u64 attr_ver;
> + int err;
> +
> + err = -EIO;
> + if (is_bad_inode(inode))
> + goto out;
> +
> + num_read = __fuse_readpage(file, page, count, &err, &req, &attr_ver);
> if (!err) {
> /*
> * Short read means EOF. If file size is larger, truncate it
> @@ -634,10 +652,11 @@ static int fuse_readpage(struct file *file, struct page *page)
>
> SetPageUptodate(page);
> }
> -
> - fuse_put_request(fc, req);
> - fuse_invalidate_attr(inode); /* atime changed */
> - out:
> + if (req) {
> + fuse_put_request(fc, req);
> + fuse_invalidate_attr(inode); /* atime changed */
> + }
> +out:
> unlock_page(page);
> return err;
> }
> @@ -702,7 +721,10 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file)
>
> struct fuse_fill_data {
> struct fuse_req *req;
> - struct file *file;
> + union {
> + struct file *file;
> + struct fuse_file *ff;
> + };
Don't do a union. Make two separate structures, one for read and one
for write (sketch below the quoted struct).
> struct inode *inode;
> unsigned nr_pages;
> };
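
Something like this (just a sketch, field names are up to you):

struct fuse_fill_read_data {
	struct fuse_req *req;
	struct file *file;
	struct inode *inode;
	unsigned nr_pages;
};

struct fuse_fill_write_data {
	struct fuse_req *req;
	struct fuse_file *ff;
	struct inode *inode;
};
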
> @@ -1511,6 +1533,265 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc)
> return err;
> }
>
> +static int fuse_send_writepages(struct fuse_fill_data *data)
> +{
> + int i, all_ok = 1;
> + struct fuse_req *req = data->req;
> + struct inode *inode = data->inode;
> + struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
> + struct fuse_conn *fc = get_fuse_conn(inode);
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + loff_t off = -1;
> +
> + if (!data->ff)
> + data->ff = fuse_write_file(fc, fi);
> +
> + if (!data->ff) {
> + for (i = 0; i < req->num_pages; i++)
> + end_page_writeback(req->pages[i]);
> + return -EIO;
> + }
> +
> + req->inode = inode;
> + req->misc.write.in.offset = page_offset(req->pages[0]);
We need to set req->background before the request is queued (sketch
below the quoted function).
> +
> + spin_lock(&fc->lock);
> + list_add(&req->writepages_entry, &fi->writepages);
> + spin_unlock(&fc->lock);
> +
> + for (i = 0; i < req->num_pages; i++) {
> + struct page *page = req->pages[i];
> + struct page *tmp_page;
> +
> + tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
> + if (tmp_page) {
> + copy_highpage(tmp_page, page);
> + inc_bdi_stat(bdi, BDI_WRITEBACK);
> + inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
Is there a reason why we do the page allocation/copying here instead
of at fill time?  I'd guess it would be simpler and more logical
there; see the rough sketch below the quoted fuse_writepages_fill().
> + } else
> + all_ok = 0;
> + req->pages[i] = tmp_page;
> + if (i == 0)
> + off = page_offset(page);
> +
> + end_page_writeback(page);
> + }
> +
> + if (!all_ok) {
> + for (i = 0; i < req->num_pages; i++) {
> + struct page *page = req->pages[i];
> + if (page) {
> + dec_bdi_stat(bdi, BDI_WRITEBACK);
> + dec_zone_page_state(page, NR_WRITEBACK_TEMP);
> + __free_page(page);
> + req->pages[i] = NULL;
> + }
> + }
> +
> + spin_lock(&fc->lock);
> + list_del(&req->writepages_entry);
> + wake_up(&fi->page_waitq);
> + spin_unlock(&fc->lock);
> + return -ENOMEM;
> + }
> +
> + req->ff = fuse_file_get(data->ff);
> + fuse_write_fill(req, data->ff, off, 0);
> +
> + req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
> + req->in.argpages = 1;
> + fuse_page_descs_length_init(req, 0, req->num_pages);
> + req->end = fuse_writepage_end;
> +
> + spin_lock(&fc->lock);
> + list_add_tail(&req->list, &fi->queued_writes);
> + fuse_flush_writepages(data->inode);
> + spin_unlock(&fc->lock);
> +
> + return 0;
> +}
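
To be explicit about the req->background comment above: I mean
something like this, before the request is queued (same as
fuse_writepage_locked() does):

	req->end = fuse_writepage_end;
	req->background = 1; /* writeback always goes to bg_queue */

	spin_lock(&fc->lock);
	list_add_tail(&req->list, &fi->queued_writes);
	fuse_flush_writepages(data->inode);
	spin_unlock(&fc->lock);
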
> +
> +static int fuse_writepages_fill(struct page *page,
> + struct writeback_control *wbc, void *_data)
> +{
> + struct fuse_fill_data *data = _data;
> + struct fuse_req *req = data->req;
> + struct inode *inode = data->inode;
> + struct fuse_conn *fc = get_fuse_conn(inode);
> +
> + if (fuse_page_is_writeback(inode, page->index)) {
> + if (wbc->sync_mode != WB_SYNC_ALL) {
> + redirty_page_for_writepage(wbc, page);
> + unlock_page(page);
> + return 0;
> + }
> + fuse_wait_on_page_writeback(inode, page->index);
> + }
> +
> + if (req->num_pages &&
> + (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
> + (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
> + req->pages[req->num_pages - 1]->index + 1 != page->index)) {
> + int err;
> +
> + err = fuse_send_writepages(data);
> + if (err) {
> + unlock_page(page);
> + return err;
> + }
> +
> + data->req = req =
> + fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
> + if (!req) {
> + unlock_page(page);
> + return -ENOMEM;
> + }
> + }
> +
> + req->pages[req->num_pages] = page;
> + req->num_pages++;
> +
> + if (test_set_page_writeback(page))
> + BUG();
> +
> + unlock_page(page);
> +
> + return 0;
> +}
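
To illustrate the alloc-at-fill-time idea: fuse_writepages_fill()
could set up the temp page per-page, which gets rid of the all_ok
unwinding in fuse_send_writepages().  Very rough sketch, untested;
where exactly end_page_writeback() on the original page should happen
relative to queuing the request needs a closer look:

	struct page *tmp_page;

	tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
	if (!tmp_page) {
		unlock_page(page);
		return -ENOMEM;
	}

	if (test_set_page_writeback(page))
		BUG();
	copy_highpage(tmp_page, page);

	/* the request only references the copy from here on */
	req->pages[req->num_pages] = tmp_page;
	req->num_pages++;

	inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
	inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);

	end_page_writeback(page);
	unlock_page(page);

	return 0;
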
> +
> +static int fuse_writepages(struct address_space *mapping,
> + struct writeback_control *wbc)
> +{
> + struct inode *inode = mapping->host;
> + struct fuse_conn *fc = get_fuse_conn(inode);
> + struct fuse_fill_data data;
> + int err;
> +
> + if (!fc->writeback_cache)
> + return generic_writepages(mapping, wbc);
> +
> + err = -EIO;
> + if (is_bad_inode(inode))
> + goto out;
> +
> + data.ff = NULL;
> + data.inode = inode;
> + data.req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
> + err = -ENOMEM;
> + if (!data.req)
> + goto out_put;
> +
> + err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
> + if (data.req) {
> + if (!err && data.req->num_pages) {
> + err = fuse_send_writepages(&data);
> + if (err)
> + fuse_put_request(fc, data.req);
> + } else
> + fuse_put_request(fc, data.req);
I think fuse_send_writepages() should grab a ref to the request with
__fuse_get_request() if it successfully sent the request. Then we can
unconditionally do the fuse_put_request() here; rough sketch below.
> + }
> +out_put:
> + if (data.ff)
> + fuse_file_put(data.ff, false);
> +out:
> + return err;
> +}
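
To spell out the refcounting idea: fuse_send_writepages() would grab
the extra reference just before queuing, roughly

	__fuse_get_request(req);
	spin_lock(&fc->lock);
	list_add_tail(&req->list, &fi->queued_writes);
	fuse_flush_writepages(data->inode);
	spin_unlock(&fc->lock);
	return 0;

and then the tail of fuse_writepages() becomes

	err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
	if (data.req) {
		/* a partially filled request may still be pending */
		if (!err && data.req->num_pages)
			err = fuse_send_writepages(&data);
		/* drop our ref whether or not the send succeeded */
		fuse_put_request(fc, data.req);
	}

(untested, but you get the idea).
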
> +
> +/*
> + * Determine the number of bytes of data the page contains
> + */
> +static inline unsigned fuse_page_length(struct page *page)
> +{
> + loff_t i_size = i_size_read(page_file_mapping(page)->host);
> +
> + if (i_size > 0) {
> + pgoff_t page_index = page_file_index(page);
> + pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
> + if (page_index < end_index)
> + return PAGE_CACHE_SIZE;
> + if (page_index == end_index)
> + return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
> + }
> + return 0;
> +}
> +
> +static int fuse_prepare_write(struct fuse_conn *fc, struct file *file,
> + struct page *page, loff_t pos, unsigned len)
> +{
> + struct fuse_req *req = NULL;
> + unsigned num_read;
> + unsigned page_len;
> + int err;
> +
> + if (PageUptodate(page) || (len == PAGE_CACHE_SIZE))
> + return 0;
> +
> + page_len = fuse_page_length(page);
> + if (!page_len) {
> + zero_user(page, 0, PAGE_CACHE_SIZE);
> + return 0;
> + }
> +
> + num_read = __fuse_readpage(file, page, page_len, &err, &req, NULL);
> + if (req)
> + fuse_put_request(fc, req);
> + if (err) {
> + unlock_page(page);
> + page_cache_release(page);
> + } else if (num_read != PAGE_CACHE_SIZE) {
> + zero_user_segment(page, num_read, PAGE_CACHE_SIZE);
> + }
> + return err;
> +}
> +
> +/*
> + * It's worthy to make sure that space is reserved on disk for the write,
> + * but how to implement it without killing performance need more thinking.
> + */
> +static int fuse_write_begin(struct file *file, struct address_space *mapping,
> + loff_t pos, unsigned len, unsigned flags,
> + struct page **pagep, void **fsdata)
> +{
> + pgoff_t index = pos >> PAGE_CACHE_SHIFT;
> + struct fuse_conn *fc = get_fuse_conn(file->f_dentry->d_inode);
> +
> + BUG_ON(!fc->writeback_cache);
> +
> + *pagep = grab_cache_page_write_begin(mapping, index, flags);
> + if (!*pagep)
> + return -ENOMEM;
> +
> + return fuse_prepare_write(fc, file, *pagep, pos, len);
> +}
> +
> +static int fuse_commit_write(struct file *file, struct page *page,
> + unsigned from, unsigned to)
> +{
> + struct inode *inode = page->mapping->host;
> + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
> +
> + if (!PageUptodate(page))
> + SetPageUptodate(page);
> +
> + fuse_write_update_size(inode, pos);
> + set_page_dirty(page);
> + return 0;
> +}
> +
> +static int fuse_write_end(struct file *file, struct address_space *mapping,
> + loff_t pos, unsigned len, unsigned copied,
> + struct page *page, void *fsdata)
> +{
> + unsigned from = pos & (PAGE_CACHE_SIZE - 1);
> +
> + fuse_commit_write(file, page, from, from+copied);
> +
> + unlock_page(page);
> + page_cache_release(page);
> +
> + return copied;
> +}
> +
> static int fuse_launder_page(struct page *page)
> {
> int err = 0;
> @@ -2419,11 +2700,14 @@ static const struct file_operations fuse_direct_io_file_operations = {
> static const struct address_space_operations fuse_file_aops = {
> .readpage = fuse_readpage,
> .writepage = fuse_writepage,
> + .writepages = fuse_writepages,
> .launder_page = fuse_launder_page,
> .readpages = fuse_readpages,
> .set_page_dirty = __set_page_dirty_nobuffers,
> .bmap = fuse_bmap,
> .direct_IO = fuse_direct_IO,
> + .write_begin = fuse_write_begin,
> + .write_end = fuse_write_end,
> };
>
> void fuse_init_file_inode(struct inode *inode)
>