[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5671C069.7090300@studio.unibo.it>
Date: Wed, 16 Dec 2015 20:50:01 +0100
From: Luca Risolia <luca.risolia@...dio.unibo.it>
To: "Jader H. Silva" <jaderhs5@...il.com>,
Miklos Szeredi <miklos@...redi.hu>,
Andrew Morton <akpm@...ux-foundation.org>,
<linux-fsdevel@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
Nikolaus Rath <Nikolaus@...h.org>
Subject: Re: [PATCH] fuse: implement cuse mmap
I tested this patch and gave some hints to Jader when it first appeared
on the libfuse mailing list some months ago.
Signed-off-by: Luca Risolia <luca.risolia@...dio.unibo.it>
Jader H. Silva wrote:
> Implement cuse mmap using shmem to provide the actual memory maps.
> Pages must be read/written using fuse's NOTIFY_RETRIEVE and NOTIFY_STORE api.
>
> Signed-off-by: Jader H. Silva <jaderhs5@...il.com>
> ---
> fs/fuse/cuse.c | 459 +++++++++++++++++++++++++++++++++++++++++++++-
> fs/fuse/dev.c | 163 +---------------
> fs/fuse/fuse_i.h | 34 +++-
> fs/fuse/inode.c | 166 ++++++++++++++++-
> include/uapi/linux/fuse.h | 26 +++
> 5 files changed, 688 insertions(+), 160 deletions(-)
>
> diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
> index eae2c11..7749c13 100644
> --- a/fs/fuse/cuse.c
> +++ b/fs/fuse/cuse.c
> @@ -48,6 +48,9 @@
> #include <linux/stat.h>
> #include <linux/module.h>
> #include <linux/uio.h>
> +#include <linux/mman.h>
> +#include <linux/falloc.h>
> +#include <linux/shmem_fs.h>
>
> #include "fuse_i.h"
>
> @@ -175,6 +178,441 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
> return fuse_do_ioctl(file, cmd, arg, flags);
> }
>
> +struct fuse_dmmap_region {
> + u64 mapid;
> + u64 size;
> + struct file *filp;
> + struct vm_operations_struct vm_ops;
> + const struct vm_operations_struct *vm_original_ops;
> + struct list_head list;
> + atomic_t ref;
> +};
> +
> +/*
> + * fuse_dmmap_vm represents the result of a single mmap() call, which
> + * can be shared by multiple client vmas created by forking.
> + */
> +struct fuse_dmmap_vm {
> + u64 len;
> + u64 off;
> + atomic_t open_count;
> + struct fuse_dmmap_region *region;
> +};
> +
> +static void fuse_dmmap_region_put(struct fuse_conn *fc,
> + struct fuse_dmmap_region *fdr)
> +{
> + if (atomic_dec_and_lock(&fdr->ref, &fc->lock)) {
> +
> + list_del(&fdr->list);
> +
> + spin_unlock(&fc->lock);
> +
> + fput(fdr->filp);
> + kfree(fdr);
> + }
> +}
> +
> +static void fuse_dmmap_vm_open(struct vm_area_struct *vma)
> +{
> + struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
> + struct fuse_dmmap_region *fdr = fdvm->region;
> +
> + /* vma copied */
> + atomic_inc(&fdvm->open_count);
> +
> + if (fdr->vm_original_ops->open)
> + fdr->vm_original_ops->open(vma);
> +}
> +
> +static void fuse_dmmap_vm_close(struct vm_area_struct *vma)
> +{
> + struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
> + struct fuse_dmmap_region *fdr = fdvm->region;
> + struct fuse_file *ff = vma->vm_file->private_data;
> + struct fuse_conn *fc = ff->fc;
> + struct fuse_req *req;
> + struct fuse_munmap_in *inarg;
> +
> + if (fdr->vm_original_ops->close)
> + fdr->vm_original_ops->close(vma);
> +
> + if (!atomic_dec_and_test(&fdvm->open_count))
> + return;
> +
> + /*
> + * Notify server that the mmap region has been unmapped.
> + * Failing this might lead to resource leak in server, don't
> + * fail.
> + */
> + req = fuse_get_req_nofail_nopages(fc, vma->vm_file);
> + inarg = &req->misc.munmap_in;
> +
> + inarg->fh = ff->fh;
> + inarg->mapid = fdvm->region->mapid;
> + inarg->size = fdvm->len;
> + inarg->offset = fdvm->off;
> +
> + req->in.h.opcode = FUSE_MUNMAP;
> + req->in.h.nodeid = ff->nodeid;
> + req->in.numargs = 1;
> + req->in.args[0].size = sizeof(*inarg);
> + req->in.args[0].value = inarg;
> +
> + fuse_request_send(fc, req);
> + fuse_put_request(fc, req);
> + fuse_dmmap_region_put(fc, fdvm->region);
> + kfree(fdvm);
> +}
> +
> +static int fuse_dmmap_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> + int ret;
> + struct file *filp = vma->vm_file;
> + struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
> + struct fuse_dmmap_region *fdr = fdvm->region;
> +
> + vma->vm_file = fdr->filp;
> + ret = fdr->vm_original_ops->fault(vma, vmf);
> +
> + vma->vm_file = filp;
> +
> + return ret;
> +}
> +
> +static const struct vm_operations_struct fuse_dmmap_vm_ops = {
> + .open = fuse_dmmap_vm_open,
> + .close = fuse_dmmap_vm_close,
> + .fault = fuse_dmmap_vm_fault,
> +};
> +
> +static struct fuse_dmmap_region *fuse_dmmap_find_locked(struct fuse_conn *fc,
> + u64 mapid)
> +{
> + struct fuse_dmmap_region *curr;
> + struct fuse_dmmap_region *fdr = NULL;
> +
> + list_for_each_entry(curr, &fc->dmmap_list, list) {
> + if (curr->mapid == mapid) {
> + fdr = curr;
> + atomic_inc(&fdr->ref);
> + break;
> + }
> + }
> +
> + return fdr;
> +}
> +
> +static struct fuse_dmmap_region *fuse_dmmap_find(struct fuse_conn *fc,
> + u64 mapid)
> +{
> + struct fuse_dmmap_region *fdr;
> +
> + spin_lock(&fc->lock);
> + fdr = fuse_dmmap_find_locked(fc, mapid);
> + spin_unlock(&fc->lock);
> +
> + return fdr;
> +}
> +
> +static struct fuse_dmmap_region *fuse_dmmap_get(struct fuse_conn *fc,
> + struct file *file, u64 mapid,
> + u64 size, unsigned long flags)
> +{
> + struct fuse_dmmap_region *fdr;
> + char *pathbuf, *filepath;
> + struct file *shmem_file;
> +
> + fdr = fuse_dmmap_find(fc, mapid);
> + if (!fdr) {
> + struct fuse_dmmap_region *tmp;
> +
> + fdr = kzalloc(sizeof(struct fuse_dmmap_region), GFP_KERNEL);
> + if (!fdr)
> + return ERR_PTR(-ENOMEM);
> +
> + atomic_set(&fdr->ref, 1);
> +
> + pathbuf = kzalloc(PATH_MAX+1, GFP_KERNEL);
> + if (!pathbuf) {
> + kfree(fdr);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + filepath = d_path(&file->f_path, pathbuf, PATH_MAX+1);
> + if (IS_ERR(filepath)) {
> + kfree(fdr);
> + kfree(pathbuf);
> + return (struct fuse_dmmap_region *) filepath;
> + }
> +
> + fdr->mapid = mapid;
> + shmem_file = shmem_file_setup(filepath, size, flags);
> + kfree(pathbuf);
> +
> + if (IS_ERR(shmem_file)) {
> + kfree(fdr);
> + return (struct fuse_dmmap_region *) shmem_file;
> + }
> +
> + fdr->filp = shmem_file;
> +
> + spin_lock(&fc->lock);
> + tmp = fuse_dmmap_find_locked(fc, mapid);
> + if (tmp) {
> + fput(fdr->filp);
> + kfree(fdr);
> + fdr = tmp;
> + } else {
> + INIT_LIST_HEAD(&fdr->list);
> + list_add(&fdr->list, &fc->dmmap_list);
> + }
> + spin_unlock(&fc->lock);
> + }
> +
> + if (size > fdr->size) {
> +
> + fdr->filp->f_op->fallocate(fdr->filp, 0, 0, size);
> + fdr->size = size;
> + }
> +
> + return fdr;
> +}
> +
> +static int cuse_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + int err;
> + struct fuse_file *ff = file->private_data;
> + struct fuse_conn *fc = ff->fc;
> + struct fuse_dmmap_vm *fdvm;
> + struct fuse_dmmap_region *fdr;
> + struct fuse_req *req = NULL;
> + struct fuse_mmap_in inarg;
> + struct fuse_mmap_out outarg;
> +
> + if (fc->no_dmmap)
> + return -ENOSYS;
> +
> + req = fuse_get_req(fc, 0);
> + if (IS_ERR(req))
> + return PTR_ERR(req);
> +
> + /* ask server whether this mmap is okay and what the size should be */
> + memset(&inarg, 0, sizeof(inarg));
> + inarg.fh = ff->fh;
> + inarg.addr = vma->vm_start;
> + inarg.len = vma->vm_end - vma->vm_start;
> + inarg.prot = ((vma->vm_flags & VM_READ) ? PROT_READ : 0) |
> + ((vma->vm_flags & VM_WRITE) ? PROT_WRITE : 0) |
> + ((vma->vm_flags & VM_EXEC) ? PROT_EXEC : 0);
> + inarg.flags = ((vma->vm_flags & VM_SHARED) ? MAP_SHARED : 0 ) |
> + ((vma->vm_flags & VM_GROWSDOWN) ? MAP_GROWSDOWN : 0) |
> + ((vma->vm_flags & VM_DENYWRITE) ? MAP_DENYWRITE : 0) |
> + ((vma->vm_flags & VM_EXEC) ? MAP_EXECUTABLE : 0) |
> + ((vma->vm_flags & VM_LOCKED) ? MAP_LOCKED : 0);
> + inarg.offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
> +
> + req->in.h.opcode = FUSE_MMAP;
> + req->in.h.nodeid = ff->nodeid;
> + req->in.numargs = 1;
> + req->in.args[0].size = sizeof(inarg);
> + req->in.args[0].value = &inarg;
> + req->out.numargs = 1;
> + req->out.args[0].size = sizeof(outarg);
> + req->out.args[0].value = &outarg;
> +
> + fuse_request_send(fc, req);
> + err = req->out.h.error;
> + if (err) {
> + if (err == -ENOSYS)
> + fc->no_dmmap = 1;
> + goto free_req;
> + }
> +
> + fdr = fuse_dmmap_get(fc, file, outarg.mapid, outarg.size,
> + vma->vm_flags);
> + err = PTR_ERR(fdr);
> + if (IS_ERR(fdr))
> + goto free_req;
> +
> + err = -ENOMEM;
> +
> + fdvm = kzalloc(sizeof(*fdvm), GFP_KERNEL);
> + if (!fdvm) {
> + fuse_dmmap_region_put(fc, fdr);
> + goto free_req;
> + }
> +
> + atomic_set(&fdvm->open_count, 1);
> + fdvm->region = fdr;
> + fdvm->len = inarg.len;
> + fdvm->off = inarg.offset;
> +
> + fdr->filp->f_op->mmap(fdr->filp, vma);
> +
> + memcpy(&fdr->vm_ops, vma->vm_ops, sizeof(fdr->vm_ops));
> + fdr->vm_ops.open = fuse_dmmap_vm_ops.open;
> + fdr->vm_ops.close = fuse_dmmap_vm_ops.close;
> + fdr->vm_ops.fault = fuse_dmmap_vm_ops.fault;
> +
> + fdr->vm_original_ops = vma->vm_ops;
> +
> + vma->vm_ops = &fdr->vm_ops;
> +
> + vma->vm_private_data = fdvm;
> + vma->vm_flags |= VM_DONTEXPAND; /* disallow expansion for now */
> + err = 0;
> +
> +free_req:
> + fuse_put_request(fc, req);
> + return err;
> +}
> +
> +static int fuse_notify_store_to_dmmap(struct fuse_conn *fc,
> + struct fuse_copy_state *cs,
> + u64 nodeid, u32 size, u64 pos)
> +{
> + struct fuse_dmmap_region *fdr;
> + struct file *filp;
> + pgoff_t index;
> + unsigned int off;
> + int err;
> +
> + fdr = fuse_dmmap_find(fc, nodeid);
> + if (!fdr)
> + return -ENOENT;
> +
> + index = pos >> PAGE_SHIFT;
> + off = pos & ~PAGE_MASK;
> + if (pos > fdr->size)
> + size = 0;
> + else if (size > fdr->size - pos)
> + size = fdr->size - pos;
> +
> + filp = fdr->filp;
> +
> + while (size) {
> + struct page *page;
> + unsigned int this_num;
> +
> + page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping,
> + index, GFP_HIGHUSER);
> + if (IS_ERR(page)) {
> +
> + err = -ENOMEM;
> + goto out_iput;
> + }
> +
> + this_num = min_t(unsigned, size, PAGE_SIZE - off);
> + err = fuse_copy_page(cs, &page, off, this_num, 0);
> +
> + unlock_page(page);
> + page_cache_release(page);
> +
> + if (err)
> + goto out_iput;
> +
> + size -= this_num;
> + off = 0;
> + index++;
> + }
> +
> + err = 0;
> +
> +out_iput:
> + fuse_dmmap_region_put(fc, fdr);
> +
> + return err;
> +}
> +
> +static void fuse_retrieve_dmmap_end(struct fuse_conn *fc, struct fuse_req *req)
> +{
> + release_pages(req->pages, req->num_pages, 0);
> +}
> +
> +static int fuse_notify_retrieve_from_dmmap(struct fuse_conn *fc,
> + struct fuse_notify_retrieve_out *outarg)
> +{
> + struct fuse_dmmap_region *fdr;
> + struct fuse_req *req;
> + struct page *page;
> + struct file *filp;
> + pgoff_t index;
> + unsigned int num;
> + unsigned int offset;
> + unsigned int npages;
> + unsigned int this_num;
> + size_t total_len = 0;
> + int err;
> +
> + fdr = fuse_dmmap_find(fc, outarg->nodeid);
> + if (!fdr)
> + return -ENOENT;
> +
> + npages = outarg->size >> PAGE_SHIFT;
> + if (outarg->size & ~PAGE_MASK)
> + npages++;
> +
> + req = fuse_get_req(fc, npages);
> + err = PTR_ERR(req);
> + if (IS_ERR(req))
> + goto out_put_region;
> +
> + offset = outarg->offset & ~PAGE_MASK;
> +
> + req->in.h.opcode = FUSE_NOTIFY_REPLY;
> + req->in.h.nodeid = outarg->nodeid;
> + req->in.numargs = 2;
> + req->in.argpages = 1;
> + req->end = fuse_retrieve_dmmap_end;
> +
> + index = outarg->offset >> PAGE_SHIFT;
> + num = outarg->size;
> + if (outarg->offset > fdr->size)
> + num = 0;
> + else if (outarg->offset + num > fdr->size)
> + num = fdr->size - outarg->offset;
> +
> + filp = fdr->filp;
> +
> + npages = 0;
> + while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
> +
> + page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping,
> + index,
> + GFP_KERNEL);
> + if (IS_ERR(page)) {
> + err = -ENOMEM;
> + goto out_put_region;
> + }
> +
> + this_num = min_t(unsigned, num, PAGE_SIZE - offset);
> + req->pages[req->num_pages] = page;
> + req->page_descs[req->num_pages].length = this_num;
> + req->num_pages++;
> +
> + num -= this_num;
> + total_len += this_num;
> + index++;
> + npages++;
> + }
> + req->misc.retrieve_in.offset = outarg->offset;
> + req->misc.retrieve_in.size = total_len;
> + req->in.args[0].size = sizeof(req->misc.retrieve_in);
> + req->in.args[0].value = &req->misc.retrieve_in;
> + req->in.args[1].size = total_len;
> +
> + err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
> + if (err)
> + fuse_retrieve_dmmap_end(fc, req);
> +
> +out_put_region:
> + fuse_dmmap_region_put(fc, fdr);
> +
> + return err;
> +}
> +
> +
> static const struct file_operations cuse_frontend_fops = {
> .owner = THIS_MODULE,
> .read_iter = cuse_read_iter,
> @@ -184,7 +622,8 @@ static const struct file_operations cuse_frontend_fops = {
> .unlocked_ioctl = cuse_file_ioctl,
> .compat_ioctl = cuse_file_compat_ioctl,
> .poll = fuse_file_poll,
> - .llseek = noop_llseek,
> + .llseek = noop_llseek,
> + .mmap = cuse_mmap,
> };
>
>
> @@ -468,10 +907,26 @@ err:
>
> static void cuse_fc_release(struct fuse_conn *fc)
> {
> + struct fuse_dmmap_region *fdr;
> struct cuse_conn *cc = fc_to_cc(fc);
> +
> + spin_lock(&fc->lock);
> + while (!list_empty(&fc->dmmap_list)) {
> +
> + fdr = list_entry(fc->dmmap_list.next, typeof(*fdr), list);
> + fuse_dmmap_region_put(fc, fdr);
> + }
> + spin_unlock(&fc->lock);
> +
> kfree_rcu(cc, fc.rcu);
> }
>
> +static const struct fuse_conn_operations cuse_ops = {
> + .release = cuse_fc_release,
> + .notify_store = fuse_notify_store_to_dmmap,
> + .notify_retrieve = fuse_notify_retrieve_from_dmmap,
> +};
> +
> /**
> * cuse_channel_open - open method for /dev/cuse
> * @inode: inode for /dev/cuse
> @@ -507,7 +962,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
> }
>
> INIT_LIST_HEAD(&cc->list);
> - cc->fc.release = cuse_fc_release;
> + cc->fc.ops = &cuse_ops;
>
> cc->fc.initialized = 1;
> rc = cuse_send_init(cc);
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 80cc1b3..0faf92c 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -279,6 +279,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
> __clear_bit(FR_BACKGROUND, &req->flags);
> return req;
> }
> +EXPORT_SYMBOL_GPL(fuse_get_req_nofail_nopages);
>
> void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
> {
> @@ -617,8 +618,8 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
> }
> EXPORT_SYMBOL_GPL(fuse_request_send_background);
>
> -static int fuse_request_send_notify_reply(struct fuse_conn *fc,
> - struct fuse_req *req, u64 unique)
> +int fuse_request_send_notify_reply(struct fuse_conn *fc,
> + struct fuse_req *req, u64 unique)
> {
> int err = -ENODEV;
> struct fuse_iqueue *fiq = &fc->iq;
> @@ -674,6 +675,7 @@ static int lock_request(struct fuse_req *req)
> }
> return err;
> }
> +EXPORT_SYMBOL_GPL(fuse_request_send_notify_reply);
>
> /*
> * Unlock request. If it was aborted while locked, caller is responsible
> @@ -967,8 +969,8 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
> * Copy a page in the request to/from the userspace buffer. Must be
> * done atomically
> */
> -static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
> - unsigned offset, unsigned count, int zeroing)
> +int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
> + unsigned offset, unsigned count, int zeroing)
> {
> int err;
> struct page *page = *pagep;
> @@ -1003,6 +1005,7 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
> flush_dcache_page(page);
> return 0;
> }
> +EXPORT_SYMBOL_GPL(fuse_copy_page);
>
> /* Copy pages in the request to/from userspace buffer */
> static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
> @@ -1597,15 +1600,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
> struct fuse_copy_state *cs)
> {
> struct fuse_notify_store_out outarg;
> - struct inode *inode;
> - struct address_space *mapping;
> - u64 nodeid;
> int err;
> - pgoff_t index;
> - unsigned int offset;
> - unsigned int num;
> - loff_t file_size;
> - loff_t end;
>
> err = -EINVAL;
> if (size < sizeof(outarg))
> @@ -1619,145 +1614,18 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
> if (size - sizeof(outarg) != outarg.size)
> goto out_finish;
>
> - nodeid = outarg.nodeid;
> + err = fc->ops->notify_store(fc, cs, outarg.nodeid, outarg.size,
> + outarg.offset);
>
> - down_read(&fc->killsb);
> -
> - err = -ENOENT;
> - if (!fc->sb)
> - goto out_up_killsb;
> -
> - inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
> - if (!inode)
> - goto out_up_killsb;
> -
> - mapping = inode->i_mapping;
> - index = outarg.offset >> PAGE_CACHE_SHIFT;
> - offset = outarg.offset & ~PAGE_CACHE_MASK;
> - file_size = i_size_read(inode);
> - end = outarg.offset + outarg.size;
> - if (end > file_size) {
> - file_size = end;
> - fuse_write_update_size(inode, file_size);
> - }
> -
> - num = outarg.size;
> - while (num) {
> - struct page *page;
> - unsigned int this_num;
> -
> - err = -ENOMEM;
> - page = find_or_create_page(mapping, index,
> - mapping_gfp_mask(mapping));
> - if (!page)
> - goto out_iput;
> -
> - this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
> - err = fuse_copy_page(cs, &page, offset, this_num, 0);
> - if (!err && offset == 0 &&
> - (this_num == PAGE_CACHE_SIZE || file_size == end))
> - SetPageUptodate(page);
> - unlock_page(page);
> - page_cache_release(page);
> -
> - if (err)
> - goto out_iput;
> -
> - num -= this_num;
> - offset = 0;
> - index++;
> - }
> -
> - err = 0;
> -
> -out_iput:
> - iput(inode);
> -out_up_killsb:
> - up_read(&fc->killsb);
> out_finish:
> fuse_copy_finish(cs);
> return err;
> }
>
> -static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
> -{
> - release_pages(req->pages, req->num_pages, false);
> -}
> -
> -static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
> - struct fuse_notify_retrieve_out *outarg)
> -{
> - int err;
> - struct address_space *mapping = inode->i_mapping;
> - struct fuse_req *req;
> - pgoff_t index;
> - loff_t file_size;
> - unsigned int num;
> - unsigned int offset;
> - size_t total_len = 0;
> - int num_pages;
> -
> - offset = outarg->offset & ~PAGE_CACHE_MASK;
> - file_size = i_size_read(inode);
> -
> - num = outarg->size;
> - if (outarg->offset > file_size)
> - num = 0;
> - else if (outarg->offset + num > file_size)
> - num = file_size - outarg->offset;
> -
> - num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
> - num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
> -
> - req = fuse_get_req(fc, num_pages);
> - if (IS_ERR(req))
> - return PTR_ERR(req);
> -
> - req->in.h.opcode = FUSE_NOTIFY_REPLY;
> - req->in.h.nodeid = outarg->nodeid;
> - req->in.numargs = 2;
> - req->in.argpages = 1;
> - req->page_descs[0].offset = offset;
> - req->end = fuse_retrieve_end;
> -
> - index = outarg->offset >> PAGE_CACHE_SHIFT;
> -
> - while (num && req->num_pages < num_pages) {
> - struct page *page;
> - unsigned int this_num;
> -
> - page = find_get_page(mapping, index);
> - if (!page)
> - break;
> -
> - this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
> - req->pages[req->num_pages] = page;
> - req->page_descs[req->num_pages].length = this_num;
> - req->num_pages++;
> -
> - offset = 0;
> - num -= this_num;
> - total_len += this_num;
> - index++;
> - }
> - req->misc.retrieve_in.offset = outarg->offset;
> - req->misc.retrieve_in.size = total_len;
> - req->in.args[0].size = sizeof(req->misc.retrieve_in);
> - req->in.args[0].value = &req->misc.retrieve_in;
> - req->in.args[1].size = total_len;
> -
> - err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
> - if (err)
> - fuse_retrieve_end(fc, req);
> -
> - return err;
> -}
> -
> static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
> struct fuse_copy_state *cs)
> {
> struct fuse_notify_retrieve_out outarg;
> - struct inode *inode;
> int err;
>
> err = -EINVAL;
> @@ -1770,18 +1638,7 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
>
> fuse_copy_finish(cs);
>
> - down_read(&fc->killsb);
> - err = -ENOENT;
> - if (fc->sb) {
> - u64 nodeid = outarg.nodeid;
> -
> - inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
> - if (inode) {
> - err = fuse_retrieve(fc, inode, &outarg);
> - iput(inode);
> - }
> - }
> - up_read(&fc->killsb);
> + err = fc->ops->notify_retrieve(fc, &outarg);
>
> return err;
>
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 4051131..a56222b 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -337,6 +337,7 @@ struct fuse_req {
> struct fuse_req *next;
> } write;
> struct fuse_notify_retrieve_in retrieve_in;
> + struct fuse_munmap_in munmap_in;
> } misc;
>
> /** page vector */
> @@ -431,6 +432,21 @@ struct fuse_dev {
> struct list_head entry;
> };
>
> +struct fuse_copy_state;
> +
> +struct fuse_conn_operations {
> + /** Called on final put */
> + void (*release)(struct fuse_conn *);
> +
> + /** Called to store data into a mapping */
> + int (*notify_store)(struct fuse_conn *, struct fuse_copy_state *,
> + u64 nodeid, u32 size, u64 pos);
> +
> + /** Called to retrieve data from a mapping */
> + int (*notify_retrieve)(struct fuse_conn *,
> + struct fuse_notify_retrieve_out *);
> +};
> +
> /**
> * A Fuse connection.
> *
> @@ -578,6 +594,9 @@ struct fuse_conn {
> /** Is poll not implemented by fs? */
> unsigned no_poll:1;
>
> + /** Is direct mmap not implemented by fs? */
> + unsigned no_dmmap:1;
> +
> /** Do multi-page cached writes */
> unsigned big_writes:1;
>
> @@ -635,9 +654,6 @@ struct fuse_conn {
> /** Version counter for attribute changes */
> u64 attr_version;
>
> - /** Called on final put */
> - void (*release)(struct fuse_conn *);
> -
> /** Super block for this connection. */
> struct super_block *sb;
>
> @@ -646,6 +662,12 @@ struct fuse_conn {
>
> /** List of device instances belonging to this connection */
> struct list_head devices;
> +
> + /** List of direct mmaps (currently CUSE only) */
> + struct list_head dmmap_list;
> +
> + /** Operations that fuse and cuse can implement differently */
> + const struct fuse_conn_operations *ops;
> };
>
> static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
> @@ -944,4 +966,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
>
> void fuse_set_initialized(struct fuse_conn *fc);
>
> +int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
> + unsigned offset, unsigned count, int zeroing);
> +
> +int fuse_request_send_notify_reply(struct fuse_conn *fc,
> + struct fuse_req *req, u64 unique);
> +
> #endif /* _FS_FUSE_I_H */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index ac81f48..5284b84 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -609,6 +609,7 @@ void fuse_conn_init(struct fuse_conn *fc)
> fc->connected = 1;
> fc->attr_version = 1;
> get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
> + INIT_LIST_HEAD(&fc->dmmap_list);
> }
> EXPORT_SYMBOL_GPL(fuse_conn_init);
>
> @@ -617,7 +618,7 @@ void fuse_conn_put(struct fuse_conn *fc)
> if (atomic_dec_and_test(&fc->count)) {
> if (fc->destroy_req)
> fuse_request_free(fc->destroy_req);
> - fc->release(fc);
> + fc->ops->release(fc);
> }
> }
> EXPORT_SYMBOL_GPL(fuse_conn_put);
> @@ -1025,6 +1026,167 @@ void fuse_dev_free(struct fuse_dev *fud)
> }
> EXPORT_SYMBOL_GPL(fuse_dev_free);
>
> +static int fuse_notify_store_to_inode(struct fuse_conn *fc,
> + struct fuse_copy_state *cs,
> + u64 nodeid, u32 size, u64 pos)
> +{
> + struct inode *inode;
> + struct address_space *mapping;
> + pgoff_t index;
> + unsigned int off;
> + loff_t file_size;
> + loff_t end;
> + int err;
> +
> + down_read(&fc->killsb);
> +
> + err = -ENOENT;
> + if (!fc->sb)
> + goto out_up_killsb;
> +
> + inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
> + if (!inode)
> + goto out_up_killsb;
> +
> + mapping = inode->i_mapping;
> + index = pos >> PAGE_CACHE_SHIFT;
> + off = pos & ~PAGE_CACHE_MASK;
> + file_size = i_size_read(inode);
> + end = pos + size;
> + if (end > file_size) {
> + file_size = end;
> + fuse_write_update_size(inode, file_size);
> + }
> +
> + while (size) {
> + struct page *page;
> + unsigned int this_num;
> +
> + err = -ENOMEM;
> + page = find_or_create_page(mapping, index,
> + mapping_gfp_mask(mapping));
> + if (!page)
> + goto out_iput;
> +
> + this_num = min_t(unsigned, size, PAGE_CACHE_SIZE - off);
> + err = fuse_copy_page(cs, &page, off, this_num, 0);
> + if (!err && off == 0 && (size != 0 || file_size == end))
> + SetPageUptodate(page);
> + unlock_page(page);
> + page_cache_release(page);
> +
> + if (err)
> + goto out_iput;
> +
> + size -= this_num;
> + off = 0;
> + index++;
> + }
> +
> + err = 0;
> +
> +out_iput:
> + iput(inode);
> +out_up_killsb:
> + up_read(&fc->killsb);
> +
> + return err;
> +}
> +
> +static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
> +{
> + release_pages(req->pages, req->num_pages, 0);
> +}
> +
> +static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
> + struct fuse_notify_retrieve_out *outarg)
> +{
> + int err;
> + struct address_space *mapping = inode->i_mapping;
> + struct fuse_req *req;
> + pgoff_t index;
> + loff_t file_size;
> + unsigned int num;
> + unsigned int offset;
> + size_t total_len = 0;
> +
> + req = fuse_get_req(fc, 0);
> + if (IS_ERR(req))
> + return PTR_ERR(req);
> +
> + offset = outarg->offset & ~PAGE_CACHE_MASK;
> +
> + req->in.h.opcode = FUSE_NOTIFY_REPLY;
> + req->in.h.nodeid = outarg->nodeid;
> + req->in.numargs = 2;
> + req->in.argpages = 1;
> + req->end = fuse_retrieve_end;
> +
> + index = outarg->offset >> PAGE_CACHE_SHIFT;
> + file_size = i_size_read(inode);
> + num = outarg->size;
> + if (outarg->offset > file_size)
> + num = 0;
> + else if (outarg->offset + num > file_size)
> + num = file_size - outarg->offset;
> +
> + while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
> + struct page *page;
> + unsigned int this_num;
> +
> + page = find_get_page(mapping, index);
> + if (!page)
> + break;
> +
> + this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
> + req->pages[req->num_pages] = page;
> + req->num_pages++;
> +
> + num -= this_num;
> + total_len += this_num;
> + index++;
> + }
> + req->misc.retrieve_in.offset = outarg->offset;
> + req->misc.retrieve_in.size = total_len;
> + req->in.args[0].size = sizeof(req->misc.retrieve_in);
> + req->in.args[0].value = &req->misc.retrieve_in;
> + req->in.args[1].size = total_len;
> +
> + err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
> + if (err)
> + fuse_retrieve_end(fc, req);
> +
> + return err;
> +}
> +
> +static int fuse_notify_retrieve_from_inode(struct fuse_conn *fc,
> + struct fuse_notify_retrieve_out *outarg)
> +{
> + struct inode *inode;
> + int err;
> +
> + down_read(&fc->killsb);
> + err = -ENOENT;
> + if (fc->sb) {
> + u64 nodeid = outarg->nodeid;
> +
> + inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
> + if (inode) {
> + err = fuse_retrieve(fc, inode, outarg);
> + iput(inode);
> + }
> + }
> + up_read(&fc->killsb);
> +
> + return err;
> +}
> +
> +static const struct fuse_conn_operations fuse_default_ops = {
> + .release = fuse_free_conn,
> + .notify_store = fuse_notify_store_to_inode,
> + .notify_retrieve = fuse_notify_retrieve_from_inode,
> +};
> +
> static int fuse_fill_super(struct super_block *sb, void *data, int silent)
> {
> struct fuse_dev *fud;
> @@ -1077,7 +1239,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
> goto err_fput;
>
> fuse_conn_init(fc);
> - fc->release = fuse_free_conn;
> + fc->ops = &fuse_default_ops;
>
> fud = fuse_dev_alloc(fc);
> if (!fud)
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index c9aca04..3f4c54b 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -102,6 +102,7 @@
> * - add ctime and ctimensec to fuse_setattr_in
> * - add FUSE_RENAME2 request
> * - add FUSE_NO_OPEN_SUPPORT flag
> + * - add FUSE_MMAP and FUSE_MUNMAP
> */
>
> #ifndef _LINUX_FUSE_H
> @@ -358,6 +359,8 @@ enum fuse_opcode {
> FUSE_FALLOCATE = 43,
> FUSE_READDIRPLUS = 44,
> FUSE_RENAME2 = 45,
> + FUSE_MMAP = 46,
> + FUSE_MUNMAP = 47,
>
> /* CUSE specific operations */
> CUSE_INIT = 4096,
> @@ -670,6 +673,29 @@ struct fuse_fallocate_in {
> uint32_t padding;
> };
>
> +struct fuse_mmap_in {
> + __u64 fh;
> + __u64 addr;
> + __u64 len;
> + __u32 prot;
> + __u32 flags;
> + __u64 offset;
> +};
> +
> +struct fuse_mmap_out {
> + __u64 mapid; /* Mmap ID, same namespace as Inode ID */
> + __u64 size; /* Size of memory region */
> + __u64 reserved;
> +};
> +
> +struct fuse_munmap_in {
> + __u64 fh;
> + __u64 mapid;
> + __u64 size; /* Size of memory region */
> + __u64 offset;
> + __u64 reserved;
> +};
> +
> struct fuse_in_header {
> uint32_t len;
> uint32_t opcode;
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists