lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5671C069.7090300@studio.unibo.it>
Date:	Wed, 16 Dec 2015 20:50:01 +0100
From:	Luca Risolia <luca.risolia@...dio.unibo.it>
To:	"Jader H. Silva" <jaderhs5@...il.com>,
	Miklos Szeredi <miklos@...redi.hu>,
	Andrew Morton <akpm@...ux-foundation.org>,
	<linux-fsdevel@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
	Nikolaus Rath <Nikolaus@...h.org>
Subject: Re: [PATCH] fuse: implement cuse mmap

I tested this patch and gave some hints to Jader when it first appeared 
on the libfuse mailing list some months ago.

Signed-off-by: Luca Risolia <luca.risolia@...dio.unibo.it>

Jader H. Silva wrote:
> Implement cuse mmap using shmem to provide the actual memory maps.
> Pages must be read/written using fuse's NOTIFY_RETRIEVE and NOTIFY_STORE api.
>
> Signed-off-by: Jader H. Silva <jaderhs5@...il.com>
> ---
>   fs/fuse/cuse.c            | 459 +++++++++++++++++++++++++++++++++++++++++++++-
>   fs/fuse/dev.c             | 163 +---------------
>   fs/fuse/fuse_i.h          |  34 +++-
>   fs/fuse/inode.c           | 166 ++++++++++++++++-
>   include/uapi/linux/fuse.h |  26 +++
>   5 files changed, 688 insertions(+), 160 deletions(-)
>
> diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
> index eae2c11..7749c13 100644
> --- a/fs/fuse/cuse.c
> +++ b/fs/fuse/cuse.c
> @@ -48,6 +48,9 @@
>   #include <linux/stat.h>
>   #include <linux/module.h>
>   #include <linux/uio.h>
> +#include <linux/mman.h>
> +#include <linux/falloc.h>
> +#include <linux/shmem_fs.h>
>
>   #include "fuse_i.h"
>
> @@ -175,6 +178,441 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
>   	return fuse_do_ioctl(file, cmd, arg, flags);
>   }
>
> +struct fuse_dmmap_region {
> +	u64 mapid;
> +	u64 size;
> +	struct file *filp;
> +	struct vm_operations_struct vm_ops;
> +	const struct vm_operations_struct *vm_original_ops;
> +	struct list_head list;
> +	atomic_t ref;
> +};
> +
> +/*
> + * fuse_dmmap_vm represents the result of a single mmap() call, which
> + * can be shared by multiple client vmas created by forking.
> + */
> +struct fuse_dmmap_vm {
> +	u64 len;
> +	u64 off;
> +	atomic_t open_count;
> +	struct fuse_dmmap_region *region;
> +};
> +
> +static void fuse_dmmap_region_put(struct fuse_conn *fc,
> +				  struct fuse_dmmap_region *fdr)
> +{
> +	if (atomic_dec_and_lock(&fdr->ref, &fc->lock)) {
> +
> +		list_del(&fdr->list);
> +
> +		spin_unlock(&fc->lock);
> +
> +		fput(fdr->filp);
> +		kfree(fdr);
> +	}
> +}
> +
> +static void fuse_dmmap_vm_open(struct vm_area_struct *vma)
> +{
> +	struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
> +	struct fuse_dmmap_region *fdr = fdvm->region;
> +
> +	/* vma copied */
> +	atomic_inc(&fdvm->open_count);
> +
> +	if (fdr->vm_original_ops->open)
> +		fdr->vm_original_ops->open(vma);
> +}
> +
> +static void fuse_dmmap_vm_close(struct vm_area_struct *vma)
> +{
> +	struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
> +	struct fuse_dmmap_region *fdr = fdvm->region;
> +	struct fuse_file *ff = vma->vm_file->private_data;
> +	struct fuse_conn *fc = ff->fc;
> +	struct fuse_req *req;
> +	struct fuse_munmap_in *inarg;
> +
> +	if (fdr->vm_original_ops->close)
> +		fdr->vm_original_ops->close(vma);
> +
> +	if (!atomic_dec_and_test(&fdvm->open_count))
> +		return;
> +
> +	/*
> +	 * Notify server that the mmap region has been unmapped.
> +	 * Failing this might lead to resource leak in server, don't
> +	 * fail.
> +	 */
> +	req = fuse_get_req_nofail_nopages(fc, vma->vm_file);
> +	inarg = &req->misc.munmap_in;
> +
> +	inarg->fh = ff->fh;
> +	inarg->mapid = fdvm->region->mapid;
> +	inarg->size = fdvm->len;
> +	inarg->offset = fdvm->off;
> +
> +	req->in.h.opcode = FUSE_MUNMAP;
> +	req->in.h.nodeid = ff->nodeid;
> +	req->in.numargs = 1;
> +	req->in.args[0].size = sizeof(*inarg);
> +	req->in.args[0].value = inarg;
> +
> +	fuse_request_send(fc, req);
> +	fuse_put_request(fc, req);
> +	fuse_dmmap_region_put(fc, fdvm->region);
> +	kfree(fdvm);
> +}
> +
> +static int fuse_dmmap_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> +	int ret;
> +	struct file *filp = vma->vm_file;
> +	struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
> +	struct fuse_dmmap_region *fdr = fdvm->region;
> +
> +	vma->vm_file = fdr->filp;
> +	ret = fdr->vm_original_ops->fault(vma, vmf);
> +
> +	vma->vm_file = filp;
> +
> +	return ret;
> +}
> +
> +static const struct vm_operations_struct fuse_dmmap_vm_ops = {
> +	.open		= fuse_dmmap_vm_open,
> +	.close		= fuse_dmmap_vm_close,
> +	.fault		= fuse_dmmap_vm_fault,
> +};
> +
> +static struct fuse_dmmap_region *fuse_dmmap_find_locked(struct fuse_conn *fc,
> +							u64 mapid)
> +{
> +	struct fuse_dmmap_region *curr;
> +	struct fuse_dmmap_region *fdr = NULL;
> +
> +	list_for_each_entry(curr, &fc->dmmap_list, list) {
> +		if (curr->mapid == mapid) {
> +			fdr = curr;
> +			atomic_inc(&fdr->ref);
> +			break;
> +		}
> +	}
> +
> +	return fdr;
> +}
> +
> +static struct fuse_dmmap_region *fuse_dmmap_find(struct fuse_conn *fc,
> +						 u64 mapid)
> +{
> +	struct fuse_dmmap_region *fdr;
> +
> +	spin_lock(&fc->lock);
> +	fdr = fuse_dmmap_find_locked(fc, mapid);
> +	spin_unlock(&fc->lock);
> +
> +	return fdr;
> +}
> +
> +static struct fuse_dmmap_region *fuse_dmmap_get(struct fuse_conn *fc,
> +						struct file *file, u64 mapid,
> +						u64 size, unsigned long flags)
> +{
> +	struct fuse_dmmap_region *fdr;
> +	char *pathbuf, *filepath;
> +	struct file *shmem_file;
> +
> +	fdr = fuse_dmmap_find(fc, mapid);
> +	if (!fdr) {
> +		struct fuse_dmmap_region *tmp;
> +
> +		fdr = kzalloc(sizeof(struct fuse_dmmap_region), GFP_KERNEL);
> +		if (!fdr)
> +			return ERR_PTR(-ENOMEM);
> +
> +		atomic_set(&fdr->ref, 1);
> +
> +		pathbuf = kzalloc(PATH_MAX+1, GFP_KERNEL);
> +		if (!pathbuf) {
> +			kfree(fdr);
> +			return ERR_PTR(-ENOMEM);
> +		}
> +
> +		filepath = d_path(&file->f_path, pathbuf, PATH_MAX+1);
> +		if (IS_ERR(filepath)) {
> +			kfree(fdr);
> +			kfree(pathbuf);
> +			return (struct fuse_dmmap_region *) filepath;
> +		}
> +
> +		fdr->mapid = mapid;
> +		shmem_file = shmem_file_setup(filepath, size, flags);
> +		kfree(pathbuf);
> +
> +		if (IS_ERR(shmem_file)) {
> +			kfree(fdr);
> +			return (struct fuse_dmmap_region *) shmem_file;
> +		}
> +
> +		fdr->filp = shmem_file;
> +
> +		spin_lock(&fc->lock);
> +		tmp = fuse_dmmap_find_locked(fc, mapid);
> +		if (tmp) {
> +			fput(fdr->filp);
> +			kfree(fdr);
> +			fdr = tmp;
> +		} else {
> +			INIT_LIST_HEAD(&fdr->list);
> +			list_add(&fdr->list, &fc->dmmap_list);
> +		}
> +		spin_unlock(&fc->lock);
> +	}
> +
> +	if (size > fdr->size) {
> +
> +		fdr->filp->f_op->fallocate(fdr->filp, 0, 0, size);
> +		fdr->size = size;
> +	}
> +
> +	return fdr;
> +}
> +
> +static int cuse_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +	int err;
> +	struct fuse_file *ff = file->private_data;
> +	struct fuse_conn *fc = ff->fc;
> +	struct fuse_dmmap_vm *fdvm;
> +	struct fuse_dmmap_region *fdr;
> +	struct fuse_req *req = NULL;
> +	struct fuse_mmap_in inarg;
> +	struct fuse_mmap_out outarg;
> +
> +	if (fc->no_dmmap)
> +		return -ENOSYS;
> +
> +	req = fuse_get_req(fc, 0);
> +	if (IS_ERR(req))
> +		return PTR_ERR(req);
> +
> +	/* ask server whether this mmap is okay and what the size should be */
> +	memset(&inarg, 0, sizeof(inarg));
> +	inarg.fh = ff->fh;
> +	inarg.addr = vma->vm_start;
> +	inarg.len = vma->vm_end - vma->vm_start;
> +	inarg.prot = ((vma->vm_flags & VM_READ) ? PROT_READ : 0) |
> +		     ((vma->vm_flags & VM_WRITE) ? PROT_WRITE : 0) |
> +		     ((vma->vm_flags & VM_EXEC) ? PROT_EXEC : 0);
> +	inarg.flags = ((vma->vm_flags & VM_SHARED) ? MAP_SHARED : 0 ) |
> +		      ((vma->vm_flags & VM_GROWSDOWN) ? MAP_GROWSDOWN : 0) |
> +		      ((vma->vm_flags & VM_DENYWRITE) ? MAP_DENYWRITE : 0) |
> +		      ((vma->vm_flags & VM_EXEC) ? MAP_EXECUTABLE : 0) |
> +		      ((vma->vm_flags & VM_LOCKED) ? MAP_LOCKED : 0);
> +	inarg.offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
> +
> +	req->in.h.opcode = FUSE_MMAP;
> +	req->in.h.nodeid = ff->nodeid;
> +	req->in.numargs = 1;
> +	req->in.args[0].size = sizeof(inarg);
> +	req->in.args[0].value = &inarg;
> +	req->out.numargs = 1;
> +	req->out.args[0].size = sizeof(outarg);
> +	req->out.args[0].value = &outarg;
> +
> +	fuse_request_send(fc, req);
> +	err = req->out.h.error;
> +	if (err) {
> +		if (err == -ENOSYS)
> +			fc->no_dmmap = 1;
> +		goto free_req;
> +	}
> +
> +	fdr = fuse_dmmap_get(fc, file, outarg.mapid, outarg.size,
> +			     vma->vm_flags);
> +	err = PTR_ERR(fdr);
> +	if (IS_ERR(fdr))
> +		goto free_req;
> +
> +	err = -ENOMEM;
> +
> +	fdvm = kzalloc(sizeof(*fdvm), GFP_KERNEL);
> +	if (!fdvm) {
> +		fuse_dmmap_region_put(fc, fdr);
> +		goto free_req;
> +	}
> +
> +	atomic_set(&fdvm->open_count, 1);
> +	fdvm->region = fdr;
> +	fdvm->len = inarg.len;
> +	fdvm->off = inarg.offset;
> +
> +	fdr->filp->f_op->mmap(fdr->filp, vma);
> +
> +	memcpy(&fdr->vm_ops, vma->vm_ops, sizeof(fdr->vm_ops));
> +	fdr->vm_ops.open = fuse_dmmap_vm_ops.open;
> +	fdr->vm_ops.close = fuse_dmmap_vm_ops.close;
> +	fdr->vm_ops.fault = fuse_dmmap_vm_ops.fault;
> +
> +	fdr->vm_original_ops = vma->vm_ops;
> +
> +	vma->vm_ops = &fdr->vm_ops;
> +
> +	vma->vm_private_data = fdvm;
> +	vma->vm_flags |= VM_DONTEXPAND;	/* disallow expansion for now */
> +	err = 0;
> +
> +free_req:
> +	fuse_put_request(fc, req);
> +	return err;
> +}
> +
> +static int fuse_notify_store_to_dmmap(struct fuse_conn *fc,
> +				      struct fuse_copy_state *cs,
> +				      u64 nodeid, u32 size, u64 pos)
> +{
> +	struct fuse_dmmap_region *fdr;
> +	struct file *filp;
> +	pgoff_t index;
> +	unsigned int off;
> +	int err;
> +
> +	fdr = fuse_dmmap_find(fc, nodeid);
> +	if (!fdr)
> +		return -ENOENT;
> +
> +	index = pos >> PAGE_SHIFT;
> +	off = pos & ~PAGE_MASK;
> +	if (pos > fdr->size)
> +		size = 0;
> +	else if (size > fdr->size - pos)
> +		size = fdr->size - pos;
> +
> +	filp = fdr->filp;
> +
> +	while (size) {
> +		struct page *page;
> +		unsigned int this_num;
> +
> +		page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping,
> +						   index, GFP_HIGHUSER);
> +		if (IS_ERR(page)) {
> +
> +			err = -ENOMEM;
> +			goto out_iput;
> +		}
> +
> +		this_num = min_t(unsigned, size, PAGE_SIZE - off);
> +		err = fuse_copy_page(cs, &page, off, this_num, 0);
> +
> +		unlock_page(page);
> +		page_cache_release(page);
> +
> +		if (err)
> +			goto out_iput;
> +
> +		size -= this_num;
> +		off = 0;
> +		index++;
> +	}
> +
> +	err = 0;
> +
> +out_iput:
> +	fuse_dmmap_region_put(fc, fdr);
> +
> +	return err;
> +}
> +
> +static void fuse_retrieve_dmmap_end(struct fuse_conn *fc, struct fuse_req *req)
> +{
> +	release_pages(req->pages, req->num_pages, 0);
> +}
> +
> +static int fuse_notify_retrieve_from_dmmap(struct fuse_conn *fc,
> +				struct fuse_notify_retrieve_out *outarg)
> +{
> +	struct fuse_dmmap_region *fdr;
> +	struct fuse_req *req;
> +	struct page *page;
> +	struct file *filp;
> +	pgoff_t index;
> +	unsigned int num;
> +	unsigned int offset;
> +	unsigned int npages;
> +	unsigned int this_num;
> +	size_t total_len = 0;
> +	int err;
> +
> +	fdr = fuse_dmmap_find(fc, outarg->nodeid);
> +	if (!fdr)
> +		return -ENOENT;
> +
> +	npages = outarg->size >> PAGE_SHIFT;
> +	if (outarg->size & ~PAGE_MASK)
> +		npages++;
> +
> +	req = fuse_get_req(fc, npages);
> +	err = PTR_ERR(req);
> +	if (IS_ERR(req))
> +		goto out_put_region;
> +
> +	offset = outarg->offset & ~PAGE_MASK;
> +
> +	req->in.h.opcode = FUSE_NOTIFY_REPLY;
> +	req->in.h.nodeid = outarg->nodeid;
> +	req->in.numargs = 2;
> +	req->in.argpages = 1;
> +	req->end = fuse_retrieve_dmmap_end;
> +
> +	index = outarg->offset >> PAGE_SHIFT;
> +	num = outarg->size;
> +	if (outarg->offset > fdr->size)
> +		num = 0;
> +	else if (outarg->offset + num > fdr->size)
> +		num = fdr->size - outarg->offset;
> +
> +	filp = fdr->filp;
> +
> +	npages = 0;
> +	while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
> +
> +		page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping,
> +						   index,
> +						   GFP_KERNEL);
> +		if (IS_ERR(page)) {
> +			err = -ENOMEM;
> +			goto out_put_region;
> +		}
> +
> +		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
> +		req->pages[req->num_pages] = page;
> +		req->page_descs[req->num_pages].length = this_num;
> +		req->num_pages++;
> +
> +		num -= this_num;
> +		total_len += this_num;
> +		index++;
> +		npages++;
> +	}
> +	req->misc.retrieve_in.offset = outarg->offset;
> +	req->misc.retrieve_in.size = total_len;
> +	req->in.args[0].size = sizeof(req->misc.retrieve_in);
> +	req->in.args[0].value = &req->misc.retrieve_in;
> +	req->in.args[1].size = total_len;
> +
> +	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
> +	if (err)
> +		fuse_retrieve_dmmap_end(fc, req);
> +
> +out_put_region:
> +	fuse_dmmap_region_put(fc, fdr);
> +
> +	return err;
> +}
> +
> +
>   static const struct file_operations cuse_frontend_fops = {
>   	.owner			= THIS_MODULE,
>   	.read_iter		= cuse_read_iter,
> @@ -184,7 +622,8 @@ static const struct file_operations cuse_frontend_fops = {
>   	.unlocked_ioctl		= cuse_file_ioctl,
>   	.compat_ioctl		= cuse_file_compat_ioctl,
>   	.poll			= fuse_file_poll,
> -	.llseek		= noop_llseek,
> +	.llseek			= noop_llseek,
> +	.mmap			= cuse_mmap,
>   };
>
>
> @@ -468,10 +907,26 @@ err:
>
>   static void cuse_fc_release(struct fuse_conn *fc)
>   {
> +	struct fuse_dmmap_region *fdr;
>   	struct cuse_conn *cc = fc_to_cc(fc);
> +
> +	spin_lock(&fc->lock);
> +	while (!list_empty(&fc->dmmap_list)) {
> +
> +		fdr = list_entry(fc->dmmap_list.next, typeof(*fdr), list);
> +		fuse_dmmap_region_put(fc, fdr);
> +	}
> +	spin_unlock(&fc->lock);
> +
>   	kfree_rcu(cc, fc.rcu);
>   }
>
> +static const struct fuse_conn_operations cuse_ops = {
> +	.release = cuse_fc_release,
> +	.notify_store = fuse_notify_store_to_dmmap,
> +	.notify_retrieve = fuse_notify_retrieve_from_dmmap,
> +};
> +
>   /**
>    * cuse_channel_open - open method for /dev/cuse
>    * @inode: inode for /dev/cuse
> @@ -507,7 +962,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
>   	}
>
>   	INIT_LIST_HEAD(&cc->list);
> -	cc->fc.release = cuse_fc_release;
> +	cc->fc.ops = &cuse_ops;
>
>   	cc->fc.initialized = 1;
>   	rc = cuse_send_init(cc);
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 80cc1b3..0faf92c 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -279,6 +279,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
>   	__clear_bit(FR_BACKGROUND, &req->flags);
>   	return req;
>   }
> +EXPORT_SYMBOL_GPL(fuse_get_req_nofail_nopages);
>
>   void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
>   {
> @@ -617,8 +618,8 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
>   }
>   EXPORT_SYMBOL_GPL(fuse_request_send_background);
>
> -static int fuse_request_send_notify_reply(struct fuse_conn *fc,
> -					  struct fuse_req *req, u64 unique)
> +int fuse_request_send_notify_reply(struct fuse_conn *fc,
> +				   struct fuse_req *req, u64 unique)
>   {
>   	int err = -ENODEV;
>   	struct fuse_iqueue *fiq = &fc->iq;
> @@ -674,6 +675,7 @@ static int lock_request(struct fuse_req *req)
>   	}
>   	return err;
>   }
> +EXPORT_SYMBOL_GPL(fuse_request_send_notify_reply);
>
>   /*
>    * Unlock request.  If it was aborted while locked, caller is responsible
> @@ -967,8 +969,8 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
>    * Copy a page in the request to/from the userspace buffer.  Must be
>    * done atomically
>    */
> -static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
> -			  unsigned offset, unsigned count, int zeroing)
> +int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
> +		   unsigned offset, unsigned count, int zeroing)
>   {
>   	int err;
>   	struct page *page = *pagep;
> @@ -1003,6 +1005,7 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
>   		flush_dcache_page(page);
>   	return 0;
>   }
> +EXPORT_SYMBOL_GPL(fuse_copy_page);
>
>   /* Copy pages in the request to/from userspace buffer */
>   static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
> @@ -1597,15 +1600,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
>   			     struct fuse_copy_state *cs)
>   {
>   	struct fuse_notify_store_out outarg;
> -	struct inode *inode;
> -	struct address_space *mapping;
> -	u64 nodeid;
>   	int err;
> -	pgoff_t index;
> -	unsigned int offset;
> -	unsigned int num;
> -	loff_t file_size;
> -	loff_t end;
>
>   	err = -EINVAL;
>   	if (size < sizeof(outarg))
> @@ -1619,145 +1614,18 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
>   	if (size - sizeof(outarg) != outarg.size)
>   		goto out_finish;
>
> -	nodeid = outarg.nodeid;
> +	err = fc->ops->notify_store(fc, cs, outarg.nodeid, outarg.size,
> +				       outarg.offset);
>
> -	down_read(&fc->killsb);
> -
> -	err = -ENOENT;
> -	if (!fc->sb)
> -		goto out_up_killsb;
> -
> -	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
> -	if (!inode)
> -		goto out_up_killsb;
> -
> -	mapping = inode->i_mapping;
> -	index = outarg.offset >> PAGE_CACHE_SHIFT;
> -	offset = outarg.offset & ~PAGE_CACHE_MASK;
> -	file_size = i_size_read(inode);
> -	end = outarg.offset + outarg.size;
> -	if (end > file_size) {
> -		file_size = end;
> -		fuse_write_update_size(inode, file_size);
> -	}
> -
> -	num = outarg.size;
> -	while (num) {
> -		struct page *page;
> -		unsigned int this_num;
> -
> -		err = -ENOMEM;
> -		page = find_or_create_page(mapping, index,
> -					   mapping_gfp_mask(mapping));
> -		if (!page)
> -			goto out_iput;
> -
> -		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
> -		err = fuse_copy_page(cs, &page, offset, this_num, 0);
> -		if (!err && offset == 0 &&
> -		    (this_num == PAGE_CACHE_SIZE || file_size == end))
> -			SetPageUptodate(page);
> -		unlock_page(page);
> -		page_cache_release(page);
> -
> -		if (err)
> -			goto out_iput;
> -
> -		num -= this_num;
> -		offset = 0;
> -		index++;
> -	}
> -
> -	err = 0;
> -
> -out_iput:
> -	iput(inode);
> -out_up_killsb:
> -	up_read(&fc->killsb);
>   out_finish:
>   	fuse_copy_finish(cs);
>   	return err;
>   }
>
> -static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
> -{
> -	release_pages(req->pages, req->num_pages, false);
> -}
> -
> -static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
> -			 struct fuse_notify_retrieve_out *outarg)
> -{
> -	int err;
> -	struct address_space *mapping = inode->i_mapping;
> -	struct fuse_req *req;
> -	pgoff_t index;
> -	loff_t file_size;
> -	unsigned int num;
> -	unsigned int offset;
> -	size_t total_len = 0;
> -	int num_pages;
> -
> -	offset = outarg->offset & ~PAGE_CACHE_MASK;
> -	file_size = i_size_read(inode);
> -
> -	num = outarg->size;
> -	if (outarg->offset > file_size)
> -		num = 0;
> -	else if (outarg->offset + num > file_size)
> -		num = file_size - outarg->offset;
> -
> -	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
> -	num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
> -
> -	req = fuse_get_req(fc, num_pages);
> -	if (IS_ERR(req))
> -		return PTR_ERR(req);
> -
> -	req->in.h.opcode = FUSE_NOTIFY_REPLY;
> -	req->in.h.nodeid = outarg->nodeid;
> -	req->in.numargs = 2;
> -	req->in.argpages = 1;
> -	req->page_descs[0].offset = offset;
> -	req->end = fuse_retrieve_end;
> -
> -	index = outarg->offset >> PAGE_CACHE_SHIFT;
> -
> -	while (num && req->num_pages < num_pages) {
> -		struct page *page;
> -		unsigned int this_num;
> -
> -		page = find_get_page(mapping, index);
> -		if (!page)
> -			break;
> -
> -		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
> -		req->pages[req->num_pages] = page;
> -		req->page_descs[req->num_pages].length = this_num;
> -		req->num_pages++;
> -
> -		offset = 0;
> -		num -= this_num;
> -		total_len += this_num;
> -		index++;
> -	}
> -	req->misc.retrieve_in.offset = outarg->offset;
> -	req->misc.retrieve_in.size = total_len;
> -	req->in.args[0].size = sizeof(req->misc.retrieve_in);
> -	req->in.args[0].value = &req->misc.retrieve_in;
> -	req->in.args[1].size = total_len;
> -
> -	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
> -	if (err)
> -		fuse_retrieve_end(fc, req);
> -
> -	return err;
> -}
> -
>   static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
>   				struct fuse_copy_state *cs)
>   {
>   	struct fuse_notify_retrieve_out outarg;
> -	struct inode *inode;
>   	int err;
>
>   	err = -EINVAL;
> @@ -1770,18 +1638,7 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
>
>   	fuse_copy_finish(cs);
>
> -	down_read(&fc->killsb);
> -	err = -ENOENT;
> -	if (fc->sb) {
> -		u64 nodeid = outarg.nodeid;
> -
> -		inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
> -		if (inode) {
> -			err = fuse_retrieve(fc, inode, &outarg);
> -			iput(inode);
> -		}
> -	}
> -	up_read(&fc->killsb);
> +	err = fc->ops->notify_retrieve(fc, &outarg);
>
>   	return err;
>
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 4051131..a56222b 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -337,6 +337,7 @@ struct fuse_req {
>   			struct fuse_req *next;
>   		} write;
>   		struct fuse_notify_retrieve_in retrieve_in;
> +		struct fuse_munmap_in munmap_in;
>   	} misc;
>
>   	/** page vector */
> @@ -431,6 +432,21 @@ struct fuse_dev {
>   	struct list_head entry;
>   };
>
> +struct fuse_copy_state;
> +
> +struct fuse_conn_operations {
> +	/** Called on final put */
> +	void (*release)(struct fuse_conn *);
> +
> +	/** Called to store data into a mapping */
> +	int (*notify_store)(struct fuse_conn *, struct fuse_copy_state *,
> +			    u64 nodeid, u32 size, u64 pos);
> +
> +	/** Called to retrieve data from a mapping */
> +	int (*notify_retrieve)(struct fuse_conn *,
> +			       struct fuse_notify_retrieve_out *);
> +};
> +
>   /**
>    * A Fuse connection.
>    *
> @@ -578,6 +594,9 @@ struct fuse_conn {
>   	/** Is poll not implemented by fs? */
>   	unsigned no_poll:1;
>
> +	/** Is direct mmap not implemented by fs? */
> +	unsigned no_dmmap:1;
> +
>   	/** Do multi-page cached writes */
>   	unsigned big_writes:1;
>
> @@ -635,9 +654,6 @@ struct fuse_conn {
>   	/** Version counter for attribute changes */
>   	u64 attr_version;
>
> -	/** Called on final put */
> -	void (*release)(struct fuse_conn *);
> -
>   	/** Super block for this connection. */
>   	struct super_block *sb;
>
> @@ -646,6 +662,12 @@ struct fuse_conn {
>
>   	/** List of device instances belonging to this connection */
>   	struct list_head devices;
> +
> +	/** List of direct mmaps (currently CUSE only) */
> +	struct list_head dmmap_list;
> +
> +	/** Operations that fuse and cuse can implement differently */
> +	const struct fuse_conn_operations *ops;
>   };
>
>   static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
> @@ -944,4 +966,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
>
>   void fuse_set_initialized(struct fuse_conn *fc);
>
> +int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
> +		   unsigned offset, unsigned count, int zeroing);
> +
> +int fuse_request_send_notify_reply(struct fuse_conn *fc,
> +				   struct fuse_req *req, u64 unique);
> +
>   #endif /* _FS_FUSE_I_H */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index ac81f48..5284b84 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -609,6 +609,7 @@ void fuse_conn_init(struct fuse_conn *fc)
>   	fc->connected = 1;
>   	fc->attr_version = 1;
>   	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
> +	INIT_LIST_HEAD(&fc->dmmap_list);
>   }
>   EXPORT_SYMBOL_GPL(fuse_conn_init);
>
> @@ -617,7 +618,7 @@ void fuse_conn_put(struct fuse_conn *fc)
>   	if (atomic_dec_and_test(&fc->count)) {
>   		if (fc->destroy_req)
>   			fuse_request_free(fc->destroy_req);
> -		fc->release(fc);
> +		fc->ops->release(fc);
>   	}
>   }
>   EXPORT_SYMBOL_GPL(fuse_conn_put);
> @@ -1025,6 +1026,167 @@ void fuse_dev_free(struct fuse_dev *fud)
>   }
>   EXPORT_SYMBOL_GPL(fuse_dev_free);
>
> +static int fuse_notify_store_to_inode(struct fuse_conn *fc,
> +				      struct fuse_copy_state *cs,
> +				      u64 nodeid, u32 size, u64 pos)
> +{
> +	struct inode *inode;
> +	struct address_space *mapping;
> +	pgoff_t index;
> +	unsigned int off;
> +	loff_t file_size;
> +	loff_t end;
> +	int err;
> +
> +	down_read(&fc->killsb);
> +
> +	err = -ENOENT;
> +	if (!fc->sb)
> +		goto out_up_killsb;
> +
> +	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
> +	if (!inode)
> +		goto out_up_killsb;
> +
> +	mapping = inode->i_mapping;
> +	index = pos >> PAGE_CACHE_SHIFT;
> +	off = pos & ~PAGE_CACHE_MASK;
> +	file_size = i_size_read(inode);
> +	end = pos + size;
> +	if (end > file_size) {
> +		file_size = end;
> +		fuse_write_update_size(inode, file_size);
> +	}
> +
> +	while (size) {
> +		struct page *page;
> +		unsigned int this_num;
> +
> +		err = -ENOMEM;
> +		page = find_or_create_page(mapping, index,
> +					   mapping_gfp_mask(mapping));
> +		if (!page)
> +			goto out_iput;
> +
> +		this_num = min_t(unsigned, size, PAGE_CACHE_SIZE - off);
> +		err = fuse_copy_page(cs, &page, off, this_num, 0);
> +		if (!err && off == 0 && (size != 0 || file_size == end))
> +			SetPageUptodate(page);
> +		unlock_page(page);
> +		page_cache_release(page);
> +
> +		if (err)
> +			goto out_iput;
> +
> +		size -= this_num;
> +		off = 0;
> +		index++;
> +	}
> +
> +	err = 0;
> +
> +out_iput:
> +	iput(inode);
> +out_up_killsb:
> +	up_read(&fc->killsb);
> +
> +	return err;
> +}
> +
> +static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
> +{
> +	release_pages(req->pages, req->num_pages, 0);
> +}
> +
> +static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
> +			 struct fuse_notify_retrieve_out *outarg)
> +{
> +	int err;
> +	struct address_space *mapping = inode->i_mapping;
> +	struct fuse_req *req;
> +	pgoff_t index;
> +	loff_t file_size;
> +	unsigned int num;
> +	unsigned int offset;
> +	size_t total_len = 0;
> +
> +	req = fuse_get_req(fc, 0);
> +	if (IS_ERR(req))
> +		return PTR_ERR(req);
> +
> +	offset = outarg->offset & ~PAGE_CACHE_MASK;
> +
> +	req->in.h.opcode = FUSE_NOTIFY_REPLY;
> +	req->in.h.nodeid = outarg->nodeid;
> +	req->in.numargs = 2;
> +	req->in.argpages = 1;
> +	req->end = fuse_retrieve_end;
> +
> +	index = outarg->offset >> PAGE_CACHE_SHIFT;
> +	file_size = i_size_read(inode);
> +	num = outarg->size;
> +	if (outarg->offset > file_size)
> +		num = 0;
> +	else if (outarg->offset + num > file_size)
> +		num = file_size - outarg->offset;
> +
> +	while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
> +		struct page *page;
> +		unsigned int this_num;
> +
> +		page = find_get_page(mapping, index);
> +		if (!page)
> +			break;
> +
> +		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
> +		req->pages[req->num_pages] = page;
> +		req->num_pages++;
> +
> +		num -= this_num;
> +		total_len += this_num;
> +		index++;
> +	}
> +	req->misc.retrieve_in.offset = outarg->offset;
> +	req->misc.retrieve_in.size = total_len;
> +	req->in.args[0].size = sizeof(req->misc.retrieve_in);
> +	req->in.args[0].value = &req->misc.retrieve_in;
> +	req->in.args[1].size = total_len;
> +
> +	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
> +	if (err)
> +		fuse_retrieve_end(fc, req);
> +
> +	return err;
> +}
> +
> +static int fuse_notify_retrieve_from_inode(struct fuse_conn *fc,
> +				struct fuse_notify_retrieve_out *outarg)
> +{
> +	struct inode *inode;
> +	int err;
> +
> +	down_read(&fc->killsb);
> +	err = -ENOENT;
> +	if (fc->sb) {
> +		u64 nodeid = outarg->nodeid;
> +
> +		inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
> +		if (inode) {
> +			err = fuse_retrieve(fc, inode, outarg);
> +			iput(inode);
> +		}
> +	}
> +	up_read(&fc->killsb);
> +
> +	return err;
> +}
> +
> +static const struct fuse_conn_operations fuse_default_ops = {
> +	.release = fuse_free_conn,
> +	.notify_store = fuse_notify_store_to_inode,
> +	.notify_retrieve = fuse_notify_retrieve_from_inode,
> +};
> +
>   static int fuse_fill_super(struct super_block *sb, void *data, int silent)
>   {
>   	struct fuse_dev *fud;
> @@ -1077,7 +1239,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
>   		goto err_fput;
>
>   	fuse_conn_init(fc);
> -	fc->release = fuse_free_conn;
> +	fc->ops = &fuse_default_ops;
>
>   	fud = fuse_dev_alloc(fc);
>   	if (!fud)
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index c9aca04..3f4c54b 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -102,6 +102,7 @@
>    *  - add ctime and ctimensec to fuse_setattr_in
>    *  - add FUSE_RENAME2 request
>    *  - add FUSE_NO_OPEN_SUPPORT flag
> + *  - add FUSE_MMAP and FUSE_MUNMAP
>    */
>
>   #ifndef _LINUX_FUSE_H
> @@ -358,6 +359,8 @@ enum fuse_opcode {
>   	FUSE_FALLOCATE     = 43,
>   	FUSE_READDIRPLUS   = 44,
>   	FUSE_RENAME2       = 45,
> +	FUSE_MMAP          = 46,
> +	FUSE_MUNMAP        = 47,
>
>   	/* CUSE specific operations */
>   	CUSE_INIT          = 4096,
> @@ -670,6 +673,29 @@ struct fuse_fallocate_in {
>   	uint32_t	padding;
>   };
>
> +struct fuse_mmap_in {
> +	__u64	fh;
> +	__u64	addr;
> +	__u64	len;
> +	__u32	prot;
> +	__u32	flags;
> +	__u64	offset;
> +};
> +
> +struct fuse_mmap_out {
> +	__u64	mapid;		/* Mmap ID, same namespace as Inode ID */
> +	__u64	size;		/* Size of memory region */
> +	__u64	reserved;
> +};
> +
> +struct fuse_munmap_in {
> +	__u64	fh;
> +	__u64	mapid;
> +	__u64	size;		/* Size of memory region */
> +	__u64	offset;
> +	__u64	reserved;
> +};
> +
>   struct fuse_in_header {
>   	uint32_t	len;
>   	uint32_t	opcode;
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ