lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <5671BE21.4080800@gmail.com>
Date:	Wed, 16 Dec 2015 17:40:17 -0200
From:	"Jader H. Silva" <jaderhs5@...il.com>
To:	Miklos Szeredi <miklos@...redi.hu>,
	Andrew Morton <akpm@...ux-foundation.org>,
	linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
	Luca Risolia <luca.risolia@...dio.unibo.it>
Subject: [PATCH] fuse: implement cuse mmap

Implement cuse mmap using shmem to provide the actual memory maps.
Pages must be read/written using FUSE's NOTIFY_RETRIEVE and NOTIFY_STORE API.

Signed-off-by: Jader H. Silva <jaderhs5@...il.com>
---
 fs/fuse/cuse.c            | 459 +++++++++++++++++++++++++++++++++++++++++++++-
 fs/fuse/dev.c             | 163 +---------------
 fs/fuse/fuse_i.h          |  34 +++-
 fs/fuse/inode.c           | 166 ++++++++++++++++-
 include/uapi/linux/fuse.h |  26 +++
 5 files changed, 688 insertions(+), 160 deletions(-)

diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index eae2c11..7749c13 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -48,6 +48,9 @@
 #include <linux/stat.h>
 #include <linux/module.h>
 #include <linux/uio.h>
+#include <linux/mman.h>
+#include <linux/falloc.h>
+#include <linux/shmem_fs.h>
 
 #include "fuse_i.h"
 
@@ -175,6 +178,441 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
 	return fuse_do_ioctl(file, cmd, arg, flags);
 }
 
+struct fuse_dmmap_region {	/* one shmem-backed mapping, kept on fc->dmmap_list */
+	u64 mapid;	/* lookup key; taken from the server's FUSE_MMAP reply */
+	u64 size;	/* size the backing file was last grown to */
+	struct file *filp;	/* backing file created by shmem_file_setup() */
+	struct vm_operations_struct vm_ops;	/* copy of the backing ops with open/close/fault replaced */
+	const struct vm_operations_struct *vm_original_ops;	/* ops installed by the backing file's ->mmap() */
+	struct list_head list;	/* link in fc->dmmap_list */
+	atomic_t ref;	/* reference count, dropped via fuse_dmmap_region_put() */
+};
+
+/*
+ * fuse_dmmap_vm represents the result of a single mmap() call, which
+ * can be shared by multiple client vmas created by forking.
+ * It is stored in vma->vm_private_data and freed when the last sharing
+ * vma is closed.
+ */
+struct fuse_dmmap_vm {
+	u64 len;	/* mapping length reported to the server in FUSE_MMAP */
+	u64 off;	/* file offset reported to the server in FUSE_MMAP */
+	atomic_t open_count;	/* number of vmas currently sharing this mapping */
+	struct fuse_dmmap_region *region;	/* region this mapping holds a reference on */
+};
+
+/*
+ * Drop one reference on @fdr.  When the count reaches zero the region is
+ * unlinked from fc->dmmap_list under fc->lock, then its backing file is
+ * released and the region itself is freed.
+ */
+static void fuse_dmmap_region_put(struct fuse_conn *fc,
+				  struct fuse_dmmap_region *fdr)
+{
+	/* fc->lock is only held on return when this was the last reference */
+	if (!atomic_dec_and_lock(&fdr->ref, &fc->lock))
+		return;
+
+	list_del(&fdr->list);
+	spin_unlock(&fc->lock);
+
+	fput(fdr->filp);
+	kfree(fdr);
+}
+
+/*
+ * vm_ops->open for direct-mmap vmas: account for the additional vma that
+ * now shares the fuse_dmmap_vm, then chain to the backing file's ->open
+ * if it has one.
+ */
+static void fuse_dmmap_vm_open(struct vm_area_struct *vma)
+{
+	struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
+	struct fuse_dmmap_region *region = fdvm->region;
+
+	atomic_inc(&fdvm->open_count);
+
+	if (!region->vm_original_ops->open)
+		return;
+
+	region->vm_original_ops->open(vma);
+}
+
+/*
+ * vm_ops->close for direct-mmap vmas.  Chains to the backing file's
+ * ->close, and when the last vma sharing the mapping goes away sends
+ * FUSE_MUNMAP to the server, drops the region reference and frees the
+ * fuse_dmmap_vm.
+ */
+static void fuse_dmmap_vm_close(struct vm_area_struct *vma)
+{
+	struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
+	struct fuse_dmmap_region *fdr = fdvm->region;
+	struct file *file = vma->vm_file;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+	struct fuse_munmap_in *arg;
+	struct fuse_req *req;
+
+	if (fdr->vm_original_ops->close)
+		fdr->vm_original_ops->close(vma);
+
+	/* other vmas still share this mapping: nothing more to do */
+	if (!atomic_dec_and_test(&fdvm->open_count))
+		return;
+
+	/*
+	 * Tell the server the mapping is gone.  A request that cannot fail
+	 * to allocate is used because skipping the notification might leak
+	 * resources in the server.
+	 */
+	req = fuse_get_req_nofail_nopages(fc, file);
+	arg = &req->misc.munmap_in;
+
+	arg->fh = ff->fh;
+	arg->mapid = fdr->mapid;
+	arg->size = fdvm->len;
+	arg->offset = fdvm->off;
+
+	req->in.h.opcode = FUSE_MUNMAP;
+	req->in.h.nodeid = ff->nodeid;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(*arg);
+	req->in.args[0].value = arg;
+
+	fuse_request_send(fc, req);
+	fuse_put_request(fc, req);
+
+	fuse_dmmap_region_put(fc, fdr);
+	kfree(fdvm);
+}
+
+/*
+ * vm_ops->fault for direct-mmap vmas: forward the fault to the backing
+ * file's fault handler.  vma->vm_file is pointed at the backing file for
+ * the duration of the call and restored afterwards.
+ *
+ * NOTE(review): vma->vm_file is modified without any visible locking
+ * here; confirm that concurrent faults on the same vma are safe.
+ */
+static int fuse_dmmap_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
+	struct fuse_dmmap_region *region = fdvm->region;
+	struct file *cuse_file = vma->vm_file;
+	int rc;
+
+	vma->vm_file = region->filp;
+	rc = region->vm_original_ops->fault(vma, vmf);
+	vma->vm_file = cuse_file;
+
+	return rc;
+}
+
+static const struct vm_operations_struct fuse_dmmap_vm_ops = {	/* handlers copied into each region's vm_ops by cuse_mmap() */
+	.open		= fuse_dmmap_vm_open,	/* counts vmas sharing a mapping */
+	.close		= fuse_dmmap_vm_close,	/* sends FUSE_MUNMAP on the last close */
+	.fault		= fuse_dmmap_vm_fault,	/* forwards to the backing file's fault op */
+};
+
+/*
+ * Search fc->dmmap_list for the region with the given @mapid.
+ *
+ * Returns the region with its reference count incremented, or NULL if no
+ * region matches.  As the name says, callers in this file invoke it with
+ * fc->lock held.
+ */
+static struct fuse_dmmap_region *fuse_dmmap_find_locked(struct fuse_conn *fc,
+							u64 mapid)
+{
+	struct fuse_dmmap_region *fdr;
+
+	list_for_each_entry(fdr, &fc->dmmap_list, list) {
+		if (fdr->mapid != mapid)
+			continue;
+
+		atomic_inc(&fdr->ref);
+		return fdr;
+	}
+
+	return NULL;
+}
+
+/*
+ * Look up the region for @mapid, taking fc->lock around the search.
+ * Returns the region with an extra reference held, or NULL.
+ */
+static struct fuse_dmmap_region *fuse_dmmap_find(struct fuse_conn *fc,
+						 u64 mapid)
+{
+	struct fuse_dmmap_region *found;
+
+	spin_lock(&fc->lock);
+	found = fuse_dmmap_find_locked(fc, mapid);
+	spin_unlock(&fc->lock);
+
+	return found;
+}
+
+/*
+ * Return the region for @mapid with a reference held for the caller,
+ * creating it first if it does not exist yet.
+ *
+ * A new region is backed by a shmem file of @size bytes named after the
+ * path of @file; @flags is passed through to shmem_file_setup().  When
+ * @size exceeds the region's recorded size the backing file is grown
+ * with its ->fallocate operation.
+ *
+ * Returns an ERR_PTR() on failure, in which case no reference is held.
+ *
+ * NOTE(review): fdr->size is read and updated without fc->lock; confirm
+ * whether concurrent mmap() calls on the same mapid need serialising.
+ */
+static struct fuse_dmmap_region *fuse_dmmap_get(struct fuse_conn *fc,
+						struct file *file, u64 mapid,
+						u64 size, unsigned long flags)
+{
+	struct fuse_dmmap_region *fdr;
+	char *pathbuf, *filepath;
+	struct file *shmem_file;
+	long err;
+
+	fdr = fuse_dmmap_find(fc, mapid);
+	if (!fdr) {
+		struct fuse_dmmap_region *tmp;
+
+		fdr = kzalloc(sizeof(*fdr), GFP_KERNEL);
+		if (!fdr)
+			return ERR_PTR(-ENOMEM);
+
+		atomic_set(&fdr->ref, 1);
+		fdr->mapid = mapid;
+
+		pathbuf = kzalloc(PATH_MAX+1, GFP_KERNEL);
+		if (!pathbuf) {
+			kfree(fdr);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		filepath = d_path(&file->f_path, pathbuf, PATH_MAX+1);
+		if (IS_ERR(filepath)) {
+			kfree(fdr);
+			kfree(pathbuf);
+			return ERR_CAST(filepath);
+		}
+
+		shmem_file = shmem_file_setup(filepath, size, flags);
+		kfree(pathbuf);
+
+		if (IS_ERR(shmem_file)) {
+			kfree(fdr);
+			return ERR_CAST(shmem_file);
+		}
+
+		fdr->filp = shmem_file;
+
+		/*
+		 * The allocations above ran unlocked, so another caller may
+		 * have inserted the same mapid meanwhile: recheck under the
+		 * lock and prefer the existing region.
+		 */
+		spin_lock(&fc->lock);
+		tmp = fuse_dmmap_find_locked(fc, mapid);
+		if (tmp) {
+			fput(fdr->filp);
+			kfree(fdr);
+			fdr = tmp;
+		} else {
+			list_add(&fdr->list, &fc->dmmap_list);
+		}
+		spin_unlock(&fc->lock);
+	}
+
+	if (size > fdr->size) {
+		/* never call through a missing op, and never hide a failure */
+		err = -EOPNOTSUPP;
+		if (fdr->filp->f_op->fallocate)
+			err = fdr->filp->f_op->fallocate(fdr->filp, 0, 0,
+							 size);
+		if (err) {
+			/* the size is left untouched; give the reference back */
+			fuse_dmmap_region_put(fc, fdr);
+			return ERR_PTR(err);
+		}
+		fdr->size = size;
+	}
+
+	return fdr;
+}
+
+/*
+ * mmap handler for CUSE character devices.
+ *
+ * Sends FUSE_MMAP to the server describing the requested mapping.  The
+ * reply names a mapid and a size; the matching shmem-backed region is
+ * looked up or created and its backing file is mapped into @vma.  The
+ * vma's vm_ops are then replaced by the region's copy, whose
+ * open/close/fault handlers are the fuse_dmmap_vm_* functions.
+ *
+ * Returns 0 on success or a negative errno.  A server that answers
+ * -ENOSYS is remembered in fc->no_dmmap so later calls fail without
+ * sending a request.
+ *
+ * NOTE(review): when a step after the successful FUSE_MMAP fails, no
+ * FUSE_MUNMAP is sent; confirm whether the server needs to be told.
+ */
+static int cuse_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int err;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+	struct fuse_dmmap_vm *fdvm;
+	struct fuse_dmmap_region *fdr;
+	struct fuse_req *req;
+	struct fuse_mmap_in inarg;
+	struct fuse_mmap_out outarg;
+
+	if (fc->no_dmmap)
+		return -ENOSYS;
+
+	req = fuse_get_req(fc, 0);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	/* ask server whether this mmap is okay and what the size should be */
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.fh = ff->fh;
+	inarg.addr = vma->vm_start;
+	inarg.len = vma->vm_end - vma->vm_start;
+	inarg.prot = ((vma->vm_flags & VM_READ) ? PROT_READ : 0) |
+		     ((vma->vm_flags & VM_WRITE) ? PROT_WRITE : 0) |
+		     ((vma->vm_flags & VM_EXEC) ? PROT_EXEC : 0);
+	inarg.flags = ((vma->vm_flags & VM_SHARED) ? MAP_SHARED : 0) |
+		      ((vma->vm_flags & VM_GROWSDOWN) ? MAP_GROWSDOWN : 0) |
+		      ((vma->vm_flags & VM_DENYWRITE) ? MAP_DENYWRITE : 0) |
+		      ((vma->vm_flags & VM_EXEC) ? MAP_EXECUTABLE : 0) |
+		      ((vma->vm_flags & VM_LOCKED) ? MAP_LOCKED : 0);
+	inarg.offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
+
+	req->in.h.opcode = FUSE_MMAP;
+	req->in.h.nodeid = ff->nodeid;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+
+	fuse_request_send(fc, req);
+	err = req->out.h.error;
+	if (err) {
+		if (err == -ENOSYS)
+			fc->no_dmmap = 1;
+		goto free_req;
+	}
+
+	fdr = fuse_dmmap_get(fc, file, outarg.mapid, outarg.size,
+			     vma->vm_flags);
+	if (IS_ERR(fdr)) {
+		err = PTR_ERR(fdr);
+		goto free_req;
+	}
+
+	fdvm = kzalloc(sizeof(*fdvm), GFP_KERNEL);
+	if (!fdvm) {
+		err = -ENOMEM;
+		goto put_region;
+	}
+
+	/*
+	 * Let the backing file set up the vma.  Its result must be checked:
+	 * on failure vma->vm_ops cannot be relied upon below.
+	 */
+	err = fdr->filp->f_op->mmap(fdr->filp, vma);
+	if (err)
+		goto free_fdvm;
+
+	atomic_set(&fdvm->open_count, 1);
+	fdvm->region = fdr;
+	fdvm->len = inarg.len;
+	fdvm->off = inarg.offset;
+
+	/* keep every backing op except the three we need to intercept */
+	memcpy(&fdr->vm_ops, vma->vm_ops, sizeof(fdr->vm_ops));
+	fdr->vm_ops.open = fuse_dmmap_vm_ops.open;
+	fdr->vm_ops.close = fuse_dmmap_vm_ops.close;
+	fdr->vm_ops.fault = fuse_dmmap_vm_ops.fault;
+
+	fdr->vm_original_ops = vma->vm_ops;
+
+	vma->vm_ops = &fdr->vm_ops;
+
+	vma->vm_private_data = fdvm;
+	vma->vm_flags |= VM_DONTEXPAND;	/* disallow expansion for now */
+
+	/* success: fdvm and the region reference now belong to the vma */
+	goto free_req;
+
+free_fdvm:
+	kfree(fdvm);
+put_region:
+	fuse_dmmap_region_put(fc, fdr);
+free_req:
+	fuse_put_request(fc, req);
+	return err;
+}
+
+/*
+ * notify_store implementation for CUSE: copy @size bytes from the copy
+ * state @cs into the direct-mmap region whose mapid is @nodeid, starting
+ * at byte offset @pos.
+ *
+ * The store is clipped to the region's size; a store that starts beyond
+ * the end copies nothing.  Returns 0 on success, -ENOENT if no region
+ * has that mapid, or the error from reading a page or copying into it.
+ */
+static int fuse_notify_store_to_dmmap(struct fuse_conn *fc,
+				      struct fuse_copy_state *cs,
+				      u64 nodeid, u32 size, u64 pos)
+{
+	struct fuse_dmmap_region *fdr;
+	struct file *filp;
+	pgoff_t index;
+	unsigned int off;
+	int err = 0;
+
+	fdr = fuse_dmmap_find(fc, nodeid);
+	if (!fdr)
+		return -ENOENT;
+
+	index = pos >> PAGE_SHIFT;
+	off = pos & ~PAGE_MASK;
+	if (pos > fdr->size)
+		size = 0;
+	else if (size > fdr->size - pos)
+		size = fdr->size - pos;
+
+	filp = fdr->filp;
+
+	while (size) {
+		struct page *page;
+		unsigned int this_num;
+
+		page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping,
+						   index, GFP_HIGHUSER);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			break;
+		}
+
+		/*
+		 * shmem hands the page back unlocked.  Take the lock for the
+		 * copy so the unlock below is balanced, matching the inode
+		 * store path which works on a locked page.
+		 */
+		lock_page(page);
+
+		this_num = min_t(unsigned, size, PAGE_SIZE - off);
+		err = fuse_copy_page(cs, &page, off, this_num, 0);
+
+		unlock_page(page);
+		page_cache_release(page);
+
+		if (err)
+			break;
+
+		size -= this_num;
+		off = 0;
+		index++;
+	}
+
+	fuse_dmmap_region_put(fc, fdr);
+
+	return err;
+}
+
+static void fuse_retrieve_dmmap_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+	release_pages(req->pages, req->num_pages, 0);	/* drop the page references taken while building the reply */
+}
+
+/*
+ * notify_retrieve implementation for CUSE: answer a retrieve
+ * notification with the contents of the direct-mmap region whose mapid
+ * is outarg->nodeid.
+ *
+ * The requested range is clipped to the region's size and to
+ * FUSE_MAX_PAGES_PER_REQ pages, the pages are attached to a
+ * FUSE_NOTIFY_REPLY request and the request is queued.  On success the
+ * pages are released by the request's end callback.
+ *
+ * Returns 0 on success, -ENOENT if no region has that mapid, or a
+ * negative errno; on failure the pages gathered so far and the request
+ * are released here.
+ */
+static int fuse_notify_retrieve_from_dmmap(struct fuse_conn *fc,
+				struct fuse_notify_retrieve_out *outarg)
+{
+	struct fuse_dmmap_region *fdr;
+	struct fuse_req *req;
+	struct file *filp;
+	pgoff_t index;
+	unsigned int num;
+	unsigned int offset;
+	unsigned int npages;
+	size_t total_len = 0;
+	int err;
+
+	fdr = fuse_dmmap_find(fc, outarg->nodeid);
+	if (!fdr)
+		return -ENOENT;
+
+	offset = outarg->offset & ~PAGE_MASK;
+
+	num = outarg->size;
+	if (outarg->offset > fdr->size)
+		num = 0;
+	else if (outarg->offset + num > fdr->size)
+		num = fdr->size - outarg->offset;
+
+	/*
+	 * Size the request from the clipped length *and* the offset into
+	 * the first page: a range that is not page aligned touches one more
+	 * page than its length alone suggests.  The loop below is bounded
+	 * by this same count so it cannot run past the page array.
+	 */
+	npages = min_t(u64, ((u64)num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT,
+		       FUSE_MAX_PAGES_PER_REQ);
+
+	req = fuse_get_req(fc, npages);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto out_put_region;
+	}
+
+	req->in.h.opcode = FUSE_NOTIFY_REPLY;
+	req->in.h.nodeid = outarg->nodeid;
+	req->in.numargs = 2;
+	req->in.argpages = 1;
+	req->end = fuse_retrieve_dmmap_end;
+
+	index = outarg->offset >> PAGE_SHIFT;
+	filp = fdr->filp;
+
+	while (num && req->num_pages < npages) {
+		struct page *page;
+		unsigned int this_num;
+
+		page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping,
+						   index,
+						   GFP_KERNEL);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto out_free_req;
+		}
+
+		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
+		req->pages[req->num_pages] = page;
+		req->page_descs[req->num_pages].offset = offset;
+		req->page_descs[req->num_pages].length = this_num;
+		req->num_pages++;
+
+		/* only the first page starts part-way in */
+		offset = 0;
+		num -= this_num;
+		total_len += this_num;
+		index++;
+	}
+	req->misc.retrieve_in.offset = outarg->offset;
+	req->misc.retrieve_in.size = total_len;
+	req->in.args[0].size = sizeof(req->misc.retrieve_in);
+	req->in.args[0].value = &req->misc.retrieve_in;
+	req->in.args[1].size = total_len;
+
+	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
+	if (!err)
+		goto out_put_region;
+
+out_free_req:
+	/* the request was never queued, so nobody else will clean it up */
+	fuse_retrieve_dmmap_end(fc, req);
+	fuse_put_request(fc, req);
+out_put_region:
+	fuse_dmmap_region_put(fc, fdr);
+
+	return err;
+}
+
+
 static const struct file_operations cuse_frontend_fops = {
 	.owner			= THIS_MODULE,
 	.read_iter		= cuse_read_iter,
@@ -184,7 +622,8 @@ static const struct file_operations cuse_frontend_fops = {
 	.unlocked_ioctl		= cuse_file_ioctl,
 	.compat_ioctl		= cuse_file_compat_ioctl,
 	.poll			= fuse_file_poll,
-	.llseek		= noop_llseek,
+	.llseek			= noop_llseek,
+	.mmap			= cuse_mmap,
 };
 
 
@@ -468,10 +907,26 @@ err:
 
 static void cuse_fc_release(struct fuse_conn *fc)
 {
+	struct fuse_dmmap_region *fdr;
 	struct cuse_conn *cc = fc_to_cc(fc);
+
+	spin_lock(&fc->lock);
+	while (!list_empty(&fc->dmmap_list)) {
+
+		fdr = list_entry(fc->dmmap_list.next, typeof(*fdr), list);
+		fuse_dmmap_region_put(fc, fdr);
+	}
+	spin_unlock(&fc->lock);
+
 	kfree_rcu(cc, fc.rcu);
 }
 
+static const struct fuse_conn_operations cuse_ops = {	/* installed on cc->fc.ops in cuse_channel_open() */
+	.release = cuse_fc_release,
+	.notify_store = fuse_notify_store_to_dmmap,	/* stores go to direct-mmap regions */
+	.notify_retrieve = fuse_notify_retrieve_from_dmmap,	/* retrieves read from direct-mmap regions */
+};
+
 /**
  * cuse_channel_open - open method for /dev/cuse
  * @inode: inode for /dev/cuse
@@ -507,7 +962,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
 	}
 
 	INIT_LIST_HEAD(&cc->list);
-	cc->fc.release = cuse_fc_release;
+	cc->fc.ops = &cuse_ops;
 
 	cc->fc.initialized = 1;
 	rc = cuse_send_init(cc);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 80cc1b3..0faf92c 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -279,6 +279,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
 	__clear_bit(FR_BACKGROUND, &req->flags);
 	return req;
 }
+EXPORT_SYMBOL_GPL(fuse_get_req_nofail_nopages);
 
 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 {
@@ -617,8 +618,8 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 }
 EXPORT_SYMBOL_GPL(fuse_request_send_background);
 
-static int fuse_request_send_notify_reply(struct fuse_conn *fc,
-					  struct fuse_req *req, u64 unique)
+int fuse_request_send_notify_reply(struct fuse_conn *fc,
+				   struct fuse_req *req, u64 unique)
 {
 	int err = -ENODEV;
 	struct fuse_iqueue *fiq = &fc->iq;
@@ -674,6 +675,7 @@ static int lock_request(struct fuse_req *req)
 	}
 	return err;
 }
+EXPORT_SYMBOL_GPL(fuse_request_send_notify_reply);
 
 /*
  * Unlock request.  If it was aborted while locked, caller is responsible
@@ -967,8 +969,8 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
  * Copy a page in the request to/from the userspace buffer.  Must be
  * done atomically
  */
-static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
-			  unsigned offset, unsigned count, int zeroing)
+int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
+		   unsigned offset, unsigned count, int zeroing)
 {
 	int err;
 	struct page *page = *pagep;
@@ -1003,6 +1005,7 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 		flush_dcache_page(page);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(fuse_copy_page);
 
 /* Copy pages in the request to/from userspace buffer */
 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
@@ -1597,15 +1600,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
 			     struct fuse_copy_state *cs)
 {
 	struct fuse_notify_store_out outarg;
-	struct inode *inode;
-	struct address_space *mapping;
-	u64 nodeid;
 	int err;
-	pgoff_t index;
-	unsigned int offset;
-	unsigned int num;
-	loff_t file_size;
-	loff_t end;
 
 	err = -EINVAL;
 	if (size < sizeof(outarg))
@@ -1619,145 +1614,18 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
 	if (size - sizeof(outarg) != outarg.size)
 		goto out_finish;
 
-	nodeid = outarg.nodeid;
+	err = fc->ops->notify_store(fc, cs, outarg.nodeid, outarg.size,
+				       outarg.offset);
 
-	down_read(&fc->killsb);
-
-	err = -ENOENT;
-	if (!fc->sb)
-		goto out_up_killsb;
-
-	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
-	if (!inode)
-		goto out_up_killsb;
-
-	mapping = inode->i_mapping;
-	index = outarg.offset >> PAGE_CACHE_SHIFT;
-	offset = outarg.offset & ~PAGE_CACHE_MASK;
-	file_size = i_size_read(inode);
-	end = outarg.offset + outarg.size;
-	if (end > file_size) {
-		file_size = end;
-		fuse_write_update_size(inode, file_size);
-	}
-
-	num = outarg.size;
-	while (num) {
-		struct page *page;
-		unsigned int this_num;
-
-		err = -ENOMEM;
-		page = find_or_create_page(mapping, index,
-					   mapping_gfp_mask(mapping));
-		if (!page)
-			goto out_iput;
-
-		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
-		err = fuse_copy_page(cs, &page, offset, this_num, 0);
-		if (!err && offset == 0 &&
-		    (this_num == PAGE_CACHE_SIZE || file_size == end))
-			SetPageUptodate(page);
-		unlock_page(page);
-		page_cache_release(page);
-
-		if (err)
-			goto out_iput;
-
-		num -= this_num;
-		offset = 0;
-		index++;
-	}
-
-	err = 0;
-
-out_iput:
-	iput(inode);
-out_up_killsb:
-	up_read(&fc->killsb);
 out_finish:
 	fuse_copy_finish(cs);
 	return err;
 }
 
-static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
-{
-	release_pages(req->pages, req->num_pages, false);
-}
-
-static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
-			 struct fuse_notify_retrieve_out *outarg)
-{
-	int err;
-	struct address_space *mapping = inode->i_mapping;
-	struct fuse_req *req;
-	pgoff_t index;
-	loff_t file_size;
-	unsigned int num;
-	unsigned int offset;
-	size_t total_len = 0;
-	int num_pages;
-
-	offset = outarg->offset & ~PAGE_CACHE_MASK;
-	file_size = i_size_read(inode);
-
-	num = outarg->size;
-	if (outarg->offset > file_size)
-		num = 0;
-	else if (outarg->offset + num > file_size)
-		num = file_size - outarg->offset;
-
-	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
-
-	req = fuse_get_req(fc, num_pages);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
-	req->in.h.opcode = FUSE_NOTIFY_REPLY;
-	req->in.h.nodeid = outarg->nodeid;
-	req->in.numargs = 2;
-	req->in.argpages = 1;
-	req->page_descs[0].offset = offset;
-	req->end = fuse_retrieve_end;
-
-	index = outarg->offset >> PAGE_CACHE_SHIFT;
-
-	while (num && req->num_pages < num_pages) {
-		struct page *page;
-		unsigned int this_num;
-
-		page = find_get_page(mapping, index);
-		if (!page)
-			break;
-
-		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
-		req->pages[req->num_pages] = page;
-		req->page_descs[req->num_pages].length = this_num;
-		req->num_pages++;
-
-		offset = 0;
-		num -= this_num;
-		total_len += this_num;
-		index++;
-	}
-	req->misc.retrieve_in.offset = outarg->offset;
-	req->misc.retrieve_in.size = total_len;
-	req->in.args[0].size = sizeof(req->misc.retrieve_in);
-	req->in.args[0].value = &req->misc.retrieve_in;
-	req->in.args[1].size = total_len;
-
-	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
-	if (err)
-		fuse_retrieve_end(fc, req);
-
-	return err;
-}
-
 static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
 				struct fuse_copy_state *cs)
 {
 	struct fuse_notify_retrieve_out outarg;
-	struct inode *inode;
 	int err;
 
 	err = -EINVAL;
@@ -1770,18 +1638,7 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
 
 	fuse_copy_finish(cs);
 
-	down_read(&fc->killsb);
-	err = -ENOENT;
-	if (fc->sb) {
-		u64 nodeid = outarg.nodeid;
-
-		inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
-		if (inode) {
-			err = fuse_retrieve(fc, inode, &outarg);
-			iput(inode);
-		}
-	}
-	up_read(&fc->killsb);
+	err = fc->ops->notify_retrieve(fc, &outarg);
 
 	return err;
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 4051131..a56222b 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -337,6 +337,7 @@ struct fuse_req {
 			struct fuse_req *next;
 		} write;
 		struct fuse_notify_retrieve_in retrieve_in;
+		struct fuse_munmap_in munmap_in;
 	} misc;
 
 	/** page vector */
@@ -431,6 +432,21 @@ struct fuse_dev {
 	struct list_head entry;
 };
 
+struct fuse_copy_state;
+
+struct fuse_conn_operations {
+	/** Called on the final put of the connection, from fuse_conn_put() */
+	void (*release)(struct fuse_conn *);
+
+	/**
+	 * Called to store data into a mapping: copies size bytes from the
+	 * copy state to byte offset pos of the object named by nodeid
+	 */
+	int (*notify_store)(struct fuse_conn *, struct fuse_copy_state *,
+			    u64 nodeid, u32 size, u64 pos);
+
+	/**
+	 * Called to retrieve data from a mapping: queues a
+	 * FUSE_NOTIFY_REPLY request carrying the requested range
+	 */
+	int (*notify_retrieve)(struct fuse_conn *,
+			       struct fuse_notify_retrieve_out *);
+};
+
 /**
  * A Fuse connection.
  *
@@ -578,6 +594,9 @@ struct fuse_conn {
 	/** Is poll not implemented by fs? */
 	unsigned no_poll:1;
 
+	/** Is direct mmap not implemented by fs? */
+	unsigned no_dmmap:1;
+
 	/** Do multi-page cached writes */
 	unsigned big_writes:1;
 
@@ -635,9 +654,6 @@ struct fuse_conn {
 	/** Version counter for attribute changes */
 	u64 attr_version;
 
-	/** Called on final put */
-	void (*release)(struct fuse_conn *);
-
 	/** Super block for this connection. */
 	struct super_block *sb;
 
@@ -646,6 +662,12 @@ struct fuse_conn {
 
 	/** List of device instances belonging to this connection */
 	struct list_head devices;
+
+	/** List of direct mmaps (currently CUSE only) */
+	struct list_head dmmap_list;
+
+	/** Operations that fuse and cuse can implement differently */
+	const struct fuse_conn_operations *ops;
 };
 
 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -944,4 +966,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 
 void fuse_set_initialized(struct fuse_conn *fc);
 
+int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
+		   unsigned offset, unsigned count, int zeroing);
+
+int fuse_request_send_notify_reply(struct fuse_conn *fc,
+				   struct fuse_req *req, u64 unique);
+
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ac81f48..5284b84 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -609,6 +609,7 @@ void fuse_conn_init(struct fuse_conn *fc)
 	fc->connected = 1;
 	fc->attr_version = 1;
 	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
+	INIT_LIST_HEAD(&fc->dmmap_list);
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);
 
@@ -617,7 +618,7 @@ void fuse_conn_put(struct fuse_conn *fc)
 	if (atomic_dec_and_test(&fc->count)) {
 		if (fc->destroy_req)
 			fuse_request_free(fc->destroy_req);
-		fc->release(fc);
+		fc->ops->release(fc);
 	}
 }
 EXPORT_SYMBOL_GPL(fuse_conn_put);
@@ -1025,6 +1026,167 @@ void fuse_dev_free(struct fuse_dev *fud)
 }
 EXPORT_SYMBOL_GPL(fuse_dev_free);
 
+/*
+ * notify_store implementation for regular FUSE mounts: copy @size bytes
+ * from the copy state @cs into the page cache of the inode @nodeid,
+ * starting at byte offset @pos.  The inode size is extended when the
+ * store reaches past the current end of file.
+ *
+ * Returns 0 on success, -ENOENT if there is no superblock or no such
+ * inode, -ENOMEM if a page cannot be obtained, or the copy error.
+ */
+static int fuse_notify_store_to_inode(struct fuse_conn *fc,
+				      struct fuse_copy_state *cs,
+				      u64 nodeid, u32 size, u64 pos)
+{
+	struct inode *inode;
+	struct address_space *mapping;
+	pgoff_t index;
+	unsigned int off;
+	loff_t file_size;
+	loff_t end;
+	int err;
+
+	down_read(&fc->killsb);
+
+	err = -ENOENT;
+	if (!fc->sb)
+		goto out_up_killsb;
+
+	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+	if (!inode)
+		goto out_up_killsb;
+
+	mapping = inode->i_mapping;
+	index = pos >> PAGE_CACHE_SHIFT;
+	off = pos & ~PAGE_CACHE_MASK;
+	file_size = i_size_read(inode);
+	end = pos + size;
+	if (end > file_size) {
+		file_size = end;
+		fuse_write_update_size(inode, file_size);
+	}
+
+	while (size) {
+		struct page *page;
+		unsigned int this_num;
+
+		err = -ENOMEM;
+		page = find_or_create_page(mapping, index,
+					   mapping_gfp_mask(mapping));
+		if (!page)
+			goto out_iput;
+
+		this_num = min_t(unsigned, size, PAGE_CACHE_SIZE - off);
+		err = fuse_copy_page(cs, &page, off, this_num, 0);
+		/*
+		 * Only a page that was written from its start and is either
+		 * completely filled or ends at EOF holds fully valid data;
+		 * any other partially written page must not be marked
+		 * uptodate.
+		 */
+		if (!err && off == 0 &&
+		    (this_num == PAGE_CACHE_SIZE || file_size == end))
+			SetPageUptodate(page);
+		unlock_page(page);
+		page_cache_release(page);
+
+		if (err)
+			goto out_iput;
+
+		size -= this_num;
+		off = 0;
+		index++;
+	}
+
+	err = 0;
+
+out_iput:
+	iput(inode);
+out_up_killsb:
+	up_read(&fc->killsb);
+
+	return err;
+}
+
+static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+	release_pages(req->pages, req->num_pages, 0);	/* drop the page references taken by fuse_retrieve() */
+}
+
+/*
+ * Build and queue the FUSE_NOTIFY_REPLY for a retrieve notification on
+ * @inode.
+ *
+ * The requested range is clipped to the inode size and to
+ * FUSE_MAX_PAGES_PER_REQ pages; pages are taken from the page cache and
+ * collection stops at the first page that is not cached.  On success
+ * the pages are released by the request's end callback.
+ *
+ * Returns 0 on success or a negative errno; on failure the gathered
+ * pages and the request are released here.
+ */
+static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
+			 struct fuse_notify_retrieve_out *outarg)
+{
+	int err;
+	struct address_space *mapping = inode->i_mapping;
+	struct fuse_req *req;
+	pgoff_t index;
+	loff_t file_size;
+	unsigned int num;
+	unsigned int offset;
+	size_t total_len = 0;
+	int num_pages;
+
+	offset = outarg->offset & ~PAGE_CACHE_MASK;
+	file_size = i_size_read(inode);
+
+	num = outarg->size;
+	if (outarg->offset > file_size)
+		num = 0;
+	else if (outarg->offset + num > file_size)
+		num = file_size - outarg->offset;
+
+	/*
+	 * The request must be allocated with room for every page the loop
+	 * may attach; allocating it for zero pages and then filling
+	 * req->pages would write past the page array.
+	 */
+	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
+
+	req = fuse_get_req(fc, num_pages);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->in.h.opcode = FUSE_NOTIFY_REPLY;
+	req->in.h.nodeid = outarg->nodeid;
+	req->in.numargs = 2;
+	req->in.argpages = 1;
+	req->page_descs[0].offset = offset;
+	req->end = fuse_retrieve_end;
+
+	index = outarg->offset >> PAGE_CACHE_SHIFT;
+
+	while (num && req->num_pages < num_pages) {
+		struct page *page;
+		unsigned int this_num;
+
+		page = find_get_page(mapping, index);
+		if (!page)
+			break;
+
+		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+		req->pages[req->num_pages] = page;
+		req->page_descs[req->num_pages].length = this_num;
+		req->num_pages++;
+
+		/* only the first page starts part-way in */
+		offset = 0;
+		num -= this_num;
+		total_len += this_num;
+		index++;
+	}
+	req->misc.retrieve_in.offset = outarg->offset;
+	req->misc.retrieve_in.size = total_len;
+	req->in.args[0].size = sizeof(req->misc.retrieve_in);
+	req->in.args[0].value = &req->misc.retrieve_in;
+	req->in.args[1].size = total_len;
+
+	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
+	if (err) {
+		/* the request was never queued, so nobody else will free it */
+		fuse_retrieve_end(fc, req);
+		fuse_put_request(fc, req);
+	}
+
+	return err;
+}
+
+/*
+ * notify_retrieve implementation for regular FUSE mounts: look up the
+ * inode named by outarg->nodeid and hand it to fuse_retrieve().  The
+ * lookup runs with fc->killsb held for reading.
+ *
+ * Returns -ENOENT when there is no superblock or no such inode,
+ * otherwise the result of fuse_retrieve().
+ */
+static int fuse_notify_retrieve_from_inode(struct fuse_conn *fc,
+				struct fuse_notify_retrieve_out *outarg)
+{
+	u64 nodeid = outarg->nodeid;
+	struct inode *inode;
+	int err = -ENOENT;
+
+	down_read(&fc->killsb);
+	if (!fc->sb)
+		goto out_unlock;
+
+	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+	if (!inode)
+		goto out_unlock;
+
+	err = fuse_retrieve(fc, inode, outarg);
+	iput(inode);
+
+out_unlock:
+	up_read(&fc->killsb);
+
+	return err;
+}
+
+static const struct fuse_conn_operations fuse_default_ops = {	/* installed on fc->ops in fuse_fill_super() */
+	.release = fuse_free_conn,
+	.notify_store = fuse_notify_store_to_inode,	/* stores go to the inode's page cache */
+	.notify_retrieve = fuse_notify_retrieve_from_inode,	/* retrieves read from the inode's page cache */
+};
+
 static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct fuse_dev *fud;
@@ -1077,7 +1239,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 		goto err_fput;
 
 	fuse_conn_init(fc);
-	fc->release = fuse_free_conn;
+	fc->ops = &fuse_default_ops;
 
 	fud = fuse_dev_alloc(fc);
 	if (!fud)
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index c9aca04..3f4c54b 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -102,6 +102,7 @@
  *  - add ctime and ctimensec to fuse_setattr_in
  *  - add FUSE_RENAME2 request
  *  - add FUSE_NO_OPEN_SUPPORT flag
+ *  - add FUSE_MMAP and FUSE_MUNMAP
  */
 
 #ifndef _LINUX_FUSE_H
@@ -358,6 +359,8 @@ enum fuse_opcode {
 	FUSE_FALLOCATE     = 43,
 	FUSE_READDIRPLUS   = 44,
 	FUSE_RENAME2       = 45,
+	FUSE_MMAP          = 46,
+	FUSE_MUNMAP        = 47,
 
 	/* CUSE specific operations */
 	CUSE_INIT          = 4096,
@@ -670,6 +673,29 @@ struct fuse_fallocate_in {
 	uint32_t	padding;
 };
 
+struct fuse_mmap_in {
+	uint64_t	fh;		/* File handle of the open file */
+	uint64_t	addr;		/* Start address of the mapping */
+	uint64_t	len;		/* Length of the mapping */
+	uint32_t	prot;		/* PROT_* bits */
+	uint32_t	flags;		/* MAP_* bits */
+	uint64_t	offset;		/* Byte offset into the file */
+};
+
+struct fuse_mmap_out {
+	uint64_t	mapid;		/* Mmap ID, same namespace as Inode ID */
+	uint64_t	size;		/* Size of memory region */
+	uint64_t	reserved;
+};
+
+struct fuse_munmap_in {
+	uint64_t	fh;		/* File handle of the open file */
+	uint64_t	mapid;		/* Mmap ID returned by FUSE_MMAP */
+	uint64_t	size;		/* Size of memory region */
+	uint64_t	offset;		/* Byte offset given in FUSE_MMAP */
+	uint64_t	reserved;
+};
+
 struct fuse_in_header {
 	uint32_t	len;
 	uint32_t	opcode;
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ