lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120719124922.6250.9190.stgit@ltc137.sdl.hitachi.co.jp>
Date:	Thu, 19 Jul 2012 21:49:22 +0900
From:	Mitsuo Hayasaka <mitsuo.hayasaka.hu@...achi.com>
To:	Miklos Szeredi <miklos@...redi.hu>,
	Alexander Viro <viro@...iv.linux.org.uk>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Muthukumar R <muthur@...il.com>
Cc:	fuse-devel@...ts.sourceforge.net, linux-kernel@...r.kernel.org,
	linux-fsdevel@...r.kernel.org, linux-doc@...r.kernel.org,
	yrl.pp-manager.tt@...achi.com,
	Mitsuo Hayasaka <mitsuo.hayasaka.hu@...achi.com>,
	Miklos Szeredi <miklos@...redi.hu>,
	Nikolaus Rath <Nikolaus@...h.org>,
	Liu Yuan <namei.unix@...il.com>,
	Has-Wen Nienhuys <hanwen@...all.nl>
Subject: [PATCH -v2 2/6] fuse: make the maximum read/write request size tunable

Make the maximum read/write request size tunable between
32 pages and the number of pages equivalent to pipe_max_size.
The max_read/max_write mount options affect the size. The
32 pages are used by default without these options.

Currently, the maximum read/write request size is limited to
FUSE_MAX_PAGES_PER_REQ which is equal to 32 pages. It is
required to change it in order to maximize the throughput
since the optimized value depends on various factors such as
type and version of local filesystems used and hardware specs,
etc.

In addition, recently FUSE is widely used as a gateway to
connect cloud storage services and distributed filesystems.
Larger data might be stored in them over networking via FUSE
and the overhead might affect the read/write throughput.

So, a tunable functionality of read/write request size is
useful.

Signed-off-by: Mitsuo Hayasaka <mitsuo.hayasaka.hu@...achi.com>
Cc: Miklos Szeredi <miklos@...redi.hu>
Cc: Nikolaus Rath <Nikolaus@...h.org>
Cc: Liu Yuan <namei.unix@...il.com>
Cc: Has-Wen Nienhuys <hanwen@...all.nl>
---

 fs/fuse/dev.c    |   27 ++++++++++++++-------------
 fs/fuse/file.c   |   32 +++++++++++++++++---------------
 fs/fuse/fuse_i.h |   27 +++++++++++++++++----------
 fs/fuse/inode.c  |   42 +++++++++++++++++++++++++++++++++++-------
 4 files changed, 83 insertions(+), 45 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 7df2b5e..511560b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -34,35 +34,36 @@ static struct fuse_conn *fuse_get_conn(struct file *file)
 	return file->private_data;
 }
 
-static void fuse_request_init(struct fuse_req *req)
+static void fuse_request_init(struct fuse_conn *fc, struct fuse_req *req)
 {
-	memset(req, 0, sizeof(*req));
+	memset(req, 0, fc->fuse_req_size);
 	INIT_LIST_HEAD(&req->list);
 	INIT_LIST_HEAD(&req->intr_entry);
 	init_waitqueue_head(&req->waitq);
 	atomic_set(&req->count, 1);
 }
 
-struct fuse_req *fuse_request_alloc(void)
+struct fuse_req *fuse_request_alloc(struct fuse_conn *fc)
 {
-	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
+	struct fuse_req *req = kmalloc(fc->fuse_req_size, GFP_KERNEL);
+
 	if (req)
-		fuse_request_init(req);
+		fuse_request_init(fc, req);
 	return req;
 }
 EXPORT_SYMBOL_GPL(fuse_request_alloc);
 
-struct fuse_req *fuse_request_alloc_nofs(void)
+struct fuse_req *fuse_request_alloc_nofs(struct fuse_conn *fc)
 {
-	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
+	struct fuse_req *req = kmalloc(fc->fuse_req_size, GFP_NOFS);
 	if (req)
-		fuse_request_init(req);
+		fuse_request_init(fc, req);
 	return req;
 }
 
 void fuse_request_free(struct fuse_req *req)
 {
-	kmem_cache_free(fuse_req_cachep, req);
+	kfree(req);
 }
 
 static void block_sigs(sigset_t *oldset)
@@ -116,7 +117,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
 	if (!fc->connected)
 		goto out;
 
-	req = fuse_request_alloc();
+	req = fuse_request_alloc(fc);
 	err = -ENOMEM;
 	if (!req)
 		goto out;
@@ -166,7 +167,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
 	struct fuse_file *ff = file->private_data;
 
 	spin_lock(&fc->lock);
-	fuse_request_init(req);
+	fuse_request_init(fc, req);
 	BUG_ON(ff->reserved_req);
 	ff->reserved_req = req;
 	wake_up_all(&fc->reserved_req_waitq);
@@ -193,7 +194,7 @@ struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
 
 	atomic_inc(&fc->num_waiting);
 	wait_event(fc->blocked_waitq, !fc->blocked);
-	req = fuse_request_alloc();
+	req = fuse_request_alloc(fc);
 	if (!req)
 		req = get_reserved_req(fc, file);
 
@@ -1564,7 +1565,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
 	else if (outarg->offset + num > file_size)
 		num = file_size - outarg->offset;
 
-	while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
+	while (num && req->num_pages < fc->max_pages) {
 		struct page *page;
 		unsigned int this_num;
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b321a68..7b96b00 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -57,7 +57,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 		return NULL;
 
 	ff->fc = fc;
-	ff->reserved_req = fuse_request_alloc();
+	ff->reserved_req = fuse_request_alloc(fc);
 	if (unlikely(!ff->reserved_req)) {
 		kfree(ff);
 		return NULL;
@@ -653,7 +653,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
 	fuse_wait_on_page_writeback(inode, page->index);
 
 	if (req->num_pages &&
-	    (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+	    (req->num_pages == fc->max_pages ||
 	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
 	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
 		fuse_send_readpages(req, data->file);
@@ -866,7 +866,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
 		if (!fc->big_writes)
 			break;
 	} while (iov_iter_count(ii) && count < fc->max_write &&
-		 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
+		 req->num_pages < fc->max_pages && offset == 0);
 
 	return count > 0 ? count : err;
 }
@@ -1020,8 +1020,9 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
 	}
 }
 
-static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
-			       size_t *nbytesp, int write)
+static int fuse_get_user_pages(struct fuse_conn *fc, struct fuse_req *req,
+			       const char __user *buf, size_t *nbytesp,
+			       int write)
 {
 	size_t nbytes = *nbytesp;
 	unsigned long user_addr = (unsigned long) buf;
@@ -1038,9 +1039,9 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
 		return 0;
 	}
 
-	nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+	nbytes = min_t(size_t, nbytes, fc->max_pages << PAGE_SHIFT);
 	npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
+	npages = clamp(npages, 1, (int)fc->max_pages);
 	npages = get_user_pages_fast(user_addr, npages, !write, req->pages);
 	if (npages < 0)
 		return npages;
@@ -1077,7 +1078,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 		size_t nres;
 		fl_owner_t owner = current->files;
 		size_t nbytes = min(count, nmax);
-		int err = fuse_get_user_pages(req, buf, &nbytes, write);
+		int err = fuse_get_user_pages(fc, req, buf, &nbytes, write);
 		if (err) {
 			res = err;
 			break;
@@ -1269,7 +1270,7 @@ static int fuse_writepage_locked(struct page *page)
 
 	set_page_writeback(page);
 
-	req = fuse_request_alloc_nofs();
+	req = fuse_request_alloc_nofs(fc);
 	if (!req)
 		goto err;
 
@@ -1695,10 +1696,11 @@ static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
 }
 
 /* Make sure iov_length() won't overflow */
-static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
+static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov,
+				 size_t count)
 {
 	size_t n;
-	u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
+	u32 max = fc->max_pages << PAGE_SHIFT;
 
 	for (n = 0; n < count; n++) {
 		if (iov->iov_len > (size_t) max)
@@ -1821,7 +1823,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 	BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
 
 	err = -ENOMEM;
-	pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL);
+	pages = kcalloc(fc->max_pages, sizeof(pages[0]), GFP_KERNEL);
 	iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
 	if (!pages || !iov_page)
 		goto out;
@@ -1860,7 +1862,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 
 	/* make sure there are enough buffer pages and init request with them */
 	err = -ENOMEM;
-	if (max_pages > FUSE_MAX_PAGES_PER_REQ)
+	if (max_pages > fc->max_pages)
 		goto out;
 	while (num_pages < max_pages) {
 		pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
@@ -1943,11 +1945,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 		in_iov = iov_page;
 		out_iov = in_iov + in_iovs;
 
-		err = fuse_verify_ioctl_iov(in_iov, in_iovs);
+		err = fuse_verify_ioctl_iov(fc, in_iov, in_iovs);
 		if (err)
 			goto out;
 
-		err = fuse_verify_ioctl_iov(out_iov, out_iovs);
+		err = fuse_verify_ioctl_iov(fc, out_iov, out_iovs);
 		if (err)
 			goto out;
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 771fb63..46df615 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -22,9 +22,10 @@
 #include <linux/rbtree.h>
 #include <linux/poll.h>
 #include <linux/workqueue.h>
+#include <linux/pipe_fs_i.h>
 
-/** Max number of pages that can be used in a single read request */
-#define FUSE_MAX_PAGES_PER_REQ 32
+/** Default number of pages that can be used in a single read/write request */
+#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
 
 /** Bias for fi->writectr, meaning new writepages must not be sent */
 #define FUSE_NOWRITE INT_MIN
@@ -290,12 +291,6 @@ struct fuse_req {
 		struct fuse_lk_in lk_in;
 	} misc;
 
-	/** page vector */
-	struct page *pages[FUSE_MAX_PAGES_PER_REQ];
-
-	/** number of pages in vector */
-	unsigned num_pages;
-
 	/** offset of data on first page */
 	unsigned page_offset;
 
@@ -313,6 +308,12 @@ struct fuse_req {
 
 	/** Request is stolen from fuse_file->reserved_req */
 	struct file *stolen_file;
+
+	/** number of pages in vector */
+	unsigned num_pages;
+
+	/** page vector */
+	struct page *pages[0];
 };
 
 /**
@@ -347,6 +348,12 @@ struct fuse_conn {
 	/** Maximum write size */
 	unsigned max_write;
 
+	/** Maximum number of pages per req */
+	unsigned max_pages;
+
+	/** fuse_req size per connection */
+	unsigned fuse_req_size;
+
 	/** Readers of the connection are waiting on this */
 	wait_queue_head_t waitq;
 
@@ -655,9 +662,9 @@ void fuse_ctl_cleanup(void);
 /**
  * Allocate a request
  */
-struct fuse_req *fuse_request_alloc(void);
+struct fuse_req *fuse_request_alloc(struct fuse_conn *fc);
 
-struct fuse_req *fuse_request_alloc_nofs(void);
+struct fuse_req *fuse_request_alloc_nofs(struct fuse_conn *fc);
 
 /**
  * Free a request
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1cd6165..f7f3c5d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -550,6 +550,9 @@ void fuse_conn_init(struct fuse_conn *fc)
 	atomic_set(&fc->num_waiting, 0);
 	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
 	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
+	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
+	fc->fuse_req_size = sizeof(struct fuse_req) +
+			    fc->max_pages * sizeof(struct page *);
 	fc->khctr = 0;
 	fc->polled_files = RB_ROOT;
 	fc->reqctr = 0;
@@ -774,6 +777,18 @@ static int set_global_limit(const char *val, struct kernel_param *kp)
 	return 0;
 }
 
+static void set_conn_max_pages(struct fuse_conn *fc, unsigned max_pages)
+{
+	unsigned pipe_max_size = pipe_get_max_size();
+	unsigned pipe_max_pages = DIV_ROUND_UP(pipe_max_size, PAGE_SIZE);
+
+	if (max_pages > fc->max_pages) {
+		fc->max_pages = min_t(unsigned, pipe_max_pages, max_pages);
+		fc->fuse_req_size = sizeof(struct fuse_req) +
+				    fc->max_pages * sizeof(struct page *);
+	}
+}
+
 static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
 {
 	int cap_sys_admin = capable(CAP_SYS_ADMIN);
@@ -807,6 +822,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 		fc->conn_error = 1;
 	else {
 		unsigned long ra_pages;
+		unsigned max_pages;
 
 		process_init_limits(fc, arg);
 
@@ -844,6 +860,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 		fc->minor = arg->minor;
 		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
 		fc->max_write = max_t(unsigned, 4096, fc->max_write);
+		max_pages = DIV_ROUND_UP(fc->max_write, PAGE_SIZE);
+		set_conn_max_pages(fc, max_pages);
 		fc->conn_init = 1;
 	}
 	fc->blocked = 0;
@@ -880,6 +898,20 @@ static void fuse_free_conn(struct fuse_conn *fc)
 	kfree(fc);
 }
 
+static void fuse_conn_setup(struct fuse_conn *fc,
+			    struct fuse_mount_data *d)
+{
+	unsigned max_pages;
+
+	fc->release = fuse_free_conn;
+	fc->flags = d->flags;
+	fc->user_id = d->user_id;
+	fc->group_id = d->group_id;
+	fc->max_read = max_t(unsigned, 4096, d->max_read);
+	max_pages = DIV_ROUND_UP(fc->max_read, PAGE_SIZE);
+	set_conn_max_pages(fc, max_pages);
+}
+
 static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 {
 	int err;
@@ -986,11 +1018,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 		fc->dont_mask = 1;
 	sb->s_flags |= MS_POSIXACL;
 
-	fc->release = fuse_free_conn;
-	fc->flags = d.flags;
-	fc->user_id = d.user_id;
-	fc->group_id = d.group_id;
-	fc->max_read = max_t(unsigned, 4096, d.max_read);
+	fuse_conn_setup(fc, &d);
 
 	/* Used by get_root_inode() */
 	sb->s_fs_info = fc;
@@ -1003,12 +1031,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	/* only now - we want root dentry with NULL ->d_op */
 	sb->s_d_op = &fuse_dentry_operations;
 
-	init_req = fuse_request_alloc();
+	init_req = fuse_request_alloc(fc);
 	if (!init_req)
 		goto err_put_root;
 
 	if (is_bdev) {
-		fc->destroy_req = fuse_request_alloc();
+		fc->destroy_req = fuse_request_alloc(fc);
 		if (!fc->destroy_req)
 			goto err_free_init_req;
 	}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ