lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120809123039.8542.18593.stgit@ltc189.sdl.hitachi.co.jp>
Date:	Thu, 09 Aug 2012 21:30:39 +0900
From:	Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@...achi.com>
To:	linux-kernel@...r.kernel.org
Cc:	Amit Shah <amit.shah@...hat.com>,
	Anthony Liguori <anthony@...emonkey.ws>,
	Arnd Bergmann <arnd@...db.de>, Borislav Petkov <bp@...64.org>,
	"Franch Ch. Eigler" <fche@...hat.com>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	Herbert Xu <herbert@...dor.apana.org.au>,
	Ingo Molnar <mingo@...hat.com>,
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
	Masami Hiramatsu <masami.hiramatsu.pt@...achi.com>,
	Rusty Russell <rusty@...tcorp.com.au>,
	Steven Rostedt <rostedt@...dmis.org>,
	virtualization@...ts.linux-foundation.org, qemu-devel@...gnu.org,
	yrl.pp-manager.tt@...achi.com,
	Masami Hiramatsu <masami.hiramatsu.pt@...achi.com>
Subject: [PATCH V2 1/6] virtio/console: Add splice_write support

From: Masami Hiramatsu <masami.hiramatsu.pt@...achi.com>

Enable to use splice_write from pipe to virtio-console port.
This steals pages from pipe and directly send it to host.

Note that this may accelerate only the guest to host path.

Changes in v2:
 - Use GFP_KERNEL instead of GFP_ATOMIC in syscall context function.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@...achi.com>
---

 drivers/char/virtio_console.c |  136 +++++++++++++++++++++++++++++++++++++++--
 1 files changed, 128 insertions(+), 8 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index cdf2f54..730816c 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -24,6 +24,8 @@
 #include <linux/err.h>
 #include <linux/freezer.h>
 #include <linux/fs.h>
+#include <linux/splice.h>
+#include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/poll.h>
@@ -227,6 +229,7 @@ struct port {
 	bool guest_connected;
 };
 
+#define MAX_SPLICE_PAGES	32
 /* This is the very early arch-specified put chars function. */
 static int (*early_put_chars)(u32, const char *, int);
 
@@ -474,26 +477,52 @@ static ssize_t send_control_msg(struct port *port, unsigned int event,
 	return 0;
 }
 
+struct buffer_token {
+	union {
+		void *buf;
+		struct scatterlist *sg;
+	} u;
+	bool sgpages;
+};
+
+static void reclaim_sg_pages(struct scatterlist *sg)
+{
+	int i;
+	struct page *page;
+
+	for (i = 0; i < MAX_SPLICE_PAGES; i++) {
+		page = sg_page(&sg[i]);
+		if (!page)
+			break;
+		put_page(page);
+	}
+	kfree(sg);
+}
+
 /* Callers must take the port->outvq_lock */
 static void reclaim_consumed_buffers(struct port *port)
 {
-	void *buf;
+	struct buffer_token *tok;
 	unsigned int len;
 
 	if (!port->portdev) {
 		/* Device has been unplugged.  vqs are already gone. */
 		return;
 	}
-	while ((buf = virtqueue_get_buf(port->out_vq, &len))) {
-		kfree(buf);
+	while ((tok = virtqueue_get_buf(port->out_vq, &len))) {
+		if (tok->sgpages)
+			reclaim_sg_pages(tok->u.sg);
+		else
+			kfree(tok->u.buf);
+		kfree(tok);
 		port->outvq_full = false;
 	}
 }
 
-static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
-			bool nonblock)
+static ssize_t __send_to_port(struct port *port, struct scatterlist *sg,
+			      int nents, size_t in_count,
+			      struct buffer_token *tok, bool nonblock)
 {
-	struct scatterlist sg[1];
 	struct virtqueue *out_vq;
 	ssize_t ret;
 	unsigned long flags;
@@ -505,8 +534,7 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
 
 	reclaim_consumed_buffers(port);
 
-	sg_init_one(sg, in_buf, in_count);
-	ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf, GFP_ATOMIC);
+	ret = virtqueue_add_buf(out_vq, sg, nents, 0, tok, GFP_ATOMIC);
 
 	/* Tell Host to go! */
 	virtqueue_kick(out_vq);
@@ -544,6 +572,37 @@ done:
 	return in_count;
 }
 
+static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
+			bool nonblock)
+{
+	struct scatterlist sg[1];
+	struct buffer_token *tok;
+
+	tok = kmalloc(sizeof(*tok), GFP_ATOMIC);
+	if (!tok)
+		return -ENOMEM;
+	tok->sgpages = false;
+	tok->u.buf = in_buf;
+
+	sg_init_one(sg, in_buf, in_count);
+
+	return __send_to_port(port, sg, 1, in_count, tok, nonblock);
+}
+
+static ssize_t send_pages(struct port *port, struct scatterlist *sg, int nents,
+			  size_t in_count, bool nonblock)
+{
+	struct buffer_token *tok;
+
+	tok = kmalloc(sizeof(*tok), GFP_ATOMIC);
+	if (!tok)
+		return -ENOMEM;
+	tok->sgpages = true;
+	tok->u.sg = sg;
+
+	return __send_to_port(port, sg, nents, in_count, tok, nonblock);
+}
+
 /*
  * Give out the data that's requested from the buffer that we have
  * queued up.
@@ -725,6 +784,66 @@ out:
 	return ret;
 }
 
+struct sg_list {
+	unsigned int n;
+	size_t len;
+	struct scatterlist *sg;
+};
+
+static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+			struct splice_desc *sd)
+{
+	struct sg_list *sgl = sd->u.data;
+	unsigned int len = 0;
+
+	if (sgl->n == MAX_SPLICE_PAGES)
+		return 0;
+
+	/* Try lock this page */
+	if (buf->ops->steal(pipe, buf) == 0) {
+		/* Get reference and unlock page for moving */
+		get_page(buf->page);
+		unlock_page(buf->page);
+
+		len = min(buf->len, sd->len);
+		sg_set_page(&(sgl->sg[sgl->n]), buf->page, len, buf->offset);
+		sgl->n++;
+		sgl->len += len;
+	}
+
+	return len;
+}
+
+/* Faster zero-copy write by splicing */
+static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
+				      struct file *filp, loff_t *ppos,
+				      size_t len, unsigned int flags)
+{
+	struct port *port = filp->private_data;
+	struct sg_list sgl;
+	ssize_t ret;
+	struct splice_desc sd = {
+		.total_len = len,
+		.flags = flags,
+		.pos = *ppos,
+		.u.data = &sgl,
+	};
+
+	sgl.n = 0;
+	sgl.len = 0;
+	sgl.sg = kmalloc(sizeof(struct scatterlist) * MAX_SPLICE_PAGES,
+			 GFP_KERNEL);
+	if (unlikely(!sgl.sg))
+		return -ENOMEM;
+
+	sg_init_table(sgl.sg, MAX_SPLICE_PAGES);
+	ret = __splice_from_pipe(pipe, &sd, pipe_to_sg);
+	if (likely(ret > 0))
+		ret = send_pages(port, sgl.sg, sgl.n, sgl.len, true);
+
+	return ret;
+}
+
 static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
 {
 	struct port *port;
@@ -856,6 +975,7 @@ static const struct file_operations port_fops = {
 	.open  = port_fops_open,
 	.read  = port_fops_read,
 	.write = port_fops_write,
+	.splice_write = port_fops_splice_write,
 	.poll  = port_fops_poll,
 	.release = port_fops_release,
 	.fasync = port_fops_fasync,


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ