lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1381177293-27125-2-git-send-email-rcj@linux.vnet.ibm.com>
Date:	Mon,  7 Oct 2013 15:21:32 -0500
From:	Robert C Jennings <rcj@...ux.vnet.ibm.com>
To:	linux-kernel@...r.kernel.org
Cc:	linux-fsdevel@...r.kernel.org, linux-mm@...ck.org,
	Alexander Viro <viro@...iv.linux.org.uk>,
	Rik van Riel <riel@...hat.com>,
	Andrea Arcangeli <aarcange@...hat.com>,
	Dave Hansen <dave@...1.net>,
	Robert C Jennings <rcj@...ux.vnet.ibm.com>,
	Matt Helsley <matt.helsley@...il.com>,
	Anthony Liguori <anthony@...emonkey.ws>,
	Michael Roth <mdroth@...ux.vnet.ibm.com>,
	Lei Li <lilei@...ux.vnet.ibm.com>,
	Leonardo Garcia <lagarcia@...ux.vnet.ibm.com>,
	Vlastimil Babka <vbabka@...e.cz>
Subject: [PATCH 1/2] vmsplice: unmap gifted pages for recipient

Introduce use of the unused SPLICE_F_MOVE flag for vmsplice to zap
pages.

When vmsplice is called with flags (SPLICE_F_GIFT | SPLICE_F_MOVE) the
writer's gift'ed pages would be zapped.  This patch supports further work
to move vmsplice'd pages rather than copying them.  That patch has the
restriction that the page must not be mapped by the source for the move,
otherwise it will fall back to copying the page.

Signed-off-by: Matt Helsley <matt.helsley@...il.com>
Signed-off-by: Robert C Jennings <rcj@...ux.vnet.ibm.com>
---
Since the RFC went out I have coalesced the zap_page_range() call to
operate on VMAs rather than calling this for each page.  For a 256MB
vmsplice this reduced the write side 50% from the RFC.
---
 fs/splice.c            | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/splice.h |  1 +
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/fs/splice.c b/fs/splice.c
index 3b7ee65..a62d61e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -188,12 +188,17 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 {
 	unsigned int spd_pages = spd->nr_pages;
 	int ret, do_wakeup, page_nr;
+	struct vm_area_struct *vma;
+	unsigned long user_start, user_end;
 
 	ret = 0;
 	do_wakeup = 0;
 	page_nr = 0;
+	vma = NULL;
+	user_start = user_end = 0;
 
 	pipe_lock(pipe);
+	down_read(&current->mm->mmap_sem);
 
 	for (;;) {
 		if (!pipe->readers) {
@@ -212,8 +217,44 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 			buf->len = spd->partial[page_nr].len;
 			buf->private = spd->partial[page_nr].private;
 			buf->ops = spd->ops;
-			if (spd->flags & SPLICE_F_GIFT)
+			if (spd->flags & SPLICE_F_GIFT) {
+				unsigned long useraddr =
+						spd->partial[page_nr].useraddr;
+
+				if ((spd->flags & SPLICE_F_MOVE) &&
+						!buf->offset &&
+						(buf->len == PAGE_SIZE)) {
+					/* Can move page aligned buf, gather
+					 * requests to make a single
+					 * zap_page_range() call per VMA
+					 */
+					if (vma && (useraddr == user_end) &&
+						   ((useraddr + PAGE_SIZE) <=
+						    vma->vm_end)) {
+						/* same vma, no holes */
+						user_end += PAGE_SIZE;
+					} else {
+						if (vma)
+							zap_page_range(vma,
+								user_start,
+								(user_end -
+								 user_start),
+								NULL);
+						vma = find_vma_intersection(
+								current->mm,
+								useraddr,
+								(useraddr +
+								 PAGE_SIZE));
+						if (!IS_ERR_OR_NULL(vma)) {
+							user_start = useraddr;
+							user_end = (useraddr +
+								    PAGE_SIZE);
+						} else
+							vma = NULL;
+					}
+				}
 				buf->flags |= PIPE_BUF_FLAG_GIFT;
+			}
 
 			pipe->nrbufs++;
 			page_nr++;
@@ -255,6 +296,10 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 		pipe->waiting_writers--;
 	}
 
+	if (vma)
+		zap_page_range(vma, user_start, (user_end - user_start), NULL);
+
+	up_read(&current->mm->mmap_sem);
 	pipe_unlock(pipe);
 
 	if (do_wakeup)
@@ -485,6 +530,7 @@ fill_it:
 
 		spd.partial[page_nr].offset = loff;
 		spd.partial[page_nr].len = this_len;
+		spd.partial[page_nr].useraddr = index << PAGE_CACHE_SHIFT;
 		len -= this_len;
 		loff = 0;
 		spd.nr_pages++;
@@ -656,6 +702,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 		this_len = min_t(size_t, vec[i].iov_len, res);
 		spd.partial[i].offset = 0;
 		spd.partial[i].len = this_len;
+		spd.partial[i].useraddr = (unsigned long)vec[i].iov_base;
 		if (!this_len) {
 			__free_page(spd.pages[i]);
 			spd.pages[i] = NULL;
@@ -1475,6 +1522,8 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 
 			partial[buffers].offset = off;
 			partial[buffers].len = plen;
+			partial[buffers].useraddr = (unsigned long)base;
+			base = (void*)((unsigned long)base + PAGE_SIZE);
 
 			off = 0;
 			len -= plen;
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 74575cb..56661e3 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -44,6 +44,7 @@ struct partial_page {
 	unsigned int offset;
 	unsigned int len;
 	unsigned long private;
+	unsigned long useraddr;
 };
 
 /*
-- 
1.8.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ