lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Fri, 16 Aug 2013 16:22:11 -0700
From:	Andy Lutomirski <luto@...capital.net>
To:	linux-kernel@...r.kernel.org
Cc:	linux-ext4@...r.kernel.org, Dave Chinner <david@...morbit.com>,
	Theodore Ts'o <tytso@....edu>,
	Dave Hansen <dave.hansen@...ux.intel.com>, xfs@....sgi.com,
	Jan Kara <jack@...e.cz>, Tim Chen <tim.c.chen@...ux.intel.com>,
	Christoph Hellwig <hch@...radead.org>,
	Andy Lutomirski <luto@...capital.net>
Subject: [PATCH v3 4/5] mm: Scan for dirty ptes and update cmtime on MS_ASYNC

This is probably unimportant but improves POSIX compliance.

Signed-off-by: Andy Lutomirski <luto@...capital.net>
---
 mm/msync.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 72 insertions(+), 11 deletions(-)

diff --git a/mm/msync.c b/mm/msync.c
index 632df45..9e41acd 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -13,13 +13,16 @@
 #include <linux/file.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
+#include <linux/rmap.h>
+#include <linux/pagemap.h>
 
 /*
  * MS_SYNC syncs the entire file - including mappings.
  *
  * MS_ASYNC does not start I/O (it used to, up to 2.5.67).
  * Nor does it marks the relevant pages dirty (it used to up to 2.6.17).
- * Now it doesn't do anything, since dirty pages are properly tracked.
+ * Now all it does is ensure that file timestamps get updated, since POSIX
+ * requires it.  We track dirty pages correctly without MS_ASYNC.
  *
  * The application may now run fsync() to
  * write out the dirty pages and wait on the writeout and check the result.
@@ -28,6 +31,57 @@
  * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
  * applications.
  */
+
+static int msync_async_range(struct vm_area_struct *vma,
+			      unsigned long *start, unsigned long end)
+{
+	struct mm_struct *mm;
+	struct address_space *mapping;
+	int iters = 0;
+
+	while (*start < end && *start < vma->vm_end && iters < 128) {
+		unsigned int page_mask, page_increm;
+
+		/*
+		 * Require that the pte be writable (because otherwise it can't
+		 * be dirty, so there's nothing to clean).
+		 *
+		 * In theory we could check the pte dirty bit, but this is
+		 * awkward and barely worth it.
+		 */
+		struct page *page = follow_page_mask(vma, *start,
+						     FOLL_GET | FOLL_WRITE,
+						     &page_mask);
+
+		if (page && !IS_ERR(page)) {
+			if (lock_page_killable(page) == 0) {
+				page_mkclean(page);
+				unlock_page(page);
+			}
+			put_page(page);
+		}
+
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		page_increm = 1 + (~(*start >> PAGE_SHIFT) & page_mask);
+		*start += page_increm * PAGE_SIZE;
+		cond_resched();
+		iters++;
+	}
+
+	/* XXX: try to do this only once? */
+	mapping = vma->vm_file->f_mapping;
+	if (mapping->a_ops->flush_cmtime)
+		mapping->a_ops->flush_cmtime(mapping);
+
+	/* Give mmap_sem writers a chance. */
+	mm = current->mm;
+	up_read(&mm->mmap_sem);
+	down_read(&mm->mmap_sem);
+	return 0;
+}
+
 SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
 {
 	unsigned long end;
@@ -77,18 +131,25 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
 			goto out_unlock;
 		}
 		file = vma->vm_file;
-		start = vma->vm_end;
-		if ((flags & MS_SYNC) && file &&
-				(vma->vm_flags & VM_SHARED)) {
-			get_file(file);
-			up_read(&mm->mmap_sem);
-			error = vfs_fsync(file, 0);
-			fput(file);
-			if (error || start >= end)
-				goto out;
-			down_read(&mm->mmap_sem);
+		if (file && vma->vm_flags & VM_SHARED) {
+			if (flags & MS_SYNC) {
+				start = vma->vm_end;
+				get_file(file);
+				up_read(&mm->mmap_sem);
+				error = vfs_fsync(file, 0);
+				fput(file);
+				if (error || start >= end)
+					goto out;
+				down_read(&mm->mmap_sem);
+			} else if ((vma->vm_flags & VM_WRITE) &&
+				   file->f_mapping) {
+				error = msync_async_range(vma, &start, end);
+			} else {
+				start = vma->vm_end;
+			}
 			vma = find_vma(mm, start);
 		} else {
+			start = vma->vm_end;
 			if (start >= end) {
 				error = 0;
 				goto out_unlock;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ