Date:   Wed, 30 Jan 2019 13:44:20 +0100
From:   Vlastimil Babka <vbabka@...e.cz>
To:     Andrew Morton <akpm@...ux-foundation.org>,
        Linus Torvalds <torvalds@...ux-foundation.org>
Cc:     linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        linux-api@...r.kernel.org, Peter Zijlstra <peterz@...radead.org>,
        Greg KH <gregkh@...uxfoundation.org>,
        Jann Horn <jannh@...gle.com>, Vlastimil Babka <vbabka@...e.cz>,
        Jiri Kosina <jikos@...nel.org>,
        Dominique Martinet <asmadeus@...ewreck.org>,
        Andy Lutomirski <luto@...capital.net>,
        Dave Chinner <david@...morbit.com>,
        Kevin Easton <kevin@...rana.org>,
        Matthew Wilcox <willy@...radead.org>,
        Cyril Hrubis <chrubis@...e.cz>, Tejun Heo <tj@...nel.org>,
        "Kirill A . Shutemov" <kirill@...temov.name>,
        Daniel Gruss <daniel@...ss.cc>, Jiri Kosina <jkosina@...e.cz>
Subject: [PATCH 3/3] mm/mincore: provide mapped status when cached status is not allowed

After "mm/mincore: make mincore() more conservative" we sometimes restrict the
information about page cache residency, which we have to do without breaking
existing userspace, if possible. We thus fake the resulting values as 1, which
should be safer than faking them as 0, as there might theoretically exist code
that would try to fault in the page(s) until mincore() returns 1.
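
For illustration only (this sketch is not part of the patch), such a
hypothetical userspace loop could look like the following; the address is
assumed to be page-aligned. If the kernel faked the result as 0, the loop
would never terminate, whereas faking 1 lets it exit immediately:

  /*
   * Hypothetical example of code that keeps touching a page until
   * mincore() reports it as resident. Not taken from any real project.
   */
  #include <sys/mman.h>
  #include <unistd.h>

  static void fault_in_until_resident(char *addr)
  {
  	long page = sysconf(_SC_PAGESIZE);
  	unsigned char vec = 0;

  	do {
  		*(volatile char *)addr;	/* touch -> fault the page in */
  	} while (mincore(addr, page, &vec) == 0 && !(vec & 1));
  }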

Faking 1, however, means that such code would not fault in a page even if it
was not in the page cache, with unwanted performance implications. We can
improve the situation by revisiting the approach of 574823bfab82 ("Change
mincore() to count "mapped" pages rather than "cached" pages"), but only
applying it to the cases where the page cache residency check is restricted.
Thus mincore() will return 0 for an unmapped page (which may or may not be
resident in the page cache), and 1 after the process faults it in.
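
A minimal userspace sketch of the resulting semantics (again, illustrative
only and not part of the patch), assuming the mapping is one where
can_do_mincore() denies the page cache check (e.g. a file the caller can only
read); /etc/hostname is just an arbitrary example file:

  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/mman.h>
  #include <unistd.h>

  int main(void)
  {
  	long page = sysconf(_SC_PAGESIZE);
  	int fd = open("/etc/hostname", O_RDONLY);
  	unsigned char vec;
  	char *p;

  	if (fd < 0)
  		return 1;
  	p = mmap(NULL, page, PROT_READ, MAP_PRIVATE, fd, 0);
  	if (p == MAP_FAILED)
  		return 1;

  	mincore(p, page, &vec);
  	/* 0 under the restricted semantics, regardless of whether the
  	 * page happens to be in the page cache */
  	printf("before fault-in: %d\n", vec & 1);

  	(void)*(volatile char *)p;	/* fault the page in */

  	mincore(p, page, &vec);
  	/* now 1, since the page is mapped by this process */
  	printf("after fault-in:  %d\n", vec & 1);

  	munmap(p, page);
  	close(fd);
  	return 0;
  }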

One potential downside is that mincore() will again be able to recognize when
a previously mapped page was reclaimed. While that might be useful for some
attack scenarios, it's not as crucial as recognizing that somebody else faulted
the page in, and there are other ways to recognize reclaimed pages anyway.

Cc: Jiri Kosina <jikos@...nel.org>
Cc: Dominique Martinet <asmadeus@...ewreck.org>
Cc: Andy Lutomirski <luto@...capital.net>
Cc: Dave Chinner <david@...morbit.com>
Cc: Kevin Easton <kevin@...rana.org>
Cc: Matthew Wilcox <willy@...radead.org>
Cc: Cyril Hrubis <chrubis@...e.cz>
Cc: Tejun Heo <tj@...nel.org>
Cc: Kirill A. Shutemov <kirill@...temov.name>
Cc: Daniel Gruss <daniel@...ss.cc>
Signed-off-by: Vlastimil Babka <vbabka@...e.cz>
---
 mm/mincore.c | 49 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/mm/mincore.c b/mm/mincore.c
index 747a4907a3ac..d6784a803ae7 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -21,12 +21,18 @@
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
 
+struct mincore_walk_private {
+	unsigned char *vec;
+	bool can_check_pagecache;
+};
+
 static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
 			unsigned long end, struct mm_walk *walk)
 {
 #ifdef CONFIG_HUGETLB_PAGE
 	unsigned char present;
-	unsigned char *vec = walk->private;
+	struct mincore_walk_private *walk_private = walk->private;
+	unsigned char *vec = walk_private->vec;
 
 	/*
 	 * Hugepages under user process are always in RAM and never
@@ -35,7 +41,7 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
 	present = pte && !huge_pte_none(huge_ptep_get(pte));
 	for (; addr != end; vec++, addr += PAGE_SIZE)
 		*vec = present;
-	walk->private = vec;
+	walk_private->vec = vec;
 #else
 	BUG();
 #endif
@@ -85,7 +91,8 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
 }
 
 static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
-				struct vm_area_struct *vma, unsigned char *vec)
+				struct vm_area_struct *vma, unsigned char *vec,
+				bool can_check_pagecache)
 {
 	unsigned long nr = (end - addr) >> PAGE_SHIFT;
 	int i;
@@ -95,7 +102,9 @@ static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
 
 		pgoff = linear_page_index(vma, addr);
 		for (i = 0; i < nr; i++, pgoff++)
-			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
+			vec[i] = can_check_pagecache ?
+				 mincore_page(vma->vm_file->f_mapping, pgoff)
+				 : 0;
 	} else {
 		for (i = 0; i < nr; i++)
 			vec[i] = 0;
@@ -106,8 +115,11 @@ static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
 static int mincore_unmapped_range(unsigned long addr, unsigned long end,
 				   struct mm_walk *walk)
 {
-	walk->private += __mincore_unmapped_range(addr, end,
-						  walk->vma, walk->private);
+	struct mincore_walk_private *walk_private = walk->private;
+	unsigned char *vec = walk_private->vec;
+
+	walk_private->vec += __mincore_unmapped_range(addr, end, walk->vma,
+				vec, walk_private->can_check_pagecache);
 	return 0;
 }
 
@@ -117,7 +129,8 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	spinlock_t *ptl;
 	struct vm_area_struct *vma = walk->vma;
 	pte_t *ptep;
-	unsigned char *vec = walk->private;
+	struct mincore_walk_private *walk_private = walk->private;
+	unsigned char *vec = walk_private->vec;
 	int nr = (end - addr) >> PAGE_SHIFT;
 
 	ptl = pmd_trans_huge_lock(pmd, vma);
@@ -128,7 +141,8 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	}
 
 	if (pmd_trans_unstable(pmd)) {
-		__mincore_unmapped_range(addr, end, vma, vec);
+		__mincore_unmapped_range(addr, end, vma, vec,
+					walk_private->can_check_pagecache);
 		goto out;
 	}
 
@@ -138,7 +152,7 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 
 		if (pte_none(pte))
 			__mincore_unmapped_range(addr, addr + PAGE_SIZE,
-						 vma, vec);
+				 vma, vec, walk_private->can_check_pagecache);
 		else if (pte_present(pte))
 			*vec = 1;
 		else { /* pte is a swap entry */
@@ -152,8 +166,12 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 				*vec = 1;
 			} else {
 #ifdef CONFIG_SWAP
-				*vec = mincore_page(swap_address_space(entry),
+				if (walk_private->can_check_pagecache)
+					*vec = mincore_page(
+						    swap_address_space(entry),
 						    swp_offset(entry));
+				else
+					*vec = 0;
 #else
 				WARN_ON(1);
 				*vec = 1;
@@ -187,22 +205,21 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
 	struct vm_area_struct *vma;
 	unsigned long end;
 	int err;
+	struct mincore_walk_private walk_private = {
+		.vec = vec
+	};
 	struct mm_walk mincore_walk = {
 		.pmd_entry = mincore_pte_range,
 		.pte_hole = mincore_unmapped_range,
 		.hugetlb_entry = mincore_hugetlb,
-		.private = vec,
+		.private = &walk_private
 	};
 
 	vma = find_vma(current->mm, addr);
 	if (!vma || addr < vma->vm_start)
 		return -ENOMEM;
 	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
-	if (!can_do_mincore(vma)) {
-		unsigned long pages = (end - addr) >> PAGE_SHIFT;
-		memset(vec, 1, pages);
-		return pages;
-	}
+	walk_private.can_check_pagecache = can_do_mincore(vma);
 	mincore_walk.mm = vma->vm_mm;
 	err = walk_page_range(addr, end, &mincore_walk);
 	if (err < 0)
-- 
2.20.1
