Date:	Sun,  4 Aug 2013 05:17:21 +0300
From:	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To:	Andrea Arcangeli <aarcange@...hat.com>,
	Andrew Morton <akpm@...ux-foundation.org>
Cc:	Al Viro <viro@...iv.linux.org.uk>, Hugh Dickins <hughd@...gle.com>,
	Wu Fengguang <fengguang.wu@...el.com>, Jan Kara <jack@...e.cz>,
	Mel Gorman <mgorman@...e.de>, linux-mm@...ck.org,
	Andi Kleen <ak@...ux.intel.com>,
	Matthew Wilcox <willy@...ux.intel.com>,
	"Kirill A. Shutemov" <kirill@...temov.name>,
	Hillf Danton <dhillf@...il.com>, Dave Hansen <dave@...1.net>,
	Ning Qu <quning@...gle.com>, linux-fsdevel@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Subject: [PATCH 19/23] truncate: support huge pages

From: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>

truncate_inode_pages_range() drops a whole huge page at once if it is fully
inside the range.

If a huge page is only partly within the range, we zero out that part,
exactly as we do for partial small pages.

invalidate_mapping_pages() simply skips huge pages that are not fully
within the range.
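
To illustrate the arithmetic (this sketch is not part of the patch itself),
here is a minimal standalone C program showing how a byte range maps onto
page-cache indices and how a range that starts in the middle of a huge page
is turned into a head-page index plus an in-page offset for zeroing. The
constants assume 4KiB pages and 2MiB huge pages; in the kernel,
HPAGE_CACHE_NR, HPAGE_CACHE_INDEX_MASK and HPAGE_PMD_MASK come from earlier
patches in this series.

/* Illustrative sketch only; constants assume 4KiB pages, 2MiB huge pages. */
#include <stdio.h>

#define PAGE_CACHE_SHIFT        12
#define PAGE_CACHE_SIZE         (1UL << PAGE_CACHE_SHIFT)
#define HPAGE_PMD_SHIFT         21
#define HPAGE_PMD_SIZE          (1UL << HPAGE_PMD_SHIFT)
#define HPAGE_PMD_MASK          (~(HPAGE_PMD_SIZE - 1))
#define HPAGE_CACHE_NR          (1UL << (HPAGE_PMD_SHIFT - PAGE_CACHE_SHIFT))
#define HPAGE_CACHE_INDEX_MASK  (HPAGE_CACHE_NR - 1)

int main(void)
{
	/* hypothetical truncation: keep bytes below 3MiB, drop [3MiB, 5MiB] */
	unsigned long lstart = 3UL << 20;          /* first byte to truncate */
	unsigned long lend   = (5UL << 20) - 1;    /* last byte, inclusive   */

	/* first fully truncated page index, and the exclusive end index */
	unsigned long start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	unsigned long end   = (lend + 1) >> PAGE_CACHE_SHIFT;

	/*
	 * lstart lands inside the huge page covering [2MiB, 4MiB), so only
	 * part of it is zeroed: starting at byte offset 'pstart' within the
	 * huge page whose head page has index 'thp_head'.
	 */
	unsigned long thp_head = (start - 1) & ~HPAGE_CACHE_INDEX_MASK;
	unsigned long pstart   = lstart & ~HPAGE_PMD_MASK;

	printf("start=%lu end=%lu (page indices)\n", start, end);
	printf("huge page head index=%lu, zero bytes [%lu, %lu)\n",
	       thp_head, pstart, HPAGE_PMD_SIZE);
	return 0;
}

With the values above, start is 768 and end is 1280, the partially covered
huge page has head index 512, and zeroing runs from offset 1MiB to the end
of the 2MiB page, which matches what the partial_thp_start path below does.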

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
---
 mm/truncate.c | 108 +++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 84 insertions(+), 24 deletions(-)

diff --git a/mm/truncate.c b/mm/truncate.c
index 353b683..fcef7cb 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -205,8 +205,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 {
 	pgoff_t		start;		/* inclusive */
 	pgoff_t		end;		/* exclusive */
-	unsigned int	partial_start;	/* inclusive */
-	unsigned int	partial_end;	/* exclusive */
+	bool		partial_thp_start = false, partial_thp_end = false;
 	struct pagevec	pvec;
 	pgoff_t		index;
 	int		i;
@@ -215,15 +214,9 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	if (mapping->nrpages == 0)
 		return;
 
-	/* Offsets within partial pages */
-	partial_start = lstart & (PAGE_CACHE_SIZE - 1);
-	partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
-
 	/*
 	 * 'start' and 'end' always covers the range of pages to be fully
-	 * truncated. Partial pages are covered with 'partial_start' at the
-	 * start of the range and 'partial_end' at the end of the range.
-	 * Note that 'end' is exclusive while 'lend' is inclusive.
+	 * truncated. Note that 'end' is exclusive while 'lend' is inclusive.
 	 */
 	start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (lend == -1)
@@ -249,6 +242,23 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			if (index >= end)
 				break;
 
+			if (PageTransTailCache(page)) {
+				/* part of already handled huge page */
+				if (!page->mapping)
+					continue;
+				/* the range starts in middle of huge page */
+				partial_thp_start = true;
+				start = index & ~HPAGE_CACHE_INDEX_MASK;
+				continue;
+			}
+			/* the range ends on huge page */
+			if (PageTransHugeCache(page) &&
+				index == (end & ~HPAGE_CACHE_INDEX_MASK)) {
+				partial_thp_end = true;
+				end = index;
+				break;
+			}
+
 			if (!trylock_page(page))
 				continue;
 			WARN_ON(page->index != index);
@@ -265,34 +275,74 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		index++;
 	}
 
-	if (partial_start) {
-		struct page *page = find_lock_page(mapping, start - 1);
+	if (partial_thp_start || lstart & ~PAGE_CACHE_MASK) {
+		pgoff_t off;
+		struct page *page;
+		unsigned pstart, pend;
+		void (*zero_segment)(struct page *page,
+				unsigned start, unsigned len);
+retry_partial_start:
+		if (partial_thp_start) {
+			zero_segment = zero_huge_user_segment;
+			off = (start - 1) & ~HPAGE_CACHE_INDEX_MASK;
+			pstart = lstart & ~HPAGE_PMD_MASK;
+			if ((end & ~HPAGE_CACHE_INDEX_MASK) == off)
+				pend = (lend - 1) & ~HPAGE_PMD_MASK;
+			else
+				pend = HPAGE_PMD_SIZE;
+		} else {
+			zero_segment = zero_user_segment;
+			off = start - 1;
+			pstart = lstart & ~PAGE_CACHE_MASK;
+			if (start > end)
+				pend = (lend - 1) & ~PAGE_CACHE_MASK;
+			else
+				pend = PAGE_CACHE_SIZE;
+		}
+
+		page = find_get_page(mapping, off);
 		if (page) {
-			unsigned int top = PAGE_CACHE_SIZE;
-			if (start > end) {
-				/* Truncation within a single page */
-				top = partial_end;
-				partial_end = 0;
+			/* the last tail page*/
+			if (PageTransTailCache(page)) {
+				partial_thp_start = true;
+				page_cache_release(page);
+				goto retry_partial_start;
 			}
+
+			lock_page(page);
 			wait_on_page_writeback(page);
-			zero_user_segment(page, partial_start, top);
+			zero_segment(page, pstart, pend);
 			cleancache_invalidate_page(mapping, page);
 			if (page_has_private(page))
-				do_invalidatepage(page, partial_start,
-						  top - partial_start);
+				do_invalidatepage(page, pstart,
+						pend - pstart);
 			unlock_page(page);
 			page_cache_release(page);
 		}
 	}
-	if (partial_end) {
-		struct page *page = find_lock_page(mapping, end);
+	if (partial_thp_end || (lend + 1) & ~PAGE_CACHE_MASK) {
+		pgoff_t off;
+		struct page *page;
+		unsigned pend;
+		void (*zero_segment)(struct page *page,
+				unsigned start, unsigned len);
+		if (partial_thp_end) {
+			zero_segment = zero_huge_user_segment;
+			off = end & ~HPAGE_CACHE_INDEX_MASK;
+			pend = (lend - 1) & ~HPAGE_PMD_MASK;
+		} else {
+			zero_segment = zero_user_segment;
+			off = end;
+			pend = (lend - 1) & ~PAGE_CACHE_MASK;
+		}
+
+		page = find_lock_page(mapping, end);
 		if (page) {
 			wait_on_page_writeback(page);
-			zero_user_segment(page, 0, partial_end);
+			zero_segment(page, 0, pend);
 			cleancache_invalidate_page(mapping, page);
 			if (page_has_private(page))
-				do_invalidatepage(page, 0,
-						  partial_end);
+				do_invalidatepage(page, 0, pend);
 			unlock_page(page);
 			page_cache_release(page);
 		}
@@ -327,6 +377,9 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			if (index >= end)
 				break;
 
+			if (PageTransTailCache(page))
+				continue;
+
 			lock_page(page);
 			WARN_ON(page->index != index);
 			wait_on_page_writeback(page);
@@ -401,6 +454,13 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			if (index > end)
 				break;
 
+			/* skip huge page if it's not fully in the range */
+			if (PageTransHugeCache(page) &&
+					index + HPAGE_CACHE_NR - 1 > end)
+				continue;
+			if (PageTransTailCache(page))
+				continue;
+
 			if (!trylock_page(page))
 				continue;
 			WARN_ON(page->index != index);
-- 
1.8.3.2
