lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 27 Sep 2011 17:49:01 -0700
From:	Michel Lespinasse <walken@...gle.com>
To:	linux-mm@...ck.org, linux-kernel@...r.kernel.org,
	Andrew Morton <akpm@...ux-foundation.org>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>,
	Dave Hansen <dave@...ux.vnet.ibm.com>,
	Rik van Riel <riel@...hat.com>,
	Balbir Singh <bsingharora@...il.com>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc:	Andrea Arcangeli <aarcange@...hat.com>,
	Johannes Weiner <jweiner@...hat.com>,
	KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>,
	Hugh Dickins <hughd@...gle.com>,
	Michael Wolf <mjwolf@...ibm.com>
Subject: [PATCH 3/9] kstaled: page_referenced_kstaled() and supporting infrastructure.

Add a new page_referenced_kstaled() interface. The desired behavior
is that page_referenced() returns page references since the last
page_referenced() call, and page_referenced_kstaled() returns page
references since the last page_referenced_kstaled() call, but they
are both independent of each other and do not influence each other.

The following events are counted as kstaled page references:
- CPU data access to the page (as noticed through pte_young());
- mark_page_accessed() calls;
- page being freed / reallocated.


Signed-off-by: Michel Lespinasse <walken@...gle.com>
---
 include/linux/page-flags.h |   35 ++++++++++++++++++++++
 include/linux/rmap.h       |   68 +++++++++++++++++++++++++++++++++++++++----
 mm/rmap.c                  |   62 ++++++++++++++++++++++++++++-----------
 mm/swap.c                  |    1 +
 4 files changed, 141 insertions(+), 25 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6081493..e964d98 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -51,6 +51,13 @@
  * PG_hwpoison indicates that a page got corrupted in hardware and contains
  * data with incorrect ECC bits that triggered a machine check. Accessing is
  * not safe since it may cause another machine check. Don't touch!
+ *
+ * PG_young indicates that kstaled cleared the young bit on some PTEs pointing
+ * to that page. In order to avoid interacting with the LRU algorithm, we want
+ * the next page_referenced() call to still consider the page young.
+ *
+ * PG_idle indicates that the page has not been referenced since the last time
+ * kstaled scanned it.
  */
 
 /*
@@ -107,6 +114,10 @@ enum pageflags {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	PG_compound_lock,
 #endif
+#ifdef CONFIG_KSTALED
+	PG_young,		/* kstaled cleared pte_young */
+	PG_idle,		/* idle since start of kstaled interval */
+#endif
 	__NR_PAGEFLAGS,
 
 	/* Filesystems */
@@ -278,6 +289,30 @@ PAGEFLAG_FALSE(HWPoison)
 #define __PG_HWPOISON 0
 #endif
 
+#ifdef CONFIG_KSTALED
+
+PAGEFLAG(Young, young)
+PAGEFLAG(Idle, idle)
+
+static inline void set_page_young(struct page *page)
+{
+	if (!PageYoung(page))
+		SetPageYoung(page);
+}
+
+static inline void clear_page_idle(struct page *page)
+{
+	if (PageIdle(page))
+		ClearPageIdle(page);
+}
+
+#else /* !CONFIG_KSTALED */
+
+static inline void set_page_young(struct page *page) {}
+static inline void clear_page_idle(struct page *page) {}
+
+#endif /* CONFIG_KSTALED */
+
 u64 stable_page_flags(struct page *page);
 
 static inline int PageUptodate(struct page *page)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 82fef42..88a0b85 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -74,6 +74,8 @@ struct page_referenced_info {
 	unsigned long vm_flags;
 	unsigned int pr_flags;
 #define PR_REFERENCED  1
+#define PR_DIRTY       2
+#define PR_FOR_KSTALED 4
 };
 
 #ifdef CONFIG_MMU
@@ -165,8 +167,8 @@ static inline void page_dup_rmap(struct page *page)
 /*
  * Called from mm/vmscan.c to handle paging out
  */
-void page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt,
-		     struct page_referenced_info *info);
+void __page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt,
+		       struct page_referenced_info *info);
 void page_referenced_one(struct page *, struct vm_area_struct *,
 			 unsigned long address, unsigned int *mapcount,
 			 struct page_referenced_info *info);
@@ -244,12 +246,10 @@ int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
 #define anon_vma_prepare(vma)	(0)
 #define anon_vma_link(vma)	do {} while (0)
 
-static inline void page_referenced(struct page *page, int is_locked,
-				   struct mem_cgroup *cnt,
-				   struct page_referenced_info *info)
+static inline void __page_referenced(struct page *page, int is_locked,
+				     struct mem_cgroup *cnt,
+				     struct page_referenced_info *info)
 {
-	info->vm_flags = 0;
-	info->pr_flags = 0;
 }
 
 #define try_to_unmap(page, refs) SWAP_FAIL
@@ -262,6 +262,60 @@ static inline int page_mkclean(struct page *page)
 
 #endif	/* CONFIG_MMU */
 
+/**
+ * page_referenced - test if the page was referenced
+ * @page: the page to test
+ * @is_locked: caller holds lock on the page
+ * @mem_cont: target memory controller
+ * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
+ *
+ * Quick test_and_clear_referenced for all mappings to a page,
+ * returns the number of ptes which referenced the page.
+ */
+static inline void page_referenced(struct page *page,
+				   int is_locked,
+				   struct mem_cgroup *mem_cont,
+				   struct page_referenced_info *info)
+{
+	info->vm_flags = 0;
+	info->pr_flags = 0;
+
+#ifdef CONFIG_KSTALED
+	/*
+	 * Always clear PageYoung at the start of a scanning interval. It will
+	 * get get set if kstaled clears a young bit in a pte reference,
+	 * so that vmscan will still see the page as referenced.
+	 */
+	if (PageYoung(page)) {
+		ClearPageYoung(page);
+		info->pr_flags |= PR_REFERENCED;
+	}
+#endif
+
+	__page_referenced(page, is_locked, mem_cont, info);
+}
+
+#ifdef CONFIG_KSTALED
+static inline void page_referenced_kstaled(struct page *page, bool is_locked,
+					   struct page_referenced_info *info)
+{
+	info->vm_flags = 0;
+	info->pr_flags = PR_FOR_KSTALED;
+
+	/*
+	 * Always set PageIdle at the start of a scanning interval. It will
+	 * get cleared if a young page reference is encountered; otherwise
+	 * the page will be counted as idle at the next kstaled scan cycle.
+	 */
+	if (!PageIdle(page)) {
+		SetPageIdle(page);
+		info->pr_flags |= PR_REFERENCED;
+	}
+
+	__page_referenced(page, is_locked, NULL, info);
+}
+#endif
+
 /*
  * Return values of try_to_unmap
  */
diff --git a/mm/rmap.c b/mm/rmap.c
index f87afd0..fa8440e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -670,6 +670,8 @@ void page_referenced_one(struct page *page, struct vm_area_struct *vma,
 			return;
 		}
 
+		info->pr_flags |= PR_DIRTY;
+
 		if (vma->vm_flags & VM_LOCKED) {
 			spin_unlock(&mm->page_table_lock);
 			*mapcount = 0;	/* break early from loop */
@@ -678,8 +680,17 @@ void page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		/* go ahead even if the pmd is pmd_trans_splitting() */
-		if (pmdp_clear_flush_young_notify(vma, address, pmd))
-			referenced = true;
+		if (!(info->pr_flags & PR_FOR_KSTALED)) {
+			if (pmdp_clear_flush_young_notify(vma, address, pmd)) {
+				referenced = true;
+				clear_page_idle(page);
+			}
+		} else {
+			if (pmdp_test_and_clear_young(vma, address, pmd)) {
+				referenced = true;
+				set_page_young(page);
+			}
+		}
 		spin_unlock(&mm->page_table_lock);
 	} else {
 		pte_t *pte;
@@ -693,6 +704,9 @@ void page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		if (!pte)
 			return;
 
+		if (pte_dirty(*pte))
+			info->pr_flags |= PR_DIRTY;
+
 		if (vma->vm_flags & VM_LOCKED) {
 			pte_unmap_unlock(pte, ptl);
 			*mapcount = 0;	/* break early from loop */
@@ -700,23 +714,38 @@ void page_referenced_one(struct page *page, struct vm_area_struct *vma,
 			return;
 		}
 
-		if (ptep_clear_flush_young_notify(vma, address, pte)) {
+		if (!(info->pr_flags & PR_FOR_KSTALED)) {
+			if (ptep_clear_flush_young_notify(vma, address, pte)) {
+				/*
+				 * Don't treat a reference through a
+				 * sequentially read mapping as such.
+				 * If the page has been used in another
+				 * mapping, we will catch it; if this other
+				 * mapping is already gone, the unmap path
+				 * will have set PG_referenced or activated
+				 * the page.
+				 */
+				if (likely(!VM_SequentialReadHint(vma)))
+					referenced = true;
+				clear_page_idle(page);
+			}
+		} else {
 			/*
-			 * Don't treat a reference through a sequentially read
-			 * mapping as such.  If the page has been used in
-			 * another mapping, we will catch it; if this other
-			 * mapping is already gone, the unmap path will have
-			 * set PG_referenced or activated the page.
+			 * Within page_referenced_kstaled():
+			 * skip TLB shootdown & VM_SequentialReadHint heuristic
 			 */
-			if (likely(!VM_SequentialReadHint(vma)))
+			if (ptep_test_and_clear_young(vma, address, pte)) {
 				referenced = true;
+				set_page_young(page);
+			}
 		}
 		pte_unmap_unlock(pte, ptl);
 	}
 
 	/* Pretend the page is referenced if the task has the
 	   swap token and is in the middle of a page fault. */
-	if (mm != current->mm && has_swap_token(mm) &&
+	if (!(info->pr_flags & PR_FOR_KSTALED) &&
+			mm != current->mm && has_swap_token(mm) &&
 			rwsem_is_locked(&mm->mmap_sem))
 		referenced = true;
 
@@ -828,7 +857,7 @@ static void page_referenced_file(struct page *page,
 }
 
 /**
- * page_referenced - test if the page was referenced
+ * __page_referenced - test if the page was referenced
  * @page: the page to test
  * @is_locked: caller holds lock on the page
  * @mem_cont: target memory controller
@@ -838,16 +867,13 @@ static void page_referenced_file(struct page *page,
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of ptes which referenced the page.
  */
-void page_referenced(struct page *page,
-		     int is_locked,
-		     struct mem_cgroup *mem_cont,
-		     struct page_referenced_info *info)
+void __page_referenced(struct page *page,
+		       int is_locked,
+		       struct mem_cgroup *mem_cont,
+		       struct page_referenced_info *info)
 {
 	int we_locked = 0;
 
-	info->vm_flags = 0;
-	info->pr_flags = 0;
-
 	if (page_mapped(page) && page_rmapping(page)) {
 		if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
 			we_locked = trylock_page(page);
diff --git a/mm/swap.c b/mm/swap.c
index 3a442f1..d65b69e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -344,6 +344,7 @@ void mark_page_accessed(struct page *page)
 	} else if (!PageReferenced(page)) {
 		SetPageReferenced(page);
 	}
+	clear_page_idle(page);
 }
 
 EXPORT_SYMBOL(mark_page_accessed);
-- 
1.7.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ