lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20091204174253.589D.A69D9226@jp.fujitsu.com>
Date:	Fri,  4 Dec 2009 17:43:46 +0900 (JST)
From:	KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>
To:	LKML <linux-kernel@...r.kernel.org>
Cc:	kosaki.motohiro@...fujitsu.com, linux-mm <linux-mm@...ck.org>,
	Rik van Riel <riel@...hat.com>,
	Andrea Arcangeli <aarcange@...hat.com>,
	Larry Woodman <lwoodman@...hat.com>
Subject: [PATCH 4/7] Replace page_referenced() with wipe_page_reference()

>From d9110c2804a4b88e460edada140b8bb0f7eb3a18 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>
Date: Fri, 4 Dec 2009 11:45:18 +0900
Subject: [PATCH 4/7] Replace page_referenced() with wipe_page_reference()

page_referenced() imply "test the page was referenced or not", but
shrink_active_list() use it for drop pte young bit. then, it should be
renamed.

Plus, vm_flags argument is really ugly. instead, introduce new
struct page_reference_context, it's for collect some statistics.

This patch doesn't have any behavior change.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>
---
 include/linux/ksm.h  |   11 +++--
 include/linux/rmap.h |   16 +++++--
 mm/ksm.c             |   17 +++++---
 mm/rmap.c            |  112 ++++++++++++++++++++++++-------------------------
 mm/vmscan.c          |   46 ++++++++++++--------
 5 files changed, 112 insertions(+), 90 deletions(-)

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index bed5f16..e1a60d3 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -85,8 +85,8 @@ static inline struct page *ksm_might_need_to_copy(struct page *page,
 	return ksm_does_need_to_copy(page, vma, address);
 }
 
-int page_referenced_ksm(struct page *page,
-			struct mem_cgroup *memcg, unsigned long *vm_flags);
+int wipe_page_reference_ksm(struct page *page, struct mem_cgroup *memcg,
+			    struct page_reference_context *refctx);
 int try_to_unmap_ksm(struct page *page, enum ttu_flags flags);
 int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
 		  struct vm_area_struct *, unsigned long, void *), void *arg);
@@ -120,10 +120,11 @@ static inline struct page *ksm_might_need_to_copy(struct page *page,
 	return page;
 }
 
-static inline int page_referenced_ksm(struct page *page,
-			struct mem_cgroup *memcg, unsigned long *vm_flags)
+static inline int wipe_page_reference_ksm(struct page *page,
+					  struct mem_cgroup *memcg,
+					  struct page_reference_context *refctx)
 {
-	return 0;
+	return SWAP_SUCCESS;
 }
 
 static inline int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b019ae6..564d981 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -108,13 +108,21 @@ static inline void page_dup_rmap(struct page *page)
 	atomic_inc(&page->_mapcount);
 }
 
+struct page_reference_context {
+	int is_page_locked;
+	unsigned long referenced;
+	unsigned long exec_referenced;
+	int maybe_mlocked;	/* found VM_LOCKED, but it's unstable result */
+};
+
 /*
  * Called from mm/vmscan.c to handle paging out
  */
-int page_referenced(struct page *, int is_locked,
-			struct mem_cgroup *cnt, unsigned long *vm_flags);
-int page_referenced_one(struct page *, struct vm_area_struct *,
-	unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
+int wipe_page_reference(struct page *page, struct mem_cgroup *memcg,
+		    struct page_reference_context *refctx);
+int wipe_page_reference_one(struct page *page,
+			    struct page_reference_context *refctx,
+			    struct vm_area_struct *vma, unsigned long address);
 
 enum ttu_flags {
 	TTU_UNMAP = 0,			/* unmap mode */
diff --git a/mm/ksm.c b/mm/ksm.c
index c1647a2..3c121c8 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1544,15 +1544,15 @@ struct page *ksm_does_need_to_copy(struct page *page,
 	return new_page;
 }
 
-int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
-			unsigned long *vm_flags)
+int wipe_page_reference_ksm(struct page *page, struct mem_cgroup *memcg,
+			    struct page_reference_context *refctx)
 {
 	struct stable_node *stable_node;
 	struct rmap_item *rmap_item;
 	struct hlist_node *hlist;
 	unsigned int mapcount = page_mapcount(page);
-	int referenced = 0;
 	int search_new_forks = 0;
+	int ret = SWAP_SUCCESS;
 
 	VM_BUG_ON(!PageKsm(page));
 	VM_BUG_ON(!PageLocked(page));
@@ -1582,10 +1582,15 @@ again:
 			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
 				continue;
 
-			referenced += page_referenced_one(page, vma,
-				rmap_item->address, &mapcount, vm_flags);
+			ret = wipe_page_reference_one(page, refctx, vma,
+						      rmap_item->address);
+			if (ret != SWAP_SUCCESS)
+				goto out;
+			mapcount--;
 			if (!search_new_forks || !mapcount)
 				break;
+			if (refctx->maybe_mlocked)
+				goto out;
 		}
 		spin_unlock(&anon_vma->lock);
 		if (!mapcount)
@@ -1594,7 +1599,7 @@ again:
 	if (!search_new_forks++)
 		goto again;
 out:
-	return referenced;
+	return ret;
 }
 
 int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
diff --git a/mm/rmap.c b/mm/rmap.c
index fb0983a..2f4451b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -371,17 +371,16 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 }
 
 /*
- * Subfunctions of page_referenced: page_referenced_one called
- * repeatedly from either page_referenced_anon or page_referenced_file.
+ * Subfunctions of wipe_page_reference: wipe_page_reference_one called
+ * repeatedly from either wipe_page_reference_anon or wipe_page_reference_file.
  */
-int page_referenced_one(struct page *page, struct vm_area_struct *vma,
-			unsigned long address, unsigned int *mapcount,
-			unsigned long *vm_flags)
+int wipe_page_reference_one(struct page *page,
+			    struct page_reference_context *refctx,
+			    struct vm_area_struct *vma, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte;
 	spinlock_t *ptl;
-	int referenced = 0;
 
 	/*
 	 * Don't want to elevate referenced for mlocked page that gets this far,
@@ -389,8 +388,7 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 	 * unevictable list.
 	 */
 	if (vma->vm_flags & VM_LOCKED) {
-		*mapcount = 0;	/* break early from loop */
-		*vm_flags |= VM_LOCKED;
+		refctx->maybe_mlocked = 1;
 		goto out;
 	}
 
@@ -406,37 +404,38 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		 * mapping is already gone, the unmap path will have
 		 * set PG_referenced or activated the page.
 		 */
-		if (likely(!VM_SequentialReadHint(vma)))
-			referenced++;
+		if (likely(!VM_SequentialReadHint(vma))) {
+			refctx->referenced++;
+			if (vma->vm_flags & VM_EXEC) {
+				refctx->exec_referenced++;
+			}
+		}
 	}
 
 	/* Pretend the page is referenced if the task has the
 	   swap token and is in the middle of a page fault. */
 	if (mm != current->mm && has_swap_token(mm) &&
 			rwsem_is_locked(&mm->mmap_sem))
-		referenced++;
+		refctx->referenced++;
 
-	(*mapcount)--;
 	pte_unmap_unlock(pte, ptl);
 
-	if (referenced)
-		*vm_flags |= vma->vm_flags;
 out:
-	return referenced;
+	return SWAP_SUCCESS;
 }
 
-static int page_referenced_anon(struct page *page,
-				struct mem_cgroup *mem_cont,
-				unsigned long *vm_flags)
+static int wipe_page_reference_anon(struct page *page,
+				    struct mem_cgroup *memcg,
+				    struct page_reference_context *refctx)
 {
 	unsigned int mapcount;
 	struct anon_vma *anon_vma;
 	struct vm_area_struct *vma;
-	int referenced = 0;
+	int ret = SWAP_SUCCESS;
 
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
-		return referenced;
+		return ret;
 
 	mapcount = page_mapcount(page);
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
@@ -448,20 +447,22 @@ static int page_referenced_anon(struct page *page,
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
+		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 			continue;
-		referenced += page_referenced_one(page, vma, address,
-						  &mapcount, vm_flags);
-		if (!mapcount)
+		ret = wipe_page_reference_one(page, refctx, vma, address);
+		if (ret != SWAP_SUCCESS)
+			break;
+		mapcount--;
+		if (!mapcount || refctx->maybe_mlocked)
 			break;
 	}
 
 	page_unlock_anon_vma(anon_vma);
-	return referenced;
+	return ret;
 }
 
 /**
- * page_referenced_file - referenced check for object-based rmap
+ * wipe_page_reference_file - wipe page reference for object-based rmap
  * @page: the page we're checking references on.
  * @mem_cont: target memory controller
  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
@@ -471,18 +472,18 @@ static int page_referenced_anon(struct page *page,
  * pointer, then walking the chain of vmas it holds.  It returns the number
  * of references it found.
  *
- * This function is only called from page_referenced for object-based pages.
+ * This function is only called from wipe_page_reference for object-based pages.
  */
-static int page_referenced_file(struct page *page,
-				struct mem_cgroup *mem_cont,
-				unsigned long *vm_flags)
+static int wipe_page_reference_file(struct page *page,
+				    struct mem_cgroup *memcg,
+				    struct page_reference_context *refctx)
 {
 	unsigned int mapcount;
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
 	struct prio_tree_iter iter;
-	int referenced = 0;
+	int ret = SWAP_SUCCESS;
 
 	/*
 	 * The caller's checks on page->mapping and !PageAnon have made
@@ -516,65 +517,62 @@ static int page_referenced_file(struct page *page,
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
+		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 			continue;
-		referenced += page_referenced_one(page, vma, address,
-						  &mapcount, vm_flags);
-		if (!mapcount)
+		ret = wipe_page_reference_one(page, refctx, vma, address);
+		if (ret != SWAP_SUCCESS)
+			break;
+		mapcount--;
+		if (!mapcount || refctx->maybe_mlocked)
 			break;
 	}
 
 	spin_unlock(&mapping->i_mmap_lock);
-	return referenced;
+	return ret;
 }
 
 /**
- * page_referenced - test if the page was referenced
+ * wipe_page_reference - clear and test the page reference and pte young bit
  * @page: the page to test
- * @is_locked: caller holds lock on the page
- * @mem_cont: target memory controller
- * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
+ * @memcg: target memory controller
+ * @refctx: context for collect some statistics
  *
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of ptes which referenced the page.
  */
-int page_referenced(struct page *page,
-		    int is_locked,
-		    struct mem_cgroup *mem_cont,
-		    unsigned long *vm_flags)
+int wipe_page_reference(struct page *page,
+			struct mem_cgroup *memcg,
+			struct page_reference_context *refctx)
 {
-	int referenced = 0;
 	int we_locked = 0;
+	int ret = SWAP_SUCCESS;
 
 	if (TestClearPageReferenced(page))
-		referenced++;
+		refctx->referenced++;
 
-	*vm_flags = 0;
 	if (page_mapped(page) && page_rmapping(page)) {
-		if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
+		if (!refctx->is_page_locked &&
+		    (!PageAnon(page) || PageKsm(page))) {
 			we_locked = trylock_page(page);
 			if (!we_locked) {
-				referenced++;
+				refctx->referenced++;
 				goto out;
 			}
 		}
 		if (unlikely(PageKsm(page)))
-			referenced += page_referenced_ksm(page, mem_cont,
-								vm_flags);
+			ret = wipe_page_reference_ksm(page, memcg, refctx);
 		else if (PageAnon(page))
-			referenced += page_referenced_anon(page, mem_cont,
-								vm_flags);
+			ret = wipe_page_reference_anon(page, memcg, refctx);
 		else if (page->mapping)
-			referenced += page_referenced_file(page, mem_cont,
-								vm_flags);
+			ret = wipe_page_reference_file(page, memcg, refctx);
 		if (we_locked)
 			unlock_page(page);
 	}
 out:
 	if (page_test_and_clear_young(page))
-		referenced++;
+		refctx->referenced++;
 
-	return referenced;
+	return ret;
 }
 
 static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4ba08da..0db9c06 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -569,7 +569,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 	struct pagevec freed_pvec;
 	int pgactivate = 0;
 	unsigned long nr_reclaimed = 0;
-	unsigned long vm_flags;
 
 	cond_resched();
 
@@ -578,7 +577,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		struct address_space *mapping;
 		struct page *page;
 		int may_enter_fs;
-		int referenced;
+		struct page_reference_context refctx = {
+			.is_page_locked = 1,
+		};
 
 		cond_resched();
 
@@ -620,16 +621,15 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto keep_locked;
 		}
 
-		referenced = page_referenced(page, 1,
-						sc->mem_cgroup, &vm_flags);
+		wipe_page_reference(page, sc->mem_cgroup, &refctx);
 		/*
 		 * In active use or really unfreeable?  Activate it.
 		 * If page which have PG_mlocked lost isoltation race,
 		 * try_to_unmap moves it to unevictable list
 		 */
 		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
-					referenced && page_mapped(page)
-					&& !(vm_flags & VM_LOCKED))
+		    page_mapped(page) && refctx.referenced &&
+		    !refctx.maybe_mlocked)
 			goto activate_locked;
 
 		/*
@@ -664,7 +664,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		}
 
 		if (PageDirty(page)) {
-			if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
+			if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
+			    refctx.referenced)
 				goto keep_locked;
 			if (!may_enter_fs)
 				goto keep_locked;
@@ -1243,9 +1244,10 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
  *
  * If the pages are mostly unmapped, the processing is fast and it is
  * appropriate to hold zone->lru_lock across the whole operation.  But if
- * the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop zone->lru_lock around each page.  It's impossible to balance
- * this, so instead we remove the pages from the LRU while processing them.
+ * the pages are mapped, the processing is slow (because wipe_page_reference()
+ * walk each ptes) so we should drop zone->lru_lock around each page.  It's
+ * impossible to balance this, so instead we remove the pages from the LRU
+ * while processing them.
  * It is safe to rely on PG_active against the non-LRU pages in here because
  * nobody will play with that bit on a non-LRU page.
  *
@@ -1291,7 +1293,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 {
 	unsigned long nr_taken;
 	unsigned long pgscanned;
-	unsigned long vm_flags;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
 	LIST_HEAD(l_active);
 	LIST_HEAD(l_inactive);
@@ -1325,6 +1326,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	spin_unlock_irq(&zone->lru_lock);
 
 	while (!list_empty(&l_hold)) {
+		struct page_reference_context refctx = {
+			.is_page_locked = 0,
+		};
+
 		cond_resched();
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
@@ -1334,10 +1339,17 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			continue;
 		}
 
-		/* page_referenced clears PageReferenced */
-		if (page_mapped(page) &&
-		    page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
+		if (!page_mapped(page)) {
+			ClearPageActive(page);	/* we are de-activating */
+			list_add(&page->lru, &l_inactive);
+			continue;
+		}
+
+		wipe_page_reference(page, sc->mem_cgroup, &refctx);
+
+		if (refctx.referenced)
 			nr_rotated++;
+		if (refctx.exec_referenced && page_is_file_cache(page)) {
 			/*
 			 * Identify referenced, file-backed active pages and
 			 * give them one more trip around the active list. So
@@ -1347,10 +1359,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			 * IO, plus JVM can create lots of anon VM_EXEC pages,
 			 * so we ignore them here.
 			 */
-			if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) {
-				list_add(&page->lru, &l_active);
-				continue;
-			}
+			list_add(&page->lru, &l_active);
+			continue;
 		}
 
 		ClearPageActive(page);	/* we are de-activating */
-- 
1.6.5.2



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ