lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu,  2 Jan 2014 16:12:18 +0900
From:	Minchan Kim <minchan@...nel.org>
To:	linux-mm@...ck.org, linux-kernel@...r.kernel.org
Cc:	Andrew Morton <akpm@...ux-foundation.org>,
	Mel Gorman <mgorman@...e.de>, Hugh Dickins <hughd@...gle.com>,
	Dave Hansen <dave.hansen@...el.com>,
	Rik van Riel <riel@...hat.com>,
	KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>,
	Michel Lespinasse <walken@...gle.com>,
	Johannes Weiner <hannes@...xchg.org>,
	John Stultz <john.stultz@...aro.org>,
	Dhaval Giani <dhaval.giani@...il.com>,
	"H. Peter Anvin" <hpa@...or.com>,
	Android Kernel Team <kernel-team@...roid.com>,
	Robert Love <rlove@...gle.com>, Mel Gorman <mel@....ul.ie>,
	Dmitry Adamushko <dmitry.adamushko@...il.com>,
	Dave Chinner <david@...morbit.com>, Neil Brown <neilb@...e.de>,
	Andrea Righi <andrea@...terlinux.com>,
	Andrea Arcangeli <aarcange@...hat.com>,
	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>,
	Mike Hommey <mh@...ndium.org>, Taras Glek <tglek@...illa.com>,
	Jan Kara <jack@...e.cz>,
	KOSAKI Motohiro <kosaki.motohiro@...il.com>,
	Rob Clark <robdclark@...il.com>, Jason Evans <je@...com>,
	Minchan Kim <minchan@...nel.org>
Subject: [PATCH v10 10/16] vrange: Purging vrange-anon pages from shrinker

This patch provides the logic to discard anonymous vranges by
generating the page list for the volatile ranges setting the ptes
volatile, and discarding the pages.

Cc: Mel Gorman <mel@....ul.ie>
Cc: Hugh Dickins <hughd@...gle.com>
Cc: Dave Hansen <dave.hansen@...el.com>
Cc: Rik van Riel <riel@...hat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@...il.com>
Cc: Michel Lespinasse <walken@...gle.com>
Cc: Johannes Weiner <hannes@...xchg.org>
Signed-off-by: John Stultz <john.stultz@...aro.org>
[jstultz: Code tweaks and commit log rewording]
Signed-off-by: Minchan Kim <minchan@...nel.org>
---
 mm/vrange.c |  184 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 183 insertions(+), 1 deletion(-)

diff --git a/mm/vrange.c b/mm/vrange.c
index 4a52b7a05f9a..0fa669c56ab8 100644
--- a/mm/vrange.c
+++ b/mm/vrange.c
@@ -11,6 +11,8 @@
 #include <linux/hugetlb.h>
 #include "internal.h"
 #include <linux/mmu_notifier.h>
+#include <linux/mm_inline.h>
+#include <linux/migrate.h>
 
 static struct kmem_cache *vrange_cachep;
 
@@ -19,6 +21,11 @@ static struct vrange_list {
 	spinlock_t lock;
 } vrange_list;
 
+struct vrange_walker {
+	struct vm_area_struct *vma;
+	struct list_head *pagelist;
+};
+
 static inline unsigned long vrange_size(struct vrange *range)
 {
 	return range->node.last + 1 - range->node.start;
@@ -682,11 +689,186 @@ static struct vrange *vrange_isolate(void)
 	return vrange;
 }
 
-static int discard_vrange(struct vrange *vrange, unsigned long *nr_discard)
+static unsigned long discard_vrange_pagelist(struct list_head *page_list)
+{
+	struct page *page;
+	unsigned int nr_discard = 0;
+	LIST_HEAD(ret_pages);
+	LIST_HEAD(free_pages);
+
+	while (!list_empty(page_list)) {
+		int err;
+		page = list_entry(page_list->prev, struct page, lru);
+		list_del(&page->lru);
+		if (!trylock_page(page)) {
+			list_add(&page->lru, &ret_pages);
+			continue;
+		}
+
+		/*
+		 * discard_vpage returns unlocked page if it
+		 * is successful
+		 */
+		err = discard_vpage(page);
+		if (err) {
+			unlock_page(page);
+			list_add(&page->lru, &ret_pages);
+			continue;
+		}
+
+		ClearPageActive(page);
+		list_add(&page->lru, &free_pages);
+		dec_zone_page_state(page, NR_ISOLATED_ANON);
+		nr_discard++;
+	}
+
+	free_hot_cold_page_list(&free_pages, 1);
+	list_splice(&ret_pages, page_list);
+	return nr_discard;
+}
+
+static void vrange_pte_entry(pte_t pteval, unsigned long address,
+		unsigned ptent_size, struct mm_walk *walk)
 {
+	struct page *page;
+	struct vrange_walker *vw = walk->private;
+	struct vm_area_struct *vma = vw->vma;
+	struct list_head *pagelist = vw->pagelist;
+
+	if (pte_none(pteval))
+		return;
+
+	if (!pte_present(pteval))
+		return;
+
+	page = vm_normal_page(vma, address, pteval);
+	if (unlikely(!page))
+		return;
+
+	if (!PageLRU(page) || PageLocked(page))
+		return;
+
+	BUG_ON(PageCompound(page));
+
+	if (isolate_lru_page(page))
+		return;
+
+	list_add(&page->lru, pagelist);
+
+	VM_BUG_ON(page_is_file_cache(page));
+	inc_zone_page_state(page, NR_ISOLATED_ANON);
+}
+
+static int vrange_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+		struct mm_walk *walk)
+{
+	struct vrange_walker *vw = walk->private;
+	struct vm_area_struct *uninitialized_var(vma);
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	vma = vw->vma;
+	split_huge_page_pmd(vma, addr, pmd);
+	if (pmd_trans_unstable(pmd))
+		return 0;
+
+	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE)
+		vrange_pte_entry(*pte, addr, PAGE_SIZE, walk);
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+
 	return 0;
 }
 
+static unsigned long discard_vma_pages(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long start,
+		unsigned long end)
+{
+	unsigned long ret = 0;
+	LIST_HEAD(pagelist);
+	struct vrange_walker vw;
+	struct mm_walk vrange_walk = {
+		.pmd_entry = vrange_pte_range,
+		.mm = vma->vm_mm,
+		.private = &vw,
+	};
+
+	vw.pagelist = &pagelist;
+	vw.vma = vma;
+
+	walk_page_range(start, end, &vrange_walk);
+
+	if (!list_empty(&pagelist))
+		ret = discard_vrange_pagelist(&pagelist);
+
+	putback_lru_pages(&pagelist);
+	return ret;
+}
+
+/*
+ * vrange->owner isn't stable because caller doesn't hold vrange_lock
+ * so avoid touching vrange->owner.
+ */
+static int __discard_vrange_anon(struct mm_struct *mm, struct vrange *vrange,
+		unsigned long *ret_discard)
+{
+	struct vm_area_struct *vma;
+	unsigned long nr_discard = 0;
+	unsigned long start = vrange->node.start;
+	unsigned long end = vrange->node.last + 1;
+	int ret = 0;
+
+	/* It prevent to destroy vma when the process exist */
+	if (!atomic_inc_not_zero(&mm->mm_users))
+		return ret;
+
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		mmput(mm);
+		ret = -EAGAIN;
+		goto out; /* this vrange could be retried */
+	}
+
+	vma = find_vma(mm, start);
+	if (!vma || (vma->vm_start >= end))
+		goto out_unlock;
+
+	for (; vma; vma = vma->vm_next) {
+		if (vma->vm_start >= end)
+			break;
+		BUG_ON(vma->vm_flags & (VM_SPECIAL|VM_LOCKED|VM_MIXEDMAP|
+					VM_HUGETLB));
+		cond_resched();
+		nr_discard += discard_vma_pages(mm, vma,
+				max_t(unsigned long, start, vma->vm_start),
+				min_t(unsigned long, end, vma->vm_end));
+	}
+out_unlock:
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+	*ret_discard = nr_discard;
+out:
+	return ret;
+}
+
+static int discard_vrange(struct vrange *vrange, unsigned long *nr_discard)
+{
+	int ret = 0;
+	struct mm_struct *mm;
+	struct vrange_root *vroot;
+	vroot = vrange->owner;
+
+	/* TODO : handle VRANGE_FILE */
+	if (vroot->type != VRANGE_MM)
+		goto out;
+
+	mm = vroot->object;
+	ret = __discard_vrange_anon(mm, vrange, nr_discard);
+out:
+	return ret;
+}
+
+
 #define VRANGE_SCAN_THRESHOLD	(4 << 20)
 
 unsigned long shrink_vrange(enum lru_list lru, struct lruvec *lruvec,
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ