Message-ID: <1462309367.21143.12.camel@linux.intel.com>
Date:	Tue, 03 May 2016 14:02:47 -0700
From:	Tim Chen <tim.c.chen@...ux.intel.com>
To:	Andrew Morton <akpm@...ux-foundation.org>,
	Vladimir Davydov <vdavydov@...tuozzo.com>,
	Johannes Weiner <hannes@...xchg.org>,
	Michal Hocko <mhocko@...e.cz>,
	Minchan Kim <minchan@...nel.org>,
	Hugh Dickins <hughd@...gle.com>
Cc:	"Kirill A.Shutemov" <kirill.shutemov@...ux.intel.com>,
	Andi Kleen <andi@...stfloor.org>,
	Aaron Lu <aaron.lu@...el.com>,
	Huang Ying <ying.huang@...el.com>,
	linux-mm <linux-mm@...ck.org>, linux-kernel@...r.kernel.org
Subject: [PATCH 5/7] mm: Batch addition of pages to swap cache

When a page is to be swapped out, it needs to be added to the swap cache
and then removed once the paging has completed.  The swap partition's
mapping tree lock is acquired for each anonymous page added to the
swap cache.

This patch creates new functions add_to_swap_batch and
__add_to_swap_cache_batch that allow multiple pages destined for the
same swap partition to be added to that partition's swap cache under a
single acquisition of the mapping tree lock.  These functions extend the
original add_to_swap and __add_to_swap_cache.  This reduces contention
on the swap partition's mapping tree lock when we are actively reclaiming
memory and swapping pages.
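
(For illustration only: the swap_state.c hunk below is the actual
implementation; the stand-alone userspace sketch here just shows the
lock-amortization pattern being applied, i.e. one lock acquisition per
batch instead of one per item.  The struct cache, insert_one() and
insert_batch() names are hypothetical and are not part of the kernel API.)

    /*
     * Simplified userspace sketch of the lock-batching pattern used by
     * __add_to_swap_cache_batch(): take the lock once for a whole batch
     * instead of once per item.  All names here are hypothetical.
     */
    #include <pthread.h>
    #include <stdio.h>

    #define NR_SLOTS 64

    struct cache {
    	pthread_mutex_t lock;
    	int slots[NR_SLOTS];
    	int nr;
    };

    /* Unbatched: one lock/unlock round trip per item. */
    static void insert_one(struct cache *c, int val)
    {
    	pthread_mutex_lock(&c->lock);
    	if (c->nr < NR_SLOTS)
    		c->slots[c->nr++] = val;
    	pthread_mutex_unlock(&c->lock);
    }

    /* Batched: a single lock acquisition covers the whole array. */
    static void insert_batch(struct cache *c, const int vals[], int nr)
    {
    	int i;

    	pthread_mutex_lock(&c->lock);
    	for (i = 0; i < nr && c->nr < NR_SLOTS; i++)
    		c->slots[c->nr++] = vals[i];
    	pthread_mutex_unlock(&c->lock);
    }

    int main(void)
    {
    	struct cache c = { .lock = PTHREAD_MUTEX_INITIALIZER, .nr = 0 };
    	int vals[] = { 1, 2, 3, 4 };

    	insert_one(&c, 0);
    	insert_batch(&c, vals, 4);
    	printf("%d entries cached\n", c.nr);
    	return 0;
    }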

Signed-off-by: Tim Chen <tim.c.chen@...ux.intel.com>
---
 include/linux/swap.h |   2 +
 mm/swap_state.c      | 248 +++++++++++++++++++++++++++++++++++++--------------
 mm/vmscan.c          |  19 ++--
 3 files changed, 196 insertions(+), 73 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index da6d994..cd06f2a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -373,6 +373,8 @@ extern unsigned long total_swapcache_pages(void);
 extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *, struct list_head *list,
 			swp_entry_t *entry);
+extern void add_to_swap_batch(struct page *pages[], struct list_head *list,
+			swp_entry_t entries[], int ret_codes[], int nr);
 extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
 extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
 extern void __delete_from_swap_cache(struct page *);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index bad02c1..ce02024 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -72,49 +72,94 @@ void show_swap_cache_info(void)
 	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
 }
 
-/*
- * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
- * but sets SwapCache flag and private instead of mapping and index.
- */
-int __add_to_swap_cache(struct page *page, swp_entry_t entry)
+void __add_to_swap_cache_batch(struct page *pages[], swp_entry_t entries[],
+				int ret[], int nr)
 {
-	int error;
+	int error, i;
 	struct address_space *address_space;
+	struct address_space *prev;
+	struct page *page;
+	swp_entry_t entry;
 
-	VM_BUG_ON_PAGE(!PageLocked(page), page);
-	VM_BUG_ON_PAGE(PageSwapCache(page), page);
-	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
+	prev = NULL;
+	address_space = NULL;
+	for (i = 0; i < nr; ++i) {
+		/* error at pre-processing stage, swap entry already released */
+		if (ret[i] == -ENOENT)
+			continue;
 
-	get_page(page);
-	SetPageSwapCache(page);
-	set_page_private(page, entry.val);
+		page = pages[i];
+		entry = entries[i];
 
-	address_space = swap_address_space(entry);
-	spin_lock_irq(&address_space->tree_lock);
-	error = radix_tree_insert(&address_space->page_tree,
-					entry.val, page);
-	if (likely(!error)) {
-		address_space->nrpages++;
-		__inc_zone_page_state(page, NR_FILE_PAGES);
-		INC_CACHE_INFO(add_total);
-	}
-	spin_unlock_irq(&address_space->tree_lock);
+		VM_BUG_ON_PAGE(!PageLocked(page), page);
+		VM_BUG_ON_PAGE(PageSwapCache(page), page);
+		VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
 
-	if (unlikely(error)) {
-		/*
-		 * Only the context which have set SWAP_HAS_CACHE flag
-		 * would call add_to_swap_cache().
-		 * So add_to_swap_cache() doesn't returns -EEXIST.
-		 */
-		VM_BUG_ON(error == -EEXIST);
-		set_page_private(page, 0UL);
-		ClearPageSwapCache(page);
-		put_page(page);
+		get_page(page);
+		SetPageSwapCache(page);
+		set_page_private(page, entry.val);
+
+		address_space = swap_address_space(entry);
+		if (prev != address_space) {
+			if (prev)
+				spin_unlock_irq(&prev->tree_lock);
+			spin_lock_irq(&address_space->tree_lock);
+		}
+		error = radix_tree_insert(&address_space->page_tree,
+				entry.val, page);
+		if (likely(!error)) {
+			address_space->nrpages++;
+			__inc_zone_page_state(page, NR_FILE_PAGES);
+			INC_CACHE_INFO(add_total);
+		}
+
+		if (unlikely(error)) {
+			spin_unlock_irq(&address_space->tree_lock);
+			address_space = NULL;
+			/*
+			 * Only the context which have set SWAP_HAS_CACHE flag
+			 * would call add_to_swap_cache().
+			 * So add_to_swap_cache() doesn't returns -EEXIST.
+			 */
+			VM_BUG_ON(error == -EEXIST);
+			set_page_private(page, 0UL);
+			ClearPageSwapCache(page);
+			put_page(page);
+		}
+		prev = address_space;
+		ret[i] = error;
 	}
+	if (address_space)
+		spin_unlock_irq(&address_space->tree_lock);
+}
 
-	return error;
+/*
+ * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
+ * but sets SwapCache flag and private instead of mapping and index.
+ */
+int __add_to_swap_cache(struct page *page, swp_entry_t entry)
+{
+	swp_entry_t	entries[1];
+	struct page	*pages[1];
+	int	ret[1];
+
+	pages[0] = page;
+	entries[0] = entry;
+	__add_to_swap_cache_batch(pages, entries, ret, 1);
+	return ret[0];
 }
 
+void add_to_swap_cache_batch(struct page *pages[], swp_entry_t entries[],
+				gfp_t gfp_mask, int ret[], int nr)
+{
+	int error;
+
+	error = radix_tree_maybe_preload(gfp_mask);
+	if (!error) {
+		__add_to_swap_cache_batch(pages, entries, ret, nr);
+		radix_tree_preload_end();
+	}
+}
 
 int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 {
@@ -151,6 +196,73 @@ void __delete_from_swap_cache(struct page *page)
 	INC_CACHE_INFO(del_total);
 }
 
+void add_to_swap_batch(struct page *pages[], struct list_head *list,
+			swp_entry_t entries[], int ret_codes[], int nr)
+{
+	swp_entry_t *entry;
+	struct page *page;
+	int i;
+
+	for (i = 0; i < nr; ++i) {
+		entry = &entries[i];
+		page = pages[i];
+
+		VM_BUG_ON_PAGE(!PageLocked(page), page);
+		VM_BUG_ON_PAGE(!PageUptodate(page), page);
+
+		ret_codes[i] = 1;
+
+		if (!entry->val)
+			ret_codes[i] = -ENOENT;
+
+		if (mem_cgroup_try_charge_swap(page, *entry)) {
+			swapcache_free(*entry);
+			ret_codes[i] = 0;
+		}
+
+		if (unlikely(PageTransHuge(page)))
+			if (unlikely(split_huge_page_to_list(page, list))) {
+				swapcache_free(*entry);
+				ret_codes[i] = -ENOENT;
+				continue;
+			}
+	}
+
+	/*
+	 * Radix-tree node allocations from PF_MEMALLOC contexts could
+	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
+	 * stops emergency reserves from being allocated.
+	 *
+	 * TODO: this could cause a theoretical memory reclaim
+	 * deadlock in the swap out path.
+	 */
+	/*
+	 * Add it to the swap cache
+	 */
+	add_to_swap_cache_batch(pages, entries,
+			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN,
+				ret_codes, nr);
+
+	for (i = 0; i < nr; ++i) {
+		entry = &entries[i];
+		page = pages[i];
+
+		if (!ret_codes[i]) {    /* Success */
+			ret_codes[i] = 1;
+			continue;
+		} else {        /* -ENOMEM radix-tree allocation failure */
+			/*
+			 * add_to_swap_cache() doesn't return -EEXIST,
+			 * so we can safely clear SWAP_HAS_CACHE flag.
+			 */
+			if (ret_codes[i] != -ENOENT)
+				swapcache_free(*entry);
+			ret_codes[i] = 0;
+			continue;
+		}
+	}
+}
+
 /**
  * add_to_swap - allocate swap space for a page
  * @page: page we want to move to swap
@@ -161,54 +273,56 @@ void __delete_from_swap_cache(struct page *page)
  */
 int add_to_swap(struct page *page, struct list_head *list, swp_entry_t *entry)
 {
-	int err;
-	swp_entry_t ent;
+	int ret[1];
+	swp_entry_t ent[1];
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(!PageUptodate(page), page);
 
 	if (!entry) {
-		ent = get_swap_page();
-		entry = &ent;
+		ent[0] = get_swap_page();
+		entry = &ent[0];
 	}
 
 	if (entry && !entry->val)
 		return 0;
 
-	if (mem_cgroup_try_charge_swap(page, *entry)) {
-		swapcache_free(*entry);
-		return 0;
-	}
+	add_to_swap_batch(&page, list, entry, ret, 1);
+	return ret[0];
+}
 
-	if (unlikely(PageTransHuge(page)))
-		if (unlikely(split_huge_page_to_list(page, list))) {
-			swapcache_free(*entry);
-			return 0;
+void delete_from_swap_cache_batch(struct page pages[], int nr)
+{
+	struct page *page;
+	swp_entry_t entry;
+	struct address_space *address_space, *prev;
+	int i;
+
+	prev = NULL;
+	address_space = NULL;
+	for (i = 0; i < nr; ++i) {
+		page = &pages[i];
+		entry.val = page_private(page);
+
+		address_space = swap_address_space(entry);
+		if (address_space != prev) {
+			if (prev)
+				spin_unlock_irq(&prev->tree_lock);
+			spin_lock_irq(&address_space->tree_lock);
 		}
+		__delete_from_swap_cache(page);
+		prev = address_space;
+	}
+	if (address_space)
+		spin_unlock_irq(&address_space->tree_lock);
 
-	/*
-	 * Radix-tree node allocations from PF_MEMALLOC contexts could
-	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
-	 * stops emergency reserves from being allocated.
-	 *
-	 * TODO: this could cause a theoretical memory reclaim
-	 * deadlock in the swap out path.
-	 */
-	/*
-	 * Add it to the swap cache.
-	 */
-	err = add_to_swap_cache(page, *entry,
-			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
+	for (i = 0; i < nr; ++i) {
+		page = &pages[i];
+		entry.val = page_private(page);
 
-	if (!err) {
-		return 1;
-	} else {	/* -ENOMEM radix-tree allocation failure */
-		/*
-		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
-		 * clear SWAP_HAS_CACHE flag.
-		 */
-		swapcache_free(*entry);
-		return 0;
+		/* can batch this */
+		swapcache_free(entry);
+		put_page(page);
 	}
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 310e2b2..fab61f1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1097,8 +1097,9 @@ static unsigned long shrink_anon_page_list(struct list_head *page_list,
 	unsigned long nr_reclaimed = 0;
 	enum pg_result pg_dispose;
 	swp_entry_t swp_entries[SWAP_BATCH];
+	struct page *pages[SWAP_BATCH];
+	int m, i, k, ret[SWAP_BATCH];
 	struct page *page;
-	int m, i, k;
 
 	while (n > 0) {
 		int swap_ret = SWAP_SUCCESS;
@@ -1117,13 +1118,19 @@ static unsigned long shrink_anon_page_list(struct list_head *page_list,
 			page = lru_to_page(swap_pages);
 
 			list_del(&page->lru);
+			pages[i] = page;
+		}
 
-			/*
-			* Anonymous process memory has backing store?
-			* Try to allocate it some swap space here.
-			*/
+		/*
+		* Anonymous process memory has backing store?
+		* Try to allocate it some swap space here.
+		*/
+		add_to_swap_batch(pages, page_list, swp_entries, ret, m);
+
+		for (i = 0; i < m; ++i) {
+			page = pages[i];
 
-			if (!add_to_swap(page, page_list, NULL)) {
+			if (!ret[i]) {
 				pg_finish(page, PG_ACTIVATE_LOCKED, swap_ret,
 						&nr_reclaimed, pgactivate,
 						ret_pages, free_pages);
-- 
2.5.5
