lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 2 May 2017 08:53:32 +0900
From:   Minchan Kim <minchan@...nel.org>
To:     Johannes Weiner <hannes@...xchg.org>
Cc:     "Huang, Ying" <ying.huang@...el.com>,
        Andrew Morton <akpm@...ux-foundation.org>, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org,
        Andrea Arcangeli <aarcange@...hat.com>,
        Ebru Akagunduz <ebru.akagunduz@...il.com>,
        Michal Hocko <mhocko@...nel.org>, Tejun Heo <tj@...nel.org>,
        Hugh Dickins <hughd@...gle.com>, Shaohua Li <shli@...nel.org>,
        Rik van Riel <riel@...hat.com>, cgroups@...r.kernel.org
Subject: Re: [PATCH -mm -v10 1/3] mm, THP, swap: Delay splitting THP during
 swap out

Hi Johannes,

The patch I sent has two clean-up.

First part was as follows:

>From 5400dceb3a7739d4e7ff340fc0831e0e1830ec0b Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@...nel.org>
Date: Fri, 28 Apr 2017 15:04:14 +0900
Subject: [PATCH 1/2] swap: make swapcache_free aware of page size

Now, get_swap_page takes struct page and allocates swap space according
to page size(ie, normal or THP) so it would be more clear to take
struct page in swapcache_free which is a counter function of
get_swap_page without needing if-else statement of caller side.

Cc: Johannes Weiner <hannes@...xchg.org>
Signed-off-by: Minchan Kim <minchan@...nel.org>
---
 include/linux/swap.h |  4 ++--
 mm/shmem.c           |  2 +-
 mm/swap_state.c      | 13 +++----------
 mm/swapfile.c        | 12 ++++++++++--
 mm/vmscan.c          |  2 +-
 5 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index b60fea3748f8..16c8d2392ddd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -400,7 +400,7 @@ extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
 extern int swapcache_prepare(swp_entry_t);
 extern void swap_free(swp_entry_t);
-extern void swapcache_free(swp_entry_t);
+extern void swapcache_free(struct page *page, swp_entry_t);
 extern void swapcache_free_entries(swp_entry_t *entries, int n);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
@@ -459,7 +459,7 @@ static inline void swap_free(swp_entry_t swp)
 {
 }
 
-static inline void swapcache_free(swp_entry_t swp)
+static inline void swapcache_free(struct page *page, swp_entry_t swp)
 {
 }
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 29948d7da172..ab1802664e97 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1326,7 +1326,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 
 	mutex_unlock(&shmem_swaplist_mutex);
 free_swap:
-	swapcache_free(swap);
+	swapcache_free(page, swap);
 redirty:
 	set_page_dirty(page);
 	if (wbc->for_reclaim)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 16ff89d058f4..4af44fd4142e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -231,10 +231,7 @@ int add_to_swap(struct page *page, struct list_head *list)
 	return 1;
 
 fail_free:
-	if (PageTransHuge(page))
-		swapcache_free_cluster(entry);
-	else
-		swapcache_free(entry);
+	swapcache_free(page, entry);
 fail:
 	if (PageTransHuge(page) && !split_huge_page_to_list(page, list))
 		goto retry;
@@ -259,11 +256,7 @@ void delete_from_swap_cache(struct page *page)
 	__delete_from_swap_cache(page);
 	spin_unlock_irq(&address_space->tree_lock);
 
-	if (PageTransHuge(page))
-		swapcache_free_cluster(entry);
-	else
-		swapcache_free(entry);
-
+	swapcache_free(page, entry);
 	page_ref_sub(page, hpage_nr_pages(page));
 }
 
@@ -415,7 +408,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
 		 * clear SWAP_HAS_CACHE flag.
 		 */
-		swapcache_free(entry);
+		swapcache_free(new_page, entry);
 	} while (err != -ENOMEM);
 
 	if (new_page)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 596306272059..9496cc3e955a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1144,7 +1144,7 @@ void swap_free(swp_entry_t entry)
 /*
  * Called after dropping swapcache to decrease refcnt to swap entries.
  */
-void swapcache_free(swp_entry_t entry)
+void __swapcache_free(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
 
@@ -1156,7 +1156,7 @@ void swapcache_free(swp_entry_t entry)
 }
 
 #ifdef CONFIG_THP_SWAP
-void swapcache_free_cluster(swp_entry_t entry)
+void __swapcache_free_cluster(swp_entry_t entry)
 {
 	unsigned long offset = swp_offset(entry);
 	unsigned long idx = offset / SWAPFILE_CLUSTER;
@@ -1182,6 +1182,14 @@ void swapcache_free_cluster(swp_entry_t entry)
 }
 #endif /* CONFIG_THP_SWAP */
 
+void swapcache_free(struct page *page, swp_entry_t entry)
+{
+	if (!PageTransHuge(page))
+		__swapcache_free(entry);
+	else
+		__swapcache_free_cluster(entry);
+}
+
 static int swp_entry_cmp(const void *ent1, const void *ent2)
 {
 	const swp_entry_t *e1 = ent1, *e2 = ent2;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5ebf468c5429..0f8ca3d1761d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -708,7 +708,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 		mem_cgroup_swapout(page, swap);
 		__delete_from_swap_cache(page);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
-		swapcache_free(swap);
+		swapcache_free(page, swap);
 	} else {
 		void (*freepage)(struct page *);
 		void *shadow = NULL;
-- 
2.7.4


On Mon, May 01, 2017 at 12:44:30PM +0200, Johannes Weiner wrote:
> On Fri, Apr 28, 2017 at 05:40:44PM +0900, Minchan Kim wrote:
> > However, get_swap_page is ugly now. The caller should take care of
> > failure and should retry after split. I hope get_swap_page includes
> > split and retry logic in itself without reling on the caller.
> 
> I think this makes the interface terrible. It's an allocation function
> to which you pass a reference object for size - and if the allocation
> doesn't succeed it'll split your reference object to make it fit?
> 
> That's a nasty side effect for this function to have.

It does make sense to me.

With that point of view, retry logic in add_to_swap is awkward to me, too.
The add_to_swap is a kind of allocate function(swap slot and swap cache)
so I think it would be better to move retry logic to vmscan.c.

What do you think about it?

>From e228d67e9677f8eeea1e424cab61b67884d534b4 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@...nel.org>
Date: Tue, 2 May 2017 08:30:41 +0900
Subject: [PATCH 2/2] swap: move anonymous THP split logic to vmscan

The add_to_swap aims to allocate swap_space(ie, swap slot and
swapcache) so if it fails due to lack of space in case of THP,
the caller should split the THP page and retry it with a page.

Cc: Johannes Weiner <hannes@...xchg.org>
Signed-off-by: Minchan Kim <minchan@...nel.org>
---
 include/linux/swap.h |  4 ++--
 mm/swap_state.c      | 23 ++++++-----------------
 mm/vmscan.c          | 22 +++++++++++++++++++++-
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 16c8d2392ddd..a305d27b12f8 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -359,7 +359,7 @@ extern struct address_space *swapper_spaces[];
 		>> SWAP_ADDRESS_SPACE_SHIFT])
 extern unsigned long total_swapcache_pages(void);
 extern void show_swap_cache_info(void);
-extern int add_to_swap(struct page *, struct list_head *list);
+extern int add_to_swap(struct page *);
 extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
 extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
 extern void __delete_from_swap_cache(struct page *);
@@ -479,7 +479,7 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp)
 	return NULL;
 }
 
-static inline int add_to_swap(struct page *page, struct list_head *list)
+static inline int add_to_swap(struct page *page)
 {
 	return 0;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 4af44fd4142e..1b6ef1660b7e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -184,7 +184,7 @@ void __delete_from_swap_cache(struct page *page)
  * Allocate swap space for the page and add the page to the
  * swap cache.  Caller needs to hold the page lock. 
  */
-int add_to_swap(struct page *page, struct list_head *list)
+int add_to_swap(struct page *page)
 {
 	swp_entry_t entry;
 	int err;
@@ -192,12 +192,12 @@ int add_to_swap(struct page *page, struct list_head *list)
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(!PageUptodate(page), page);
 
-retry:
 	entry = get_swap_page(page);
 	if (!entry.val)
-		goto fail;
+		return 0;
+
 	if (mem_cgroup_try_charge_swap(page, entry))
-		goto fail_free;
+		goto fail;
 
 	/*
 	 * Radix-tree node allocations from PF_MEMALLOC contexts could
@@ -218,23 +218,12 @@ int add_to_swap(struct page *page, struct list_head *list)
 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
 		 * clear SWAP_HAS_CACHE flag.
 		 */
-		goto fail_free;
-
-	if (PageTransHuge(page)) {
-		err = split_huge_page_to_list(page, list);
-		if (err) {
-			delete_from_swap_cache(page);
-			return 0;
-		}
-	}
+		goto fail;
 
 	return 1;
 
-fail_free:
-	swapcache_free(page, entry);
 fail:
-	if (PageTransHuge(page) && !split_huge_page_to_list(page, list))
-		goto retry;
+	swapcache_free(page, entry);
 	return 0;
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0f8ca3d1761d..2314aca47d12 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1125,8 +1125,28 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		    !PageSwapCache(page)) {
 			if (!(sc->gfp_mask & __GFP_IO))
 				goto keep_locked;
-			if (!add_to_swap(page, page_list))
+swap_retry:
+			/*
+			 * Retry after split if we fail to allocate
+			 * swap space of a THP.
+			 */
+			if (!add_to_swap(page)) {
+				if (!PageTransHuge(page) ||
+				    split_huge_page_to_list(page, page_list))
+					goto activate_locked;
+				goto swap_retry;
+			}
+
+			/*
+			 * Got swap space successfully. But unfortunately,
+			 * we don't support a THP page writeout so split it.
+			 */
+			if (PageTransHuge(page) &&
+				  split_huge_page_to_list(page, page_list)) {
+				delete_from_swap_cache(page);
 				goto activate_locked;
+			}
+
 			may_enter_fs = 1;
 
 			/* Adding to swap updated mapping */
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ