linux-kernel - [PATCH RFC] mm: allow isolation for pages not inserted into lru lists yet

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [thread-next>] [day] [month] [year] [list]

Message-ID: <150039362282.196778.7901790444249317003.stgit@buzz>
Date:   Tue, 18 Jul 2017 19:00:23 +0300
From:   Konstantin Khlebnikov <khlebnikov@...dex-team.ru>
To:     Michal Hocko <mhocko@...e.com>, Minchan Kim <minchan@...nel.org>,
        Hugh Dickins <hughd@...gle.com>, linux-mm@...ck.org,
        Shaohua Li <shli@...com>, Johannes Weiner <hannes@...xchg.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Mel Gorman <mgorman@...hsingularity.net>
Cc:     linux-kernel@...r.kernel.org
Subject: [PATCH RFC] mm: allow isolation for pages not inserted into lru
 lists yet

Pages are added into lru lists via per-cpu page vectors in order
to combine these insertions and reduce lru lock contention.

Such pending pages cannot be isolated and moved to another lru list.
In some cases this breaks page activation, and it makes mlock-munlock
much more complicated.

This also breaks the newly added swapless MADV_FREE: if it cannot move
an anon page into the file lru, then the page can never be freed lazily.

This patch rearranges lru list handling to allow lru isolation for
such pages. It sets PageLRU earlier and initializes page->lru to mark
pages that are still pending insertion into an lru list.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@...dex-team.ru>
---
 include/linux/mm_inline.h |   10 ++++++++--
 mm/swap.c                 |   26 ++++++++++++++++++++++++--
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e030a68ead7e..6618c588ee40 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -60,8 +60,14 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page,
 static __always_inline void del_page_from_lru_list(struct page *page,
 				struct lruvec *lruvec, enum lru_list lru)
 {
-	list_del(&page->lru);
-	update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
+	/*
+	 * Empty list head means page is not drained to lru list yet.
+	 */
+	if (likely(!list_empty(&page->lru))) {
+		list_del(&page->lru);
+		update_lru_size(lruvec, lru, page_zonenum(page),
+				-hpage_nr_pages(page));
+	}
 }
 
 /**
diff --git a/mm/swap.c b/mm/swap.c
index 23fc6e049cda..ba4c98074a09 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -400,13 +400,35 @@ void mark_page_accessed(struct page *page)
 }
 EXPORT_SYMBOL(mark_page_accessed);
 
+static void __pagevec_lru_add_drain_fn(struct page *page, struct lruvec *lruvec,
+				       void *arg)
+{
+	/* Check for isolated or already added pages */
+	if (likely(PageLRU(page) && list_empty(&page->lru))) {
+		int file = page_is_file_cache(page);
+		int active = PageActive(page);
+		enum lru_list lru = page_lru(page);
+
+		add_page_to_lru_list(page, lruvec, lru);
+		update_page_reclaim_stat(lruvec, file, active);
+		trace_mm_lru_insertion(page, lru);
+	}
+}
+
 static void __lru_cache_add(struct page *page)
 {
 	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
 
+	/*
+	 * Set PageLRU right here and initialize list head to
+	 * allow page isolation while it on the way to the LRU list.
+	 */
+	VM_BUG_ON_PAGE(PageLRU(page), page);
+	INIT_LIST_HEAD(&page->lru);
 	get_page(page);
+	SetPageLRU(page);
 	if (!pagevec_add(pvec, page) || PageCompound(page))
-		__pagevec_lru_add(pvec);
+		pagevec_lru_move_fn(pvec, __pagevec_lru_add_drain_fn, NULL);
 	put_cpu_var(lru_add_pvec);
 }
 
@@ -611,7 +633,7 @@ void lru_add_drain_cpu(int cpu)
 	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
 
 	if (pagevec_count(pvec))
-		__pagevec_lru_add(pvec);
+		pagevec_lru_move_fn(pvec, __pagevec_lru_add_drain_fn, NULL);
 
 	pvec = &per_cpu(lru_rotate_pvecs, cpu);
 	if (pagevec_count(pvec)) {