Message-ID: <20230920190244.16839-3-ryncsn@gmail.com>
Date:   Thu, 21 Sep 2023 03:02:40 +0800
From:   Kairui Song <ryncsn@...il.com>
To:     linux-mm@...ck.org
Cc:     Andrew Morton <akpm@...ux-foundation.org>,
        Yu Zhao <yuzhao@...gle.com>,
        Roman Gushchin <roman.gushchin@...ux.dev>,
        Johannes Weiner <hannes@...xchg.org>,
        Michal Hocko <mhocko@...e.com>,
        Hugh Dickins <hughd@...gle.com>, Nhat Pham <nphamcs@...il.com>,
        Yuanchu Xie <yuanchu@...gle.com>,
        Kalesh Singh <kaleshsingh@...gle.com>,
        Suren Baghdasaryan <surenb@...gle.com>,
        "T . J . Mercier" <tjmercier@...gle.com>,
        linux-kernel@...r.kernel.org, Kairui Song <kasong@...cent.com>
Subject: [RFC PATCH v3 2/6] workingset: move refault distance checking into a helper

From: Kairui Song <kasong@...cent.com>

There is no functional change; this just moves the refault distance checking
logic into a standalone helper so it can be reused later.

Signed-off-by: Kairui Song <kasong@...cent.com>
---
 mm/workingset.c | 137 ++++++++++++++++++++++++++++--------------------
 1 file changed, 79 insertions(+), 58 deletions(-)
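
Note for reviewers, not part of the patch: the masked unsigned subtraction
described in the comment moved into lru_test_refault() (staying accurate
across nonresident_age wrap-around within the truncated field) can be seen
in the small userspace demo below. DEMO_BITS, DEMO_MASK and the counter
values are made up for illustration and only stand in for EVICTION_BITS
and EVICTION_MASK.

  /* Userspace-only demo of the masked-subtraction refault distance. */
  #include <stdio.h>

  #define DEMO_BITS 8				/* made-up stand-in for EVICTION_BITS */
  #define DEMO_MASK ((1UL << DEMO_BITS) - 1)	/* stand-in for EVICTION_MASK */

  /* Both timestamps are truncated to DEMO_BITS, as in the packed shadow entry. */
  static unsigned long demo_distance(unsigned long eviction, unsigned long refault)
  {
  	return (refault - eviction) & DEMO_MASK;
  }

  int main(void)
  {
  	unsigned long eviction = 250 & DEMO_MASK;	/* counter at eviction time */
  	unsigned long refault  = 259 & DEMO_MASK;	/* counter has wrapped to 3 */

  	/* Prints 9: the wrap inside the truncated field does not distort it. */
  	printf("refault distance = %lu\n", demo_distance(eviction, refault));
  	return 0;
  }

With the counter at 250 when the folio was evicted and at 259 (truncated
to 3) when it refaults, the masked subtraction still reports a distance of 9.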

diff --git a/mm/workingset.c b/mm/workingset.c
index 8613945fc66e..b0704cbfc667 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -170,9 +170,10 @@
  */
 
 #define WORKINGSET_SHIFT 1
-#define EVICTION_SHIFT	((BITS_PER_LONG - BITS_PER_XA_VALUE) +	\
+#define EVICTION_SHIFT	((BITS_PER_LONG - BITS_PER_XA_VALUE) + \
 			 WORKINGSET_SHIFT + NODES_SHIFT + \
 			 MEM_CGROUP_ID_SHIFT)
+#define EVICTION_BITS	(BITS_PER_LONG - (EVICTION_SHIFT))
 #define EVICTION_MASK	(~0UL >> EVICTION_SHIFT)
 
 /*
@@ -216,6 +217,79 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
 	*workingsetp = workingset;
 }
 
+/*
+ * Read the eviction timestamp used for the refault distance calculation.
+ */
+static inline unsigned long lru_eviction(struct lruvec *lruvec,
+					 int bits, int bucket_order)
+{
+	unsigned long eviction = atomic_long_read(&lruvec->nonresident_age);
+
+	eviction >>= bucket_order;
+	eviction &= ~0UL >> (BITS_PER_LONG - bits);
+
+	return eviction;
+}
+
+/*
+ * Calculate and test refault distance.
+ */
+static inline bool lru_test_refault(struct mem_cgroup *memcg,
+				    struct lruvec *lruvec,
+				    unsigned long eviction, bool file,
+				    int bits, int bucket_order)
+{
+	unsigned long refault, distance;
+	unsigned long active, inactive_file, inactive_anon;
+
+	eviction <<= bucket_order;
+	refault = atomic_long_read(&lruvec->nonresident_age);
+
+	/*
+	 * The unsigned subtraction here gives an accurate distance
+	 * across nonresident_age overflows in most cases. There is a
+	 * special case: usually, shadow entries have a short lifetime
+	 * and are either refaulted or reclaimed along with the inode
+	 * before they get too old.  But it is not impossible for the
+	 * nonresident_age to lap a shadow entry in the field, which
+	 * can then result in a false small refault distance, leading
+	 * to a false activation should this old entry actually
+	 * refault again.  However, earlier kernels used to deactivate
+	 * unconditionally with *every* reclaim invocation for the
+	 * longest time, so the occasional inappropriate activation
+	 * leading to pressure on the active list is not a problem.
+	 */
+	distance = (refault - eviction) & (~0UL >> (BITS_PER_LONG - bits));
+
+	/*
+	 * Compare the distance to the existing workingset size. We
+	 * don't activate pages that couldn't stay resident even if
+	 * all the memory was available to the workingset. Whether
+	 * workingset competition needs to consider anon or not depends
+	 * on having free swap space.
+	 */
+	active = lruvec_page_state(lruvec, NR_ACTIVE_FILE);
+	inactive_file = lruvec_page_state(lruvec, NR_INACTIVE_FILE);
+
+	if (mem_cgroup_get_nr_swap_pages(memcg) > 0) {
+		active += lruvec_page_state(lruvec, NR_ACTIVE_ANON);
+		inactive_anon = lruvec_page_state(lruvec, NR_INACTIVE_ANON);
+	} else {
+		inactive_anon = 0;
+	}
+
+	/*
+	 * When there are already enough active pages, be less aggressive
+	 * about reactivating pages: challenging a large set of established
+	 * active pages with a one-time refaulted page may not be a good idea.
+	 */
+	if (active >= inactive_anon + inactive_file)
+		return distance < inactive_anon + inactive_file;
+	else
+		return distance < active +
+			(file ? inactive_anon : inactive_file);
+}
+
 #ifdef CONFIG_LRU_GEN
 
 static void *lru_gen_eviction(struct folio *folio)
@@ -386,11 +460,10 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
 	lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
 	/* XXX: target_memcg can be NULL, go through lruvec */
 	memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
-	eviction = atomic_long_read(&lruvec->nonresident_age);
-	eviction >>= bucket_order;
+	eviction = lru_eviction(lruvec, EVICTION_BITS, bucket_order);
 	workingset_age_nonresident(lruvec, folio_nr_pages(folio));
 	return pack_shadow(memcgid, pgdat, eviction,
-				folio_test_workingset(folio));
+			   folio_test_workingset(folio));
 }
 
 /**
@@ -408,11 +481,6 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)
 {
 	struct mem_cgroup *eviction_memcg;
 	struct lruvec *eviction_lruvec;
-	unsigned long refault_distance;
-	unsigned long inactive_file;
-	unsigned long inactive_anon;
-	unsigned long refault;
-	unsigned long active;
 	int memcgid;
 	struct pglist_data *pgdat;
 	unsigned long eviction;
@@ -421,7 +489,6 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)
 		return lru_gen_test_recent(shadow, file, &eviction_lruvec, &eviction, workingset);
 
 	unpack_shadow(shadow, &memcgid, &pgdat, &eviction, workingset);
-	eviction <<= bucket_order;
 
 	/*
 	 * Look up the memcg associated with the stored ID. It might
@@ -442,56 +509,10 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)
 	eviction_memcg = mem_cgroup_from_id(memcgid);
 	if (!mem_cgroup_disabled() && !eviction_memcg)
 		return false;
-
 	eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
-	refault = atomic_long_read(&eviction_lruvec->nonresident_age);
 
-	/*
-	 * Calculate the refault distance
-	 *
-	 * The unsigned subtraction here gives an accurate distance
-	 * across nonresident_age overflows in most cases. There is a
-	 * special case: usually, shadow entries have a short lifetime
-	 * and are either refaulted or reclaimed along with the inode
-	 * before they get too old.  But it is not impossible for the
-	 * nonresident_age to lap a shadow entry in the field, which
-	 * can then result in a false small refault distance, leading
-	 * to a false activation should this old entry actually
-	 * refault again.  However, earlier kernels used to deactivate
-	 * unconditionally with *every* reclaim invocation for the
-	 * longest time, so the occasional inappropriate activation
-	 * leading to pressure on the active list is not a problem.
-	 */
-	refault_distance = (refault - eviction) & EVICTION_MASK;
-
-	/*
-	 * Compare the distance to the existing workingset size. We
-	 * don't activate pages that couldn't stay resident even if
-	 * all the memory was available to the workingset. Whether
-	 * workingset competition needs to consider anon or not depends
-	 * on having free swap space.
-	 */
-	active = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE);
-	inactive_file = lruvec_page_state(eviction_lruvec, NR_INACTIVE_FILE);
-
-	if (mem_cgroup_get_nr_swap_pages(eviction_memcg) > 0) {
-		active += lruvec_page_state(eviction_lruvec,
-						     NR_ACTIVE_ANON);
-		inactive_anon = lruvec_page_state(eviction_lruvec,
-						  NR_INACTIVE_ANON);
-	} else {
-		inactive_anon = 0;
-	}
-
-	/*
-	 * When there are already enough active pages, be less aggressive
-	 * on reactivating pages, challenge an large set of established
-	 * active pages with one time refaulted page may not be a good idea.
-	 */
-	if (active >= inactive_anon + inactive_file)
-		return refault_distance < inactive_anon + inactive_file;
-	else
-		return refault_distance < active + (file ? inactive_anon : inactive_file);
+	return lru_test_refault(eviction_memcg, eviction_lruvec, eviction,
+				file, EVICTION_BITS, bucket_order);
 }
 
 /**
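
Postscript, also not part of the patch: the activation rule at the end of
lru_test_refault() can be restated as the standalone sketch below; it is
plain userspace C, and the page counts in main() are invented for
illustration.

  #include <stdbool.h>
  #include <stdio.h>

  /* Standalone restatement of the decision made by lru_test_refault(). */
  static bool should_activate(unsigned long distance, bool file, bool have_swap,
  			    unsigned long active_file, unsigned long inactive_file,
  			    unsigned long active_anon, unsigned long inactive_anon)
  {
  	unsigned long active = active_file;

  	/* Anon only competes for the workingset when swap space is available. */
  	if (have_swap)
  		active += active_anon;
  	else
  		inactive_anon = 0;

  	/* A large active list is only challenged by small refault distances. */
  	if (active >= inactive_anon + inactive_file)
  		return distance < inactive_anon + inactive_file;

  	/* Otherwise the refault may also displace the other LRU's inactive pages. */
  	return distance < active + (file ? inactive_anon : inactive_file);
  }

  int main(void)
  {
  	/* Prints 0: distance 1000 exceeds the 768 inactive pages available. */
  	printf("%d\n", should_activate(1000, true, true, 4096, 512, 2048, 256));
  	return 0;
  }

In this example the combined active lists (6144 pages) dwarf the inactive
ones (768), so a refault distance of 1000 would not justify challenging the
established workingset.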
-- 
2.41.0
