Message-ID: <20241018105026.2521366-2-usamaarif642@gmail.com>
Date: Fri, 18 Oct 2024 11:48:39 +0100
From: Usama Arif <usamaarif642@...il.com>
To: akpm@...ux-foundation.org,
linux-mm@...ck.org
Cc: hannes@...xchg.org,
david@...hat.com,
willy@...radead.org,
kanchana.p.sridhar@...el.com,
yosryahmed@...gle.com,
nphamcs@...il.com,
chengming.zhou@...ux.dev,
ryan.roberts@....com,
ying.huang@...el.com,
21cnbao@...il.com,
riel@...riel.com,
shakeel.butt@...ux.dev,
kernel-team@...a.com,
linux-kernel@...r.kernel.org,
linux-doc@...r.kernel.org,
Usama Arif <usamaarif642@...il.com>
Subject: [RFC 1/4] mm/zswap: skip swapcache for swapping in zswap pages

As mentioned in [1], there is a significant improvement in no-readahead
swapin performance for super fast devices when skipping the swapcache.

With large folio zswapin support added in later patches, this path will
also act as "readahead" by swapping in multiple pages into large
folios, further improving performance.

[1] https://lore.kernel.org/all/1505886205-9671-5-git-send-email-minchan@kernel.org/T/#m5a792a04dfea20eb7af4c355d00503efe1c86a93
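
The zswap_present_test() helper added below walks the zswap xarray
trees (one tree per SWAP_ADDRESS_SPACE_PAGES swap slots) that cover
the folio's swap range. As an illustration (not part of the patch),
here is a minimal userspace model of the tree-boundary arithmetic it
uses, with SWAP_ADDRESS_SPACE_PAGES shrunk from
1 << SWAP_ADDRESS_SPACE_SHIFT (16384) to 8 so that a small range
crosses a tree boundary:

  #include <stdio.h>

  /* Kernel value is 1 << SWAP_ADDRESS_SPACE_SHIFT; shrunk here so a
   * small example spans two trees. */
  #define SWAP_ADDRESS_SPACE_PAGES 8UL
  #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

  int main(void)
  {
          unsigned long offset = 6; /* first swap offset of the folio */
          int nr_pages = 8;         /* folio spans two trees */
          unsigned long i = 0;

          while (i < nr_pages) {
                  unsigned long tree_offset = offset + i;
                  /* Start of the next tree: if tree_offset is already
                   * aligned, ALIGN() would return it unchanged, so
                   * align tree_offset + 1 instead. */
                  unsigned long tree_max_idx =
                          tree_offset % SWAP_ADDRESS_SPACE_PAGES ?
                          ALIGN(tree_offset, SWAP_ADDRESS_SPACE_PAGES) :
                          ALIGN(tree_offset + 1, SWAP_ADDRESS_SPACE_PAGES);
                  unsigned long end = offset + nr_pages < tree_max_idx ?
                                      offset + nr_pages : tree_max_idx;
                  unsigned long max_idx = end - 1;

                  printf("lookup [%lu, %lu] in tree %lu\n", tree_offset,
                         max_idx, tree_offset / SWAP_ADDRESS_SPACE_PAGES);
                  /* pretend every entry in [tree_offset, max_idx] is
                   * present in zswap */
                  i += max_idx - tree_offset + 1;
          }
          return 0;
  }

This prints "lookup [6, 7] in tree 0" followed by "lookup [8, 13] in
tree 1": each xas_for_each() walk in the helper is clamped to a single
tree, and the outer loop resumes at the start of the next tree.
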
Signed-off-by: Usama Arif <usamaarif642@...il.com>
---
 include/linux/zswap.h |  6 ++++++
 mm/memory.c           |  3 ++-
 mm/page_io.c          |  1 -
 mm/zswap.c            | 46 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index d961ead91bf1..e418d75db738 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -27,6 +27,7 @@ struct zswap_lruvec_state {
 unsigned long zswap_total_pages(void);
 bool zswap_store(struct folio *folio);
 bool zswap_load(struct folio *folio);
+bool zswap_present_test(swp_entry_t swp, int nr_pages);
 void zswap_invalidate(swp_entry_t swp);
 int zswap_swapon(int type, unsigned long nr_pages);
 void zswap_swapoff(int type);
@@ -49,6 +50,11 @@ static inline bool zswap_load(struct folio *folio)
 	return false;
 }
 
+static inline bool zswap_present_test(swp_entry_t swp, int nr_pages)
+{
+	return false;
+}
+
 static inline void zswap_invalidate(swp_entry_t swp) {}
 static inline int zswap_swapon(int type, unsigned long nr_pages)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 03e5452dd0c0..49d243131169 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4289,7 +4289,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	swapcache = folio;
 
 	if (!folio) {
-		if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
+		if ((data_race(si->flags & SWP_SYNCHRONOUS_IO) ||
+		     zswap_present_test(entry, 1)) &&
 		    __swap_count(entry) == 1) {
 			/* skip swapcache */
 			folio = alloc_swap_folio(vmf);
diff --git a/mm/page_io.c b/mm/page_io.c
index 4aa34862676f..2a15b197968a 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -602,7 +602,6 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
 	unsigned long pflags;
 	bool in_thrashing;
 
-	VM_BUG_ON_FOLIO(!folio_test_swapcache(folio) && !synchronous, folio);
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 	VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio);
diff --git a/mm/zswap.c b/mm/zswap.c
index 7f00cc918e7c..f4b03071b2fb 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1576,6 +1576,52 @@ bool zswap_store(struct folio *folio)
 	return ret;
 }
 
+static bool swp_offset_in_zswap(unsigned int type, pgoff_t offset)
+{
+	return (offset >> SWAP_ADDRESS_SPACE_SHIFT) < nr_zswap_trees[type];
+}
+
+/* Returns true if the entire folio is in zswap */
+bool zswap_present_test(swp_entry_t swp, int nr_pages)
+{
+	pgoff_t offset = swp_offset(swp), tree_max_idx;
+	pgoff_t max_idx = 0, i = 0, tree_offset = 0;
+	unsigned int type = swp_type(swp);
+	struct zswap_entry *entry = NULL;
+	struct xarray *tree;
+
+	while (i < nr_pages) {
+		tree_offset = offset + i;
+		/* Check if the tree exists. */
+		if (!swp_offset_in_zswap(type, tree_offset))
+			return false;
+
+		tree = swap_zswap_tree(swp_entry(type, tree_offset));
+		XA_STATE(xas, tree, tree_offset);
+
+		tree_max_idx = tree_offset % SWAP_ADDRESS_SPACE_PAGES ?
+			ALIGN(tree_offset, SWAP_ADDRESS_SPACE_PAGES) :
+			ALIGN(tree_offset + 1, SWAP_ADDRESS_SPACE_PAGES);
+		max_idx = min(offset + nr_pages, tree_max_idx) - 1;
+		rcu_read_lock();
+		xas_for_each(&xas, entry, max_idx) {
+			if (xas_retry(&xas, entry))
+				continue;
+			i++;
+		}
+		rcu_read_unlock();
+		/*
+		 * If xas_for_each() exits because entry is NULL and
+		 * the number of entries checked is less than max_idx,
+		 * then zswap does not contain the entire folio.
+		 */
+		if (!entry && offset + i <= max_idx)
+			return false;
+	}
+
+	return true;
+}
+
 bool zswap_load(struct folio *folio)
 {
 	swp_entry_t swp = folio->swap;
--
2.43.5