Message-ID: <20251205231721.104505-6-mfo@igalia.com>
Date: Fri,  5 Dec 2025 20:17:17 -0300
From: Mauricio Faria de Oliveira <mfo@...lia.com>
To: Andrew Morton <akpm@...ux-foundation.org>,
	David Hildenbrand <david@...nel.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
	Michal Hocko <mhocko@...e.com>,
	Vlastimil Babka <vbabka@...e.cz>,
	Oscar Salvador <osalvador@...e.de>,
	linux-mm@...ck.org,
	linux-kernel@...r.kernel.org,
	kernel-dev@...lia.com
Subject: [PATCH RFC 5/9] mm/page_owner: add swap hooks

Add the swap hooks 'prepare_to_swap()' (called before swap-out and
page-free), 'swap_restore()' (called after page-alloc and swap-in), and
'swap_invalidate()' variants for a single page and for a whole swap
area.

The first two hooks are the core of the new functionality. They store
the (initial) allocation stack trace at swap-out and load it back at
swap-in, in order to preserve the allocation stack trace across a
swap-out/swap-in cycle.
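
For context, a rough sketch of where these hooks would plausibly be
wired into the generic swap paths, modeled on the arm64 MTE swap hooks
this is based on; the call sites below are illustrative assumptions
only and are not part of this patch:

	/* Swap-out path, before the folio is written out and freed
	 * (illustrative; e.g. somewhere in the reclaim swap-out path): */
	if (page_owner_prepare_to_swap(folio))
		goto fail;	/* could not store the stack trace */

	/* Swap-in path, after the new folio has been allocated
	 * (illustrative; e.g. do_swap_page() and friends): */
	page_owner_swap_restore(entry, folio);

	/* Freeing a swap slot, and swapoff of a whole area: */
	page_owner_swap_invalidate_page(type, offset);
	page_owner_swap_invalidate_area(type);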

The stack record refcounts are fixed up for both allocations: the
initial allocation's refcount is incremented, since it is decremented
later by the page free at swap-out; and the swap-in allocation's
refcount is decremented, since it was already incremented earlier by
the page alloc at swap-in.
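
In other words, the intended balance over one swap cycle is roughly
(a sketch, not a literal trace):

	initial page alloc    -> initial stack refcount +1
	prepare_to_swap()     -> initial stack refcount +1  (fix-up, this patch)
	page free at swap-out -> initial stack refcount -1
	page alloc at swap-in -> swap-in stack refcount +1
	swap_restore()        -> swap-in stack refcount -1  (fix-up, this patch)
	final page free       -> initial stack refcount -1

so the initial allocation's stack record stays accounted for while the
data sits in swap, and the transient swap-in allocation nets to zero.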

This is based on the swap hooks implementation for memory tags on arm64
('arch/arm64/mm/mteswap.c'; thanks!).

Signed-off-by: Mauricio Faria de Oliveira <mfo@...lia.com>
---
 include/linux/page_owner.h |  49 +++++++++++++++
 mm/page_owner.c            | 120 +++++++++++++++++++++++++++++++++++++
 2 files changed, 169 insertions(+)

diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 3328357f6dba..cd95aacceba7 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -75,4 +75,53 @@ static inline void dump_page_owner(const struct page *page)
 {
 }
 #endif /* CONFIG_PAGE_OWNER */
+
+#ifdef CONFIG_SWAP_PAGE_OWNER
+extern struct static_key_false swap_page_owner_inited;
+
+extern int __page_owner_prepare_to_swap(struct folio *folio);
+extern void __page_owner_swap_restore(swp_entry_t entry, struct folio *folio);
+extern void __page_owner_swap_invalidate_page(int type, pgoff_t offset);
+extern void __page_owner_swap_invalidate_area(int type);
+
+static inline int page_owner_prepare_to_swap(struct folio *folio)
+{
+	if (static_branch_unlikely(&swap_page_owner_inited))
+		return __page_owner_prepare_to_swap(folio);
+
+	return 0;
+}
+
+static inline void page_owner_swap_restore(swp_entry_t entry, struct folio *folio)
+{
+	if (static_branch_unlikely(&swap_page_owner_inited))
+		return __page_owner_swap_restore(entry, folio);
+}
+
+static inline void page_owner_swap_invalidate_page(int type, pgoff_t offset)
+{
+	if (static_branch_unlikely(&swap_page_owner_inited))
+		return __page_owner_swap_invalidate_page(type, offset);
+}
+
+static inline void page_owner_swap_invalidate_area(int type)
+{
+	if (static_branch_unlikely(&swap_page_owner_inited))
+		return __page_owner_swap_invalidate_area(type);
+}
+#else
+static inline int page_owner_prepare_to_swap(struct folio *folio)
+{
+	return 0;
+}
+static inline void page_owner_swap_restore(swp_entry_t entry, struct folio *folio)
+{
+}
+static inline void page_owner_swap_invalidate_page(int type, pgoff_t offset)
+{
+}
+static inline void page_owner_swap_invalidate_area(int type)
+{
+}
+#endif /* CONFIG_SWAP_PAGE_OWNER */
 #endif /* __LINUX_PAGE_OWNER_H */
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 5cd7de1f8023..d256f58deca4 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -525,6 +525,126 @@ static void copy_from_swap_page_owner(struct page_owner *page_owner,
 	page_owner->tgid = spo->tgid;
 	strscpy(page_owner->comm, spo->comm, sizeof(page_owner->comm));
 }
+
+/* Store the initial stack information from page_owner to xarray. */
+int __page_owner_prepare_to_swap(struct folio *folio)
+{
+	struct page_ext_iter iter;
+	struct page_ext *page_ext;
+	struct page_owner *page_owner;
+	struct swap_page_owner *spo;
+	depot_stack_handle_t handle = 0;
+	swp_entry_t entry;
+	long i = 0, nr_pages = folio_nr_pages(folio);
+	int err;
+
+	rcu_read_lock();
+	for_each_page_ext(&folio->page, nr_pages, page_ext, iter) {
+		spo = alloc_swap_page_owner();
+		if (!spo) {
+			err = -ENOMEM;
+			goto out_locked;
+		}
+
+		page_owner = get_page_owner(page_ext);
+		copy_to_swap_page_owner(spo, page_owner);
+		entry = page_swap_entry(folio_page(folio, i));
+		err = store_swap_page_owner(spo, entry);
+		if (err)
+			goto out_locked;
+
+		if (!handle)
+			handle = page_owner->handle;
+		i++;
+	}
+	rcu_read_unlock();
+
+	/*
+	 * Fix-up: increment refcount of the initial allocation.
+	 * It will be decremented by page-free at swap-out.
+	 */
+	inc_stack_record_count(handle, GFP_KERNEL, nr_pages);
+
+	return 0;
+
+out_locked:
+	for_each_page_ext(&folio->page, nr_pages, page_ext, iter) {
+		if (!i--)
+			break;
+
+		entry = page_swap_entry(folio_page(folio, i));
+		erase_swap_page_owner(entry, true);
+
+		page_owner = get_page_owner(page_ext);
+
+	}
+	rcu_read_unlock();
+	return err;
+}
+
+/* Load the initial stack information from xarray to page_owner. */
+void __page_owner_swap_restore(swp_entry_t entry, struct folio *folio)
+{
+	struct page_ext_iter iter;
+	struct page_ext *page_ext;
+	struct page_owner *page_owner;
+	struct swap_page_owner *spo;
+	depot_stack_handle_t handle = 0;
+	long i = 0, nr_pages = folio_nr_pages(folio);
+
+	rcu_read_lock();
+	for_each_page_ext(&folio->page, nr_pages, page_ext, iter) {
+		spo = (struct swap_page_owner *) load_swap_page_owner(entry);
+		if (!spo) {
+			rcu_read_unlock();
+			return;
+		}
+
+		page_owner = get_page_owner(page_ext);
+		copy_from_swap_page_owner(page_owner, spo);
+
+		if (!handle)
+			handle = page_owner->handle;
+		i++;
+		entry.val++;
+	}
+	rcu_read_unlock();
+
+	/*
+	 * Fix-up: decrement refcount of the swap-in allocation.
+	 * It was incremented by the page-alloc at swap-in.
+	 * (early_handle: see __reset_page_owner().)
+	 *
+	 * FIXME(mfo): 'dec_stack_record_count: refcount went to 0 ...'
+	 * with stack_depot oops is hit occasionally in tests or at shutdown.
+	 */
+	if (handle != early_handle)
+		dec_stack_record_count(handle, nr_pages);
+}
+
+void __page_owner_swap_invalidate_page(int type, pgoff_t offset)
+{
+	swp_entry_t entry = swp_entry(type, offset);
+
+	erase_swap_page_owner(entry, true);
+}
+
+void __page_owner_swap_invalidate_area(int type)
+{
+	swp_entry_t first_entry = swp_entry(type, 0);
+	swp_entry_t last_entry = swp_entry(type + 1, 0);
+	swp_entry_t entry;
+	void *spo;
+
+	XA_STATE(xa_state, &swap_page_owners, first_entry.val);
+
+	xa_lock(&swap_page_owners);
+	xas_for_each(&xa_state, spo, last_entry.val - 1) {
+		entry.val = xa_state.xa_index;
+		erase_swap_page_owner(entry, false);
+	}
+	xa_unlock(&swap_page_owners);
+}
 #endif
 
 void pagetypeinfo_showmixedcount_print(struct seq_file *m,
-- 
2.51.0

