Message-ID: <20250407234223.1059191-8-nphamcs@gmail.com>
Date: Mon,  7 Apr 2025 16:42:08 -0700
From: Nhat Pham <nphamcs@...il.com>
To: linux-mm@...ck.org
Cc: akpm@...ux-foundation.org,
	hannes@...xchg.org,
	hughd@...gle.com,
	yosry.ahmed@...ux.dev,
	mhocko@...nel.org,
	roman.gushchin@...ux.dev,
	shakeel.butt@...ux.dev,
	muchun.song@...ux.dev,
	len.brown@...el.com,
	chengming.zhou@...ux.dev,
	kasong@...cent.com,
	chrisl@...nel.org,
	huang.ying.caritas@...il.com,
	ryan.roberts@....com,
	viro@...iv.linux.org.uk,
	baohua@...nel.org,
	osalvador@...e.de,
	lorenzo.stoakes@...cle.com,
	christophe.leroy@...roup.eu,
	pavel@...nel.org,
	kernel-team@...a.com,
	linux-kernel@...r.kernel.org,
	cgroups@...r.kernel.org,
	linux-pm@...r.kernel.org
Subject: [RFC PATCH 07/14] swap: implement the swap_cgroup API using virtual swap

Once we decouple a swap entry from its backing store via virtual swap,
we can no longer statically allocate a per-device array at swapon time
to hold the swap entries' cgroup information. Store that information
in the swap descriptor instead.
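
For context, a rough sketch of how the memcg side is expected to
consume this API (illustrative only; the variable names are
placeholders, and the real callers live in mm/memcontrol.c, which this
patch does not change):

	/* At swapout: remember which memcg owns the page. */
	swap_cgroup_record(folio, mem_cgroup_id(memcg), entry);

	/* At swapin charge: look the owning memcg back up. */
	id = lookup_swap_cgroup_id(entry);
	memcg = mem_cgroup_from_id(id);	/* under rcu_read_lock() */

	/* When the swap entries are freed: drop the records. */
	oldid = swap_cgroup_clear(entry, nr_ents);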

Signed-off-by: Nhat Pham <nphamcs@...il.com>
---
 mm/Makefile |  2 ++
 mm/vswap.c  | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 73 insertions(+), 1 deletion(-)
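
A note on the lookup path in the diff below: the swap descriptor is
freed through its RCU head, so a lock-free reader sees exactly one of
two outcomes. This is just a restatement of lookup_swap_cgroup_id()
with the lifetime reasoning spelled out, not additional code:

	rcu_read_lock();
	/*
	 * Either the load returns NULL (the entry was freed and already
	 * removed from vswap_map), or the descriptor is guaranteed to
	 * stay valid until rcu_read_unlock(), even if it is freed
	 * concurrently -- the actual kfree waits for a grace period.
	 */
	desc = xa_load(&vswap_map, entry.val);
	ret = desc ? atomic_read(&desc->memcgid) : 0;
	rcu_read_unlock();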

diff --git a/mm/Makefile b/mm/Makefile
index b7216c714fa1..35f2f282c8da 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -101,8 +101,10 @@ obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
 obj-$(CONFIG_MEMCG_V1) += memcontrol-v1.o
 obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
 ifdef CONFIG_SWAP
+ifndef CONFIG_VIRTUAL_SWAP
 obj-$(CONFIG_MEMCG) += swap_cgroup.o
 endif
+endif
 obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
 obj-$(CONFIG_GUP_TEST) += gup_test.o
 obj-$(CONFIG_DMAPOOL_TEST) += dmapool_test.o
diff --git a/mm/vswap.c b/mm/vswap.c
index 23a05c3393d8..3792fa7f766b 100644
--- a/mm/vswap.c
+++ b/mm/vswap.c
@@ -27,10 +27,14 @@
  *
  * @slot: The handle to the physical swap slot backing this page.
  * @rcu: The RCU head to free the descriptor with an RCU grace period.
+ * @memcgid: The id of the owning memory cgroup, if any.
  */
 struct swp_desc {
 	swp_slot_t slot;
 	struct rcu_head rcu;
+#ifdef CONFIG_MEMCG
+	atomic_t memcgid;
+#endif
 };
 
 /* Virtual swap space - swp_entry_t -> struct swp_desc */
@@ -122,8 +126,10 @@ static swp_entry_t vswap_alloc(int nr)
 		return (swp_entry_t){0};
 	}
 
-	for (i = 0; i < nr; i++)
+	for (i = 0; i < nr; i++) {
 		descs[i]->slot.val = 0;
+		atomic_set(&descs[i]->memcgid, 0);
+	}
 
 	xa_lock(&vswap_map);
 	if (nr == 1) {
@@ -352,6 +358,70 @@ swp_entry_t swp_slot_to_swp_entry(swp_slot_t slot)
 	return entry ? (swp_entry_t){xa_to_value(entry)} : (swp_entry_t){0};
 }
 
+#ifdef CONFIG_MEMCG
+static unsigned short vswap_cgroup_record(swp_entry_t entry,
+				unsigned short memcgid, unsigned int nr_ents)
+{
+	struct swp_desc *desc;
+	unsigned short oldid = 0, iter = 0;
+
+	XA_STATE(xas, &vswap_map, entry.val);
+
+	rcu_read_lock();
+	xas_for_each(&xas, desc, entry.val + nr_ents - 1) {
+		if (xas_retry(&xas, desc))
+			continue;
+
+		oldid = atomic_xchg(&desc->memcgid, memcgid);
+		if (!iter)
+			iter = oldid;
+		VM_WARN_ON(iter != oldid);
+	}
+	rcu_read_unlock();
+
+	return oldid;
+}
+
+void swap_cgroup_record(struct folio *folio, unsigned short memcgid,
+			swp_entry_t entry)
+{
+	unsigned short oldid =
+		vswap_cgroup_record(entry, memcgid, folio_nr_pages(folio));
+
+	VM_WARN_ON(oldid);
+}
+
+unsigned short swap_cgroup_clear(swp_entry_t entry, unsigned int nr_ents)
+{
+	return vswap_cgroup_record(entry, 0, nr_ents);
+}
+
+unsigned short lookup_swap_cgroup_id(swp_entry_t entry)
+{
+	struct swp_desc *desc;
+	unsigned short ret;
+
+	/*
+	 * The virtual swap slot can be freed under us, for instance during
+	 * mem_cgroup_swapin_charge_folio(). Wrap the entire lookup in an
+	 * RCU read-side critical section, and double-check that the swap
+	 * descriptor still exists before dereferencing it.
+	 */
+	rcu_read_lock();
+	desc = xa_load(&vswap_map, entry.val);
+	ret = desc ? atomic_read(&desc->memcgid) : 0;
+	rcu_read_unlock();
+	return ret;
+}
+
+int swap_cgroup_swapon(int type, unsigned long max_pages)
+{
+	return 0;
+}
+
+void swap_cgroup_swapoff(int type) {}
+#endif /* CONFIG_MEMCG */
+
 int vswap_init(void)
 {
 	swp_desc_cache = KMEM_CACHE(swp_desc, 0);
-- 
2.47.1

