lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251109124947.1101520-4-youngjun.park@lge.com>
Date: Sun,  9 Nov 2025 21:49:47 +0900
From: Youngjun Park <youngjun.park@....com>
To: akpm@...ux-foundation.org,
	linux-mm@...ck.org
Cc: cgroups@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	chrisl@...nel.org,
	kasong@...cent.com,
	hannes@...xchg.org,
	mhocko@...nel.org,
	roman.gushchin@...ux.dev,
	shakeel.butt@...ux.dev,
	muchun.song@...ux.dev,
	shikemeng@...weicloud.com,
	nphamcs@...il.com,
	bhe@...hat.com,
	baohua@...nel.org,
	youngjun.park@....com,
	gunho.lee@....com,
	taejoon.song@....com
Subject: [PATCH 3/3] mm/swap: integrate swap tier infrastructure into swap subsystem

Integrate the swap tier infrastructure into the existing swap subsystem
to enable selective swap device usage based on tier configuration.

Signed-off-by: Youngjun Park <youngjun.park@....com>
---
 mm/memcontrol.c | 69 ++++++++++++++++++++++++++++++++++++
 mm/page_io.c    | 21 ++++++++++-
 mm/swap_state.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++
 mm/swapfile.c   | 15 ++++++--
 4 files changed, 194 insertions(+), 4 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bfc986da3289..33c7cc069754 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -68,6 +68,7 @@
 #include <net/ip.h>
 #include "slab.h"
 #include "memcontrol-v1.h"
+#include "swap_tier.h"
 
 #include <linux/uaccess.h>
 
@@ -3730,6 +3731,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	lru_gen_exit_memcg(memcg);
 	memcg_wb_domain_exit(memcg);
+	swap_tiers_put_mask(memcg);
 	__mem_cgroup_free(memcg);
 }
 
@@ -3842,6 +3844,11 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		page_counter_init(&memcg->kmem, &parent->kmem, false);
 		page_counter_init(&memcg->tcpmem, &parent->tcpmem, false);
 #endif
+#ifdef CONFIG_SWAP_TIER
+		memcg->tiers_mask = 0;
+		memcg->tiers_onoff = 0;
+#endif
+
 	} else {
 		init_memcg_stats();
 		init_memcg_events();
@@ -3850,6 +3857,10 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 #ifdef CONFIG_MEMCG_V1
 		page_counter_init(&memcg->kmem, NULL, false);
 		page_counter_init(&memcg->tcpmem, NULL, false);
+#endif
+#ifdef CONFIG_SWAP_TIER
+		memcg->tiers_mask = DEFAULT_FULL_MASK;
+		memcg->tiers_onoff = DEFAULT_ON_MASK;
 #endif
 		root_mem_cgroup = memcg;
 		return &memcg->css;
@@ -5390,6 +5401,56 @@ static int swap_events_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+#ifdef CONFIG_SWAP_TIER
+static int swap_tier_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+
+	swap_tiers_show_memcg(m, memcg);
+	return 0;
+}
+
+static ssize_t swap_tier_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	struct tiers_desc desc[MAX_SWAPTIER] = {};
+	char *pos = buf, *token;
+	int nr = 0;
+	int ret;
+
+	while ((token = strsep(&pos, " \t\n")) != NULL) {
+		if (!*token)
+			continue;
+
+		if (nr >= MAX_SWAPTIER)
+			return -E2BIG;
+
+		if (token[0] != '+' && token[0] != '-')
+			return -EINVAL;
+
+		desc[nr].ops = (token[0] == '+') ? TIER_ON_MASK : TIER_OFF_MASK;
+
+		if (strlen(token) <= 1) {
+			strscpy(desc[nr].name, DEFAULT_TIER_NAME);
+			nr++;
+			continue;
+		}
+
+		if (strscpy(desc[nr].name, token + 1, MAX_TIERNAME) < 0)
+			return -EINVAL;
+
+		nr++;
+	}
+
+	ret = swap_tiers_get_mask(desc, nr, memcg);
+	if (ret)
+		return ret;
+
+	return nbytes;
+}
+#endif
+
 static struct cftype swap_files[] = {
 	{
 		.name = "swap.current",
@@ -5422,6 +5483,14 @@ static struct cftype swap_files[] = {
 		.file_offset = offsetof(struct mem_cgroup, swap_events_file),
 		.seq_show = swap_events_show,
 	},
+#ifdef CONFIG_SWAP_TIER
+	{
+		.name = "swap.tiers",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = swap_tier_show,
+		.write = swap_tier_write,
+	},
+#endif
 	{ }	/* terminate */
 };
 
diff --git a/mm/page_io.c b/mm/page_io.c
index 3c342db77ce3..2b3b1154a169 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -26,6 +26,7 @@
 #include <linux/delayacct.h>
 #include <linux/zswap.h>
 #include "swap.h"
+#include "swap_tier.h"
 
 static void __end_swap_bio_write(struct bio *bio)
 {
@@ -233,6 +234,24 @@ static void swap_zeromap_folio_clear(struct folio *folio)
 	}
 }
 
+#if defined(CONFIG_SWAP_TIER) && defined(CONFIG_ZSWAP)
+static bool folio_swap_tier_zswap_test_off(struct folio *folio)
+{
+	struct mem_cgroup *memcg;
+
+	memcg = folio_memcg(folio);
+	if (memcg)
+		return swap_tier_test_off(memcg->tiers_mask,
+			TIER_MASK(SWAP_TIER_ZSWAP, TIER_ON_MASK));
+
+	return false;
+}
+#else
+static bool folio_swap_tier_zswap_test_off(struct folio *folio)
+{
+	return false;
+}
+#endif
 /*
  * We may have stale swap cache pages in memory: notice
  * them here and get rid of the unnecessary final write.
@@ -272,7 +291,7 @@ int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug)
 	 */
 	swap_zeromap_folio_clear(folio);
 
-	if (zswap_store(folio)) {
+	if (folio_swap_tier_zswap_test_off(folio) || zswap_store(folio)) {
 		count_mthp_stat(folio_order(folio), MTHP_STAT_ZSWPOUT);
 		goto out_unlock;
 	}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3f85a1c4cfd9..2e5f65ff2479 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -25,6 +25,7 @@
 #include "internal.h"
 #include "swap_table.h"
 #include "swap.h"
+#include "swap_tier.h"
 
 /*
  * swapper_space is a fiction, retained to simplify the path through
@@ -836,8 +837,100 @@ static ssize_t vma_ra_enabled_store(struct kobject *kobj,
 }
 static struct kobj_attribute vma_ra_enabled_attr = __ATTR_RW(vma_ra_enabled);
 
+#ifdef CONFIG_SWAP_TIER
+static ssize_t tiers_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	return swap_tiers_show_sysfs(buf);
+}
+
+static ssize_t tiers_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct tiers_desc desc[MAX_SWAPTIER] = {};
+	int nr = 0;
+	char *data, *p, *token;
+	int ret = 0;
+	bool is_add = true;
+
+	if (!count)
+		return -EINVAL;
+
+	data = kmemdup_nul(buf, count, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	p = data;
+
+	if (*p == '+')
+		p++;
+	else if (*p == '-') {
+		is_add = false;
+		p++;
+	} else
+		return -EINVAL;
+
+	while ((token = strsep(&p, ", \t\n")) != NULL) {
+		if (!*token)
+			continue;
+
+		if (nr >= MAX_SWAPTIER) {
+			ret = -E2BIG;
+			goto out;
+		}
+
+		if (is_add) {
+			char *name, *prio_str;
+			int prio;
+
+			name = strsep(&token, ":");
+			prio_str = token;
+
+			if (!name || !prio_str || !*name || !*prio_str) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (strscpy(desc[nr].name, name, MAX_TIERNAME) < 0) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (kstrtoint(prio_str, 10, &prio)) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			desc[nr].prio_st = prio;
+		} else {
+			if (strscpy(desc[nr].name, token, MAX_TIERNAME) < 0) {
+				ret = -EINVAL;
+				goto out;
+			}
+			desc[nr].prio_st = 0;
+		}
+		nr++;
+	}
+
+	if (is_add)
+		ret = swap_tiers_add(desc, nr);
+	else
+		ret = swap_tiers_remove(desc, nr);
+
+out:
+	kfree(data);
+	return ret ? ret : count;
+}
+
+static struct kobj_attribute tier_attr = __ATTR_RW(tiers);
+#endif
+
 static struct attribute *swap_attrs[] = {
 	&vma_ra_enabled_attr.attr,
+#ifdef CONFIG_SWAP_TIER
+	&tier_attr.attr,
+#endif
 	NULL,
 };
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index a5c90e419ff3..8715a2d94140 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -49,6 +49,7 @@
 #include "swap_table.h"
 #include "internal.h"
 #include "swap.h"
+#include "swap_tier.h"
 
 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 				 unsigned char);
@@ -1296,7 +1297,8 @@ static bool get_swap_device_info(struct swap_info_struct *si)
 
 /* Rotate the device and switch to a new cluster */
 static void swap_alloc_entry(swp_entry_t *entry,
-			    int order)
+			    int order,
+			    int mask)
 {
 	unsigned long offset;
 	struct swap_info_struct *si, *next;
@@ -1304,6 +1306,8 @@ static void swap_alloc_entry(swp_entry_t *entry,
 	spin_lock(&swap_avail_lock);
 start_over:
 	plist_for_each_entry_safe(si, next, &swap_avail_head, avail_list) {
+		if (swap_tiers_test_off(si->tier_idx, mask))
+			continue;
 		/* Rotate the device and switch to a new cluster */
 		plist_requeue(&si->avail_list, &swap_avail_head);
 		spin_unlock(&swap_avail_lock);
@@ -1376,6 +1380,7 @@ int folio_alloc_swap(struct folio *folio)
 {
 	unsigned int order = folio_order(folio);
 	unsigned int size = 1 << order;
+	int mask;
 	swp_entry_t entry = {};
 
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -1400,8 +1405,8 @@ int folio_alloc_swap(struct folio *folio)
 	}
 
 again:
-	swap_alloc_entry(&entry, order);
-
+	mask = swap_tiers_collect_compare_mask(folio_memcg(folio));
+	swap_alloc_entry(&entry, order, mask);
 	if (unlikely(!order && !entry.val)) {
 		if (swap_sync_discard())
 			goto again;
@@ -2673,6 +2678,8 @@ static void _enable_swap_info(struct swap_info_struct *si)
 
 	/* Add back to available list */
 	add_to_avail_list(si, true);
+
+	swap_tiers_assign(si);
 }
 
 static void enable_swap_info(struct swap_info_struct *si, int prio,
@@ -2840,6 +2847,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	spin_lock(&swap_lock);
 	spin_lock(&p->lock);
 	drain_mmlist();
+	swap_tiers_release(p);
 
 	swap_file = p->swap_file;
 	p->swap_file = NULL;
@@ -4004,6 +4012,7 @@ static int __init swapfile_init(void)
 		swap_migration_ad_supported = true;
 #endif	/* CONFIG_MIGRATION */
 
+	swap_tiers_init();
 	return 0;
 }
 subsys_initcall(swapfile_init);
-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ