[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20260126-swap-table-p3-v1-7-a74155fab9b0@tencent.com>
Date: Mon, 26 Jan 2026 01:57:30 +0800
From: Kairui Song <ryncsn@...il.com>
To: linux-mm@...ck.org
Cc: Andrew Morton <akpm@...ux-foundation.org>,
Kemeng Shi <shikemeng@...weicloud.com>, Nhat Pham <nphamcs@...il.com>,
Baoquan He <bhe@...hat.com>, Barry Song <baohua@...nel.org>,
Johannes Weiner <hannes@...xchg.org>, David Hildenbrand <david@...nel.org>,
Lorenzo Stoakes <lorenzo.stoakes@...cle.com>, linux-kernel@...r.kernel.org,
Chris Li <chrisl@...nel.org>, Kairui Song <kasong@...cent.com>
Subject: [PATCH 07/12] mm, swap: mark bad slots in swap table directly
From: Kairui Song <kasong@...cent.com>
In preparation for deprecating swap_map, mark bad slots in the swap table
too when setting SWAP_MAP_BAD in swap_map. Also, refine the swap table
sanity check on freeing to adapt to the bad slots change. For swapoff,
the bad slots count must match the cluster usage count, as nothing
should touch them, and they contribute to the cluster usage count on
swapon. For ordinary swap table freeing, the swap table of clusters with
bad slots should never be freed since the cluster usage count never
reaches zero.
Signed-off-by: Kairui Song <kasong@...cent.com>
---
mm/swapfile.c | 56 +++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 41 insertions(+), 15 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index df8b13eecab1..bdce2abd9135 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -454,16 +454,37 @@ static void swap_table_free(struct swap_table *table)
swap_table_free_folio_rcu_cb);
}
+/*
+ * Sanity check to ensure nothing leaked, and the specified range is empty.
+ * One special case is that bad slots can't be freed, so check the number of
+ * bad slots for swapoff, and non-swapoff path must never free bad slots.
+ */
+static void swap_cluster_assert_empty(struct swap_cluster_info *ci, bool swapoff)
+{
+ unsigned int ci_off = 0, ci_end = SWAPFILE_CLUSTER;
+ unsigned long swp_tb;
+ int bad_slots = 0;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_VM) && !swapoff)
+ return;
+
+ do {
+ swp_tb = __swap_table_get(ci, ci_off);
+ if (swp_tb_is_bad(swp_tb))
+ bad_slots++;
+ else
+ WARN_ON_ONCE(!swp_tb_is_null(swp_tb));
+ } while (++ci_off < ci_end);
+
+ WARN_ON_ONCE(bad_slots != (swapoff ? ci->count : 0));
+}
+
static void swap_cluster_free_table(struct swap_cluster_info *ci)
{
- unsigned int ci_off;
struct swap_table *table;
/* Only empty cluster's table is allow to be freed */
lockdep_assert_held(&ci->lock);
- VM_WARN_ON_ONCE(!cluster_is_empty(ci));
- for (ci_off = 0; ci_off < SWAPFILE_CLUSTER; ci_off++)
- VM_WARN_ON_ONCE(!swp_tb_is_null(__swap_table_get(ci, ci_off)));
table = (void *)rcu_dereference_protected(ci->table, true);
rcu_assign_pointer(ci->table, NULL);
@@ -567,6 +588,7 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
static void __free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci)
{
+ swap_cluster_assert_empty(ci, false);
swap_cluster_free_table(ci);
move_cluster(si, ci, &si->free_clusters, CLUSTER_FLAG_FREE);
ci->order = 0;
@@ -747,9 +769,11 @@ static int swap_cluster_setup_bad_slot(struct swap_info_struct *si,
struct swap_cluster_info *cluster_info,
unsigned int offset, bool mask)
{
+ unsigned int ci_off = offset % SWAPFILE_CLUSTER;
unsigned long idx = offset / SWAPFILE_CLUSTER;
- struct swap_table *table;
struct swap_cluster_info *ci;
+ struct swap_table *table;
+ int ret = 0;
/* si->max may got shrunk by swap swap_activate() */
if (offset >= si->max && !mask) {
@@ -767,13 +791,7 @@ static int swap_cluster_setup_bad_slot(struct swap_info_struct *si,
pr_warn("Empty swap-file\n");
return -EINVAL;
}
- /* Check for duplicated bad swap slots. */
- if (si->swap_map[offset]) {
- pr_warn("Duplicated bad slot offset %d\n", offset);
- return -EINVAL;
- }
- si->swap_map[offset] = SWAP_MAP_BAD;
ci = cluster_info + idx;
if (!ci->table) {
table = swap_table_alloc(GFP_KERNEL);
@@ -781,13 +799,21 @@ static int swap_cluster_setup_bad_slot(struct swap_info_struct *si,
return -ENOMEM;
rcu_assign_pointer(ci->table, table);
}
-
- ci->count++;
+ spin_lock(&ci->lock);
+ /* Check for duplicated bad swap slots. */
+ if (__swap_table_xchg(ci, ci_off, SWP_TB_BAD) != SWP_TB_NULL) {
+ pr_warn("Duplicated bad slot offset %d\n", offset);
+ ret = -EINVAL;
+ } else {
+ si->swap_map[offset] = SWAP_MAP_BAD;
+ ci->count++;
+ }
+ spin_unlock(&ci->lock);
WARN_ON(ci->count > SWAPFILE_CLUSTER);
WARN_ON(ci->flags);
- return 0;
+ return ret;
}
/*
@@ -2743,7 +2769,7 @@ static void free_swap_cluster_info(struct swap_cluster_info *cluster_info,
/* Cluster with bad marks count will have a remaining table */
spin_lock(&ci->lock);
if (rcu_dereference_protected(ci->table, true)) {
- ci->count = 0;
+ swap_cluster_assert_empty(ci, true);
swap_cluster_free_table(ci);
}
spin_unlock(&ci->lock);
--
2.52.0
Powered by blists - more mailing lists