Message-ID: <20250822192023.13477-3-ryncsn@gmail.com>
Date: Sat, 23 Aug 2025 03:20:16 +0800
From: Kairui Song <ryncsn@...il.com>
To: linux-mm@...ck.org
Cc: Andrew Morton <akpm@...ux-foundation.org>,
Matthew Wilcox <willy@...radead.org>,
Hugh Dickins <hughd@...gle.com>,
Chris Li <chrisl@...nel.org>,
Barry Song <baohua@...nel.org>,
Baoquan He <bhe@...hat.com>,
Nhat Pham <nphamcs@...il.com>,
Kemeng Shi <shikemeng@...weicloud.com>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
Ying Huang <ying.huang@...ux.alibaba.com>,
Johannes Weiner <hannes@...xchg.org>,
David Hildenbrand <david@...hat.com>,
Yosry Ahmed <yosryahmed@...gle.com>,
Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
Zi Yan <ziy@...dia.com>,
linux-kernel@...r.kernel.org,
Kairui Song <kasong@...cent.com>
Subject: [PATCH 2/9] mm, swap: always lock and check the swap cache folio before use
From: Kairui Song <kasong@...cent.com>
Swap cache lookup is lockless; it only increases the reference count
of the returned folio. That is not enough to keep the folio stable in
the swap cache, so the folio can be removed from the swap cache at any
time. The caller always has to lock and check the folio before use.

Document this convention in a comment, and introduce a helper for
verifying swap cache folios with proper sanity checks.

Also, sanitize all current users to follow this convention, and use the
new helper where possible for easier debugging. Some existing callers
won't cause any major problem right now, only trivial issues such as an
incorrect readahead statistic (swapin) or a wasted loop (swapoff), but
it's better to always follow the convention to make things robust.
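
The intended calling pattern looks roughly like this (an illustrative
sketch only, not taken verbatim from any caller; error handling and
pinning of the swap device are assumed to be done by the caller):

	folio = swap_cache_get_folio(entry);
	if (folio) {
		folio_lock(folio);
		/* The folio may have left the swap cache while unlocked */
		if (!folio_contains_swap(folio, entry)) {
			folio_unlock(folio);
			folio_put(folio);
			folio = NULL;	/* retry the lookup or swap in */
		}
	}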
Signed-off-by: Kairui Song <kasong@...cent.com>
---
mm/memory.c | 28 +++++++++++++---------------
mm/shmem.c | 4 ++--
mm/swap.h | 28 ++++++++++++++++++++++++++++
mm/swap_state.c | 13 +++++++++----
mm/swapfile.c | 10 ++++++++--
5 files changed, 60 insertions(+), 23 deletions(-)
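
For reviewers, the window being closed is roughly the following
interleaving (illustrative, using folio_free_swap() as one example of
how a folio can leave the swap cache):

	CPU 1					CPU 2
	folio = swap_cache_get_folio(entry)
						folio_free_swap() or swapoff
						removes folio from swap cache
	folio_lock(folio)
	folio_contains_swap(folio, entry)
	-> false, caller must bail out or retry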
diff --git a/mm/memory.c b/mm/memory.c
index 10ef528a5f44..9ca8e1873c6e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4661,12 +4661,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
goto out;
folio = swap_cache_get_folio(entry);
- if (folio) {
- swap_update_readahead(folio, vma, vmf->address);
- page = folio_file_page(folio, swp_offset(entry));
- }
swapcache = folio;
-
if (!folio) {
if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
__swap_count(entry) == 1) {
@@ -4735,20 +4730,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
- page = folio_file_page(folio, swp_offset(entry));
- } else if (PageHWPoison(page)) {
- /*
- * hwpoisoned dirty swapcache pages are kept for killing
- * owner processes (which may be unknown at hwpoison time)
- */
- ret = VM_FAULT_HWPOISON;
- goto out_release;
}
ret |= folio_lock_or_retry(folio, vmf);
if (ret & VM_FAULT_RETRY)
goto out_release;
+ page = folio_file_page(folio, swp_offset(entry));
if (swapcache) {
/*
* Make sure folio_free_swap() or swapoff did not release the
@@ -4757,10 +4745,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* swapcache, we need to check that the page's swap has not
* changed.
*/
- if (unlikely(!folio_test_swapcache(folio) ||
- page_swap_entry(page).val != entry.val))
+ if (!folio_contains_swap(folio, entry))
goto out_page;
+ if (PageHWPoison(page)) {
+ /*
+ * hwpoisoned dirty swapcache pages are kept for killing
+ * owner processes (which may be unknown at hwpoison time)
+ */
+ ret = VM_FAULT_HWPOISON;
+ goto out_page;
+ }
+
+ swap_update_readahead(folio, vma, vmf->address);
+
/*
* KSM sometimes has to copy on read faults, for example, if
* folio->index of non-ksm folios would be nonlinear inside the
diff --git a/mm/shmem.c b/mm/shmem.c
index e9d0d2784cd5..b4d39f2a1e0a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2379,8 +2379,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
count_vm_event(PGMAJFAULT);
count_memcg_event_mm(fault_mm, PGMAJFAULT);
}
- } else {
- swap_update_readahead(folio, NULL, 0);
}
if (order > folio_order(folio)) {
@@ -2431,6 +2429,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
error = -EIO;
goto failed;
}
+ if (!skip_swapcache)
+ swap_update_readahead(folio, NULL, 0);
folio_wait_writeback(folio);
nr_pages = folio_nr_pages(folio);
diff --git a/mm/swap.h b/mm/swap.h
index efb6d7ff9f30..bb2adbfd64a9 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -52,6 +52,29 @@ static inline pgoff_t swap_cache_index(swp_entry_t entry)
return swp_offset(entry) & SWAP_ADDRESS_SPACE_MASK;
}
+/**
+ * folio_contains_swap - Does this folio contain this swap entry?
+ * @folio: The folio.
+ * @entry: The swap entry to check against.
+ *
+ * The swap cache version of folio_contains().
+ *
+ * Context: The caller should have the folio locked to ensure
+ * nothing will move it out of the swap cache.
+ * Return: true or false.
+ */
+static inline bool folio_contains_swap(struct folio *folio, swp_entry_t entry)
+{
+ pgoff_t offset = swp_offset(entry);
+
+ VM_WARN_ON_ONCE(!folio_test_locked(folio));
+ if (unlikely(!folio_test_swapcache(folio)))
+ return false;
+ if (unlikely(swp_type(entry) != swp_type(folio->swap)))
+ return false;
+ return offset - swp_offset(folio->swap) < folio_nr_pages(folio);
+}
+
void show_swap_cache_info(void);
void *get_shadow_from_swap_cache(swp_entry_t entry);
int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
@@ -144,6 +167,11 @@ static inline pgoff_t swap_cache_index(swp_entry_t entry)
return 0;
}
+static inline bool folio_contains_swap(struct folio *folio, swp_entry_t entry)
+{
+ return false;
+}
+
static inline void show_swap_cache_info(void)
{
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index ff9eb761a103..be0d96494dc1 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -70,10 +70,12 @@ void show_swap_cache_info(void)
}
/*
- * Lookup a swap entry in the swap cache. A found folio will be returned
- * unlocked and with its refcount incremented.
+ * swap_cache_get_folio - Lookup a swap entry in the swap cache.
*
- * Caller must lock the swap device or hold a reference to keep it valid.
+ * A found folio will be returned unlocked and with its refcount increased.
+ *
+ * Context: Caller must ensure @entry is valid and pin the swap device, and
+ * must check the returned folio after locking it (e.g. folio_contains_swap).
*/
struct folio *swap_cache_get_folio(swp_entry_t entry)
{
@@ -338,7 +340,10 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
for (;;) {
int err;
- /* Check the swap cache in case the folio is already there */
+ /*
+ * Check the swap cache first. If a cached folio is found,
+ * return it unlocked; the caller will lock and check it.
+ */
folio = swap_cache_get_folio(entry);
if (folio)
goto got_folio;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4b8ab2cb49ca..12f2580ebe8d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -240,12 +240,12 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
* Offset could point to the middle of a large folio, or folio
* may no longer point to the expected offset before it's locked.
*/
- entry = folio->swap;
- if (offset < swp_offset(entry) || offset >= swp_offset(entry) + nr_pages) {
+ if (!folio_contains_swap(folio, entry)) {
folio_unlock(folio);
folio_put(folio);
goto again;
}
+ entry = folio->swap;
offset = swp_offset(entry);
need_reclaim = ((flags & TTRS_ANYWAY) ||
@@ -2150,6 +2150,12 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
}
folio_lock(folio);
+ if (!folio_contains_swap(folio, entry)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ continue;
+ }
+
folio_wait_writeback(folio);
ret = unuse_pte(vma, pmd, addr, entry, folio);
if (ret < 0) {
--
2.51.0