Message-ID: <20250514201729.48420-6-ryncsn@gmail.com>
Date: Thu, 15 May 2025 04:17:05 +0800
From: Kairui Song <ryncsn@...il.com>
To: linux-mm@...ck.org
Cc: Andrew Morton <akpm@...ux-foundation.org>,
Matthew Wilcox <willy@...radead.org>,
Hugh Dickins <hughd@...gle.com>,
Chris Li <chrisl@...nel.org>,
David Hildenbrand <david@...hat.com>,
Yosry Ahmed <yosryahmed@...gle.com>,
"Huang, Ying" <ying.huang@...ux.alibaba.com>,
Nhat Pham <nphamcs@...il.com>,
Johannes Weiner <hannes@...xchg.org>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
Baoquan He <bhe@...hat.com>,
Barry Song <baohua@...nel.org>,
Kalesh Singh <kaleshsingh@...gle.com>,
Kemeng Shi <shikemeng@...weicloud.com>,
Tim Chen <tim.c.chen@...ux.intel.com>,
Ryan Roberts <ryan.roberts@....com>,
linux-kernel@...r.kernel.org,
Kairui Song <kasong@...cent.com>
Subject: [PATCH 05/28] mm, swap: sanitize swap cache lookup convention
From: Kairui Song <kasong@...cent.com>
Swap cache lookup is lockless; the returned folio could be invalidated at
any time before it is locked. So the caller always has to lock and check
the folio before use.
Introduce a helper for checking a swap cache folio, document this convention,
and avoid touching the folio until it has been verified. Also update all
current users to follow this convention.
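To illustrate, here is a minimal sketch of the expected caller pattern
(illustrative only, not part of this patch; the `again' retry label and
the surrounding error handling are placeholder assumptions):

	folio = swap_cache_get_folio(entry);
	if (folio) {
		folio_lock(folio);
		if (!folio_swap_contains(folio, entry)) {
			/* Invalidated while unlocked: drop the reference and retry. */
			folio_unlock(folio);
			folio_put(folio);
			goto again;
		}
		/* Folio is verified; now safe to use it for this swap entry. */
	}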
Signed-off-by: Kairui Song <kasong@...cent.com>
---
mm/memory.c | 31 ++++++++++++++-----------------
mm/shmem.c | 4 ++--
mm/swap.h | 21 +++++++++++++++++++++
mm/swap_state.c | 8 ++++++--
mm/swapfile.c | 10 ++++++++--
mm/userfaultfd.c | 4 ++++
6 files changed, 55 insertions(+), 23 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 18b5a77a0a4b..254be0e88801 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4568,12 +4568,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
goto out;
folio = swap_cache_get_folio(entry);
- if (folio) {
- swap_update_readahead(folio, vma, vmf->address);
- page = folio_file_page(folio, swp_offset(entry));
- }
swapcache = folio;
-
if (!folio) {
if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
__swap_count(entry) == 1) {
@@ -4642,20 +4637,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
- page = folio_file_page(folio, swp_offset(entry));
- } else if (PageHWPoison(page)) {
- /*
- * hwpoisoned dirty swapcache pages are kept for killing
- * owner processes (which may be unknown at hwpoison time)
- */
- ret = VM_FAULT_HWPOISON;
- goto out_release;
}
ret |= folio_lock_or_retry(folio, vmf);
if (ret & VM_FAULT_RETRY)
goto out_release;
+ page = folio_file_page(folio, swp_offset(entry));
if (swapcache) {
/*
* Make sure folio_free_swap() or swapoff did not release the
@@ -4664,10 +4652,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* swapcache, we need to check that the page's swap has not
* changed.
*/
- if (unlikely(!folio_test_swapcache(folio) ||
- page_swap_entry(page).val != entry.val))
+ if (!folio_swap_contains(folio, entry))
goto out_page;
+ if (PageHWPoison(page)) {
+ /*
+ * hwpoisoned dirty swapcache pages are kept for killing
+ * owner processes (which may be unknown at hwpoison time)
+ */
+ ret = VM_FAULT_HWPOISON;
+ goto out_page;
+ }
+
+ swap_update_readahead(folio, vma, vmf->address);
+
/*
* KSM sometimes has to copy on read faults, for example, if
* page->index of !PageKSM() pages would be nonlinear inside the
@@ -4682,8 +4680,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
ret = VM_FAULT_HWPOISON;
folio = swapcache;
goto out_page;
- }
- if (folio != swapcache)
+ } else if (folio != swapcache)
page = folio_page(folio, 0);
/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 01f29cb31c7a..43d9e3bf16f4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2260,8 +2260,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
/* Look it up and read it in.. */
folio = swap_cache_get_folio(swap);
- if (folio)
- swap_update_readahead(folio, NULL, 0);
order = xa_get_order(&mapping->i_pages, index);
if (!folio) {
bool fallback_order0 = false;
@@ -2362,6 +2360,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
error = -EEXIST;
goto unlock;
}
+ if (!skip_swapcache)
+ swap_update_readahead(folio, NULL, 0);
if (!folio_test_uptodate(folio)) {
error = -EIO;
goto failed;
diff --git a/mm/swap.h b/mm/swap.h
index e83109ad1456..34af06bf6fa4 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -50,6 +50,22 @@ static inline pgoff_t swap_cache_index(swp_entry_t entry)
return swp_offset(entry) & SWAP_ADDRESS_SPACE_MASK;
}
+/*
+ * Check if a folio still contains a swap entry. Must be called after a
+ * swap cache lookup, as the folio might have been invalidated while
+ * it was unlocked.
+ */
+static inline bool folio_swap_contains(struct folio *folio, swp_entry_t entry)
+{
+ pgoff_t index = swp_offset(entry);
+ VM_WARN_ON_ONCE(!folio_test_locked(folio));
+ if (unlikely(!folio_test_swapcache(folio)))
+ return false;
+ if (unlikely(swp_type(entry) != swp_type(folio->swap)))
+ return false;
+ return (index - swp_offset(folio->swap)) < folio_nr_pages(folio);
+}
+
void show_swap_cache_info(void);
void *get_shadow_from_swap_cache(swp_entry_t entry);
int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
@@ -123,6 +139,11 @@ static inline pgoff_t swap_cache_index(swp_entry_t entry)
return 0;
}
+static inline bool folio_swap_contains(struct folio *folio, swp_entry_t entry)
+{
+ return false;
+}
+
static inline void show_swap_cache_info(void)
{
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index bca201100138..07c41676486a 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -170,7 +170,8 @@ void __delete_from_swap_cache(struct folio *folio,
* Lookup a swap entry in the swap cache. A found folio will be returned
* unlocked and with its refcount incremented.
*
- * Caller must hold a reference on the swap device.
+ * Caller must hold a reference on the swap device, and check if the
+ * returned folio is still valid after locking it (e.g. folio_swap_contains()).
*/
struct folio *swap_cache_get_folio(swp_entry_t entry)
{
@@ -339,7 +340,10 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
for (;;) {
int err;
- /* Check the swap cache in case the folio is already there */
+ /*
+ * Check the swap cache first. If a cached folio is found, return
+ * it unlocked; the caller will lock and check it.
+ */
folio = swap_cache_get_folio(entry);
if (folio)
goto got_folio;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 29e918102355..aa031fd27847 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -240,12 +240,12 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
* Offset could point to the middle of a large folio, or folio
* may no longer point to the expected offset before it's locked.
*/
- entry = folio->swap;
- if (offset < swp_offset(entry) || offset >= swp_offset(entry) + nr_pages) {
+ if (!folio_swap_contains(folio, entry)) {
folio_unlock(folio);
folio_put(folio);
goto again;
}
+ entry = folio->swap;
offset = swp_offset(entry);
need_reclaim = ((flags & TTRS_ANYWAY) ||
@@ -2117,6 +2117,12 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
}
folio_lock(folio);
+ if (!folio_swap_contains(folio, entry)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ continue;
+ }
+
folio_wait_writeback(folio);
ret = unuse_pte(vma, pmd, addr, entry, folio);
if (ret < 0) {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index e5a0db7f3331..5b4f01aecf35 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1409,6 +1409,10 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
goto retry;
}
}
+ if (!folio_swap_contains(src_folio, entry)) {
+ err = -EBUSY;
+ goto out;
+ }
err = move_swap_pte(mm, dst_vma, dst_addr, src_addr, dst_pte, src_pte,
orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval,
dst_ptl, src_ptl, src_folio);
--
2.49.0