Message-ID: <aUN29S2CDv0KbfXj@MiWiFi-R3L-srv>
Date: Thu, 18 Dec 2025 11:37:25 +0800
From: Baoquan He <bhe@...hat.com>
To: Kairui Song <ryncsn@...il.com>
Cc: linux-mm@...ck.org, Andrew Morton <akpm@...ux-foundation.org>,
Barry Song <baohua@...nel.org>, Chris Li <chrisl@...nel.org>,
Nhat Pham <nphamcs@...il.com>, Yosry Ahmed <yosry.ahmed@...ux.dev>,
David Hildenbrand <david@...nel.org>,
Johannes Weiner <hannes@...xchg.org>,
Youngjun Park <youngjun.park@....com>,
Hugh Dickins <hughd@...gle.com>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
Ying Huang <ying.huang@...ux.alibaba.com>,
Kemeng Shi <shikemeng@...weicloud.com>,
Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
"Matthew Wilcox (Oracle)" <willy@...radead.org>,
linux-kernel@...r.kernel.org, Kairui Song <kasong@...cent.com>
Subject: Re: [PATCH v4 13/19] mm, swap: remove workaround for unsynchronized
swap map cache state
On 12/05/25 at 03:29am, Kairui Song wrote:
> From: Kairui Song <kasong@...cent.com>
>
> Remove the "skip if exists" check from commit a65b0e7607ccb ("zswap:
> make shrinking memcg-aware"). It was needed because there was a tiny
> time window between setting the SWAP_HAS_CACHE bit and actually adding
> the folio to the swap cache. If one user was interrupted after setting
> SWAP_HAS_CACHE but before adding the folio to the swap cache, another
> user trying to add a folio for the same entry could keep retrying
> against the still-empty cache, which might lead to a deadlock.
>
> We have moved the bit setting to the same critical section as adding the
> folio, so this is no longer needed. Remove it and clean it up.
>
> Signed-off-by: Kairui Song <kasong@...cent.com>
> ---
> mm/swap.h | 2 +-
> mm/swap_state.c | 27 ++++++++++-----------------
> mm/zswap.c | 2 +-
> 3 files changed, 12 insertions(+), 19 deletions(-)
Reviewed-by: Baoquan He <bhe@...hat.com>
>
> diff --git a/mm/swap.h b/mm/swap.h
> index b5075a1aee04..6777b2ab9d92 100644
> --- a/mm/swap.h
> +++ b/mm/swap.h
> @@ -260,7 +260,7 @@ int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
> void swap_cache_del_folio(struct folio *folio);
> struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags,
> struct mempolicy *mpol, pgoff_t ilx,
> - bool *alloced, bool skip_if_exists);
> + bool *alloced);
> /* Below helpers require the caller to lock and pass in the swap cluster. */
> void __swap_cache_del_folio(struct swap_cluster_info *ci,
> struct folio *folio, swp_entry_t entry, void *shadow);
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index df7df8b75e52..1a69ba3be87f 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -445,8 +445,6 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
> * @folio: folio to be added.
>  * @gfp: memory allocation flags for charge, can be 0 if @charged is true.
> * @charged: if the folio is already charged.
> - * @skip_if_exists: if the slot is in a cached state, return NULL.
> - * This is an old workaround that will be removed shortly.
> *
> * Update the swap_map and add folio as swap cache, typically before swapin.
> * All swap slots covered by the folio must have a non-zero swap count.
> @@ -457,8 +455,7 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
> */
> static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
> struct folio *folio,
> - gfp_t gfp, bool charged,
> - bool skip_if_exists)
> + gfp_t gfp, bool charged)
> {
> struct folio *swapcache = NULL;
> void *shadow;
> @@ -478,7 +475,7 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
> * might return a folio that is irrelevant to the faulting
> * entry because @entry is aligned down. Just return NULL.
> */
> - if (ret != -EEXIST || skip_if_exists || folio_test_large(folio))
> + if (ret != -EEXIST || folio_test_large(folio))
> goto failed;
>
> swapcache = swap_cache_get_folio(entry);
> @@ -511,8 +508,6 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
> * @mpol: NUMA memory allocation policy to be applied
> * @ilx: NUMA interleave index, for use only when MPOL_INTERLEAVE
> * @new_page_allocated: sets true if allocation happened, false otherwise
> - * @skip_if_exists: if the slot is in a partially cached state, return NULL.
> - * This is a workaround that would be removed shortly.
> *
> * Allocate a folio in the swap cache for one swap slot, typically before
> * doing IO (e.g. swap in or zswap writeback). The swap slot indicated by
> @@ -525,8 +520,7 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
> */
> struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
> struct mempolicy *mpol, pgoff_t ilx,
> - bool *new_page_allocated,
> - bool skip_if_exists)
> + bool *new_page_allocated)
> {
> struct swap_info_struct *si = __swap_entry_to_info(entry);
> struct folio *folio;
> @@ -547,8 +541,7 @@ struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
> if (!folio)
> return NULL;
> /* Try add the new folio, returns existing folio or NULL on failure. */
> - result = __swap_cache_prepare_and_add(entry, folio, gfp_mask,
> - false, skip_if_exists);
> + result = __swap_cache_prepare_and_add(entry, folio, gfp_mask, false);
> if (result == folio)
> *new_page_allocated = true;
> else
> @@ -577,7 +570,7 @@ struct folio *swapin_folio(swp_entry_t entry, struct folio *folio)
> unsigned long nr_pages = folio_nr_pages(folio);
>
> entry = swp_entry(swp_type(entry), round_down(offset, nr_pages));
> - swapcache = __swap_cache_prepare_and_add(entry, folio, 0, true, false);
> + swapcache = __swap_cache_prepare_and_add(entry, folio, 0, true);
> if (swapcache == folio)
> swap_read_folio(folio, NULL);
> return swapcache;
> @@ -605,7 +598,7 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>
> mpol = get_vma_policy(vma, addr, 0, &ilx);
> folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
> - &page_allocated, false);
> + &page_allocated);
> mpol_cond_put(mpol);
>
> if (page_allocated)
> @@ -724,7 +717,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
> /* Ok, do the async read-ahead now */
> folio = swap_cache_alloc_folio(
> swp_entry(swp_type(entry), offset), gfp_mask, mpol, ilx,
> - &page_allocated, false);
> + &page_allocated);
> if (!folio)
> continue;
> if (page_allocated) {
> @@ -742,7 +735,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
> skip:
> /* The page was likely read above, so no need for plugging here */
> folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
> - &page_allocated, false);
> + &page_allocated);
> if (unlikely(page_allocated))
> swap_read_folio(folio, NULL);
> return folio;
> @@ -847,7 +840,7 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
> continue;
> }
> folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
> - &page_allocated, false);
> + &page_allocated);
> if (si)
> put_swap_device(si);
> if (!folio)
> @@ -869,7 +862,7 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
> skip:
> /* The folio was likely read above, so no need for plugging here */
> folio = swap_cache_alloc_folio(targ_entry, gfp_mask, mpol, targ_ilx,
> - &page_allocated, false);
> + &page_allocated);
> if (unlikely(page_allocated))
> swap_read_folio(folio, NULL);
> return folio;
> diff --git a/mm/zswap.c b/mm/zswap.c
> index a7a2443912f4..d8a33db9d3cc 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1015,7 +1015,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
>
> mpol = get_task_policy(current);
> folio = swap_cache_alloc_folio(swpentry, GFP_KERNEL, mpol,
> - NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
> + NO_INTERLEAVE_INDEX, &folio_was_allocated);
> put_swap_device(si);
> if (!folio)
> return -ENOMEM;
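Also double checked the zswap side: with the workaround gone, writeback
still bails out cleanly when it races with a swapin. If I read the
surrounding code correctly, right after this hunk we have roughly:

	if (!folio_was_allocated) {
		folio_put(folio);
		return -EEXIST;
	}

so the caller behaviour is unchanged.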
>
> --
> 2.52.0
>