Message-ID: <aUN29S2CDv0KbfXj@MiWiFi-R3L-srv>
Date: Thu, 18 Dec 2025 11:37:25 +0800
From: Baoquan He <bhe@...hat.com>
To: Kairui Song <ryncsn@...il.com>
Cc: linux-mm@...ck.org, Andrew Morton <akpm@...ux-foundation.org>,
Barry Song <baohua@...nel.org>, Chris Li <chrisl@...nel.org>,
Nhat Pham <nphamcs@...il.com>, Yosry Ahmed <yosry.ahmed@...ux.dev>,
David Hildenbrand <david@...nel.org>,
Johannes Weiner <hannes@...xchg.org>,
Youngjun Park <youngjun.park@....com>,
Hugh Dickins <hughd@...gle.com>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
Ying Huang <ying.huang@...ux.alibaba.com>,
Kemeng Shi <shikemeng@...weicloud.com>,
Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
"Matthew Wilcox (Oracle)" <willy@...radead.org>,
linux-kernel@...r.kernel.org, Kairui Song <kasong@...cent.com>
Subject: Re: [PATCH v4 13/19] mm, swap: remove workaround for unsynchronized
swap map cache state
On 12/05/25 at 03:29am, Kairui Song wrote:
> From: Kairui Song <kasong@...cent.com>
>
> Remove the "skip if exists" check from commit a65b0e7607ccb ("zswap:
> make shrinking memcg-aware"). It was needed because there was a tiny
> time window between setting the SWAP_HAS_CACHE bit and actually adding
> the folio to the swap cache. If one user was interrupted after setting
> SWAP_HAS_CACHE but before adding the folio to the swap cache, another
> user trying to add a folio for the same entry could keep retrying
> against the still-empty cache, which might lead to a deadlock.
>
> We have moved the bit setting to the same critical section as adding the
> folio, so this is no longer needed. Remove it and clean it up.
>
> Signed-off-by: Kairui Song <kasong@...cent.com>
> ---
> mm/swap.h | 2 +-
> mm/swap_state.c | 27 ++++++++++-----------------
> mm/zswap.c | 2 +-
> 3 files changed, 12 insertions(+), 19 deletions(-)
Reviewed-by: Baoquan He <bhe@...hat.com>
>
> diff --git a/mm/swap.h b/mm/swap.h
> index b5075a1aee04..6777b2ab9d92 100644
> --- a/mm/swap.h
> +++ b/mm/swap.h
> @@ -260,7 +260,7 @@ int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
> void swap_cache_del_folio(struct folio *folio);
> struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags,
> struct mempolicy *mpol, pgoff_t ilx,
> - bool *alloced, bool skip_if_exists);
> + bool *alloced);
> /* Below helpers require the caller to lock and pass in the swap cluster. */
> void __swap_cache_del_folio(struct swap_cluster_info *ci,
> struct folio *folio, swp_entry_t entry, void *shadow);
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index df7df8b75e52..1a69ba3be87f 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -445,8 +445,6 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
> * @folio: folio to be added.
>  * @gfp: memory allocation flags for charge, can be 0 if @charged is true.
> * @charged: if the folio is already charged.
> - * @skip_if_exists: if the slot is in a cached state, return NULL.
> - * This is an old workaround that will be removed shortly.
> *
> * Update the swap_map and add folio as swap cache, typically before swapin.
> * All swap slots covered by the folio must have a non-zero swap count.
> @@ -457,8 +455,7 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
> */
> static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
> struct folio *folio,
> - gfp_t gfp, bool charged,
> - bool skip_if_exists)
> + gfp_t gfp, bool charged)
> {
> struct folio *swapcache = NULL;
> void *shadow;
> @@ -478,7 +475,7 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
> * might return a folio that is irrelevant to the faulting
> * entry because @entry is aligned down. Just return NULL.
> */
> - if (ret != -EEXIST || skip_if_exists || folio_test_large(folio))
> + if (ret != -EEXIST || folio_test_large(folio))
> goto failed;
>
> swapcache = swap_cache_get_folio(entry);
> @@ -511,8 +508,6 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
> * @mpol: NUMA memory allocation policy to be applied
> * @ilx: NUMA interleave index, for use only when MPOL_INTERLEAVE
> * @new_page_allocated: sets true if allocation happened, false otherwise
> - * @skip_if_exists: if the slot is in a partially cached state, return NULL.
> - * This is a workaround that would be removed shortly.
> *
> * Allocate a folio in the swap cache for one swap slot, typically before
> * doing IO (e.g. swap in or zswap writeback). The swap slot indicated by
> @@ -525,8 +520,7 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
> */
> struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
> struct mempolicy *mpol, pgoff_t ilx,
> - bool *new_page_allocated,
> - bool skip_if_exists)
> + bool *new_page_allocated)
> {
> struct swap_info_struct *si = __swap_entry_to_info(entry);
> struct folio *folio;
> @@ -547,8 +541,7 @@ struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
> if (!folio)
> return NULL;
> /* Try add the new folio, returns existing folio or NULL on failure. */
> - result = __swap_cache_prepare_and_add(entry, folio, gfp_mask,
> - false, skip_if_exists);
> + result = __swap_cache_prepare_and_add(entry, folio, gfp_mask, false);
> if (result == folio)
> *new_page_allocated = true;
> else
> @@ -577,7 +570,7 @@ struct folio *swapin_folio(swp_entry_t entry, struct folio *folio)
> unsigned long nr_pages = folio_nr_pages(folio);
>
> entry = swp_entry(swp_type(entry), round_down(offset, nr_pages));
> - swapcache = __swap_cache_prepare_and_add(entry, folio, 0, true, false);
> + swapcache = __swap_cache_prepare_and_add(entry, folio, 0, true);
> if (swapcache == folio)
> swap_read_folio(folio, NULL);
> return swapcache;
> @@ -605,7 +598,7 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>
> mpol = get_vma_policy(vma, addr, 0, &ilx);
> folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
> - &page_allocated, false);
> + &page_allocated);
> mpol_cond_put(mpol);
>
> if (page_allocated)
> @@ -724,7 +717,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
> /* Ok, do the async read-ahead now */
> folio = swap_cache_alloc_folio(
> swp_entry(swp_type(entry), offset), gfp_mask, mpol, ilx,
> - &page_allocated, false);
> + &page_allocated);
> if (!folio)
> continue;
> if (page_allocated) {
> @@ -742,7 +735,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
> skip:
> /* The page was likely read above, so no need for plugging here */
> folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
> - &page_allocated, false);
> + &page_allocated);
> if (unlikely(page_allocated))
> swap_read_folio(folio, NULL);
> return folio;
> @@ -847,7 +840,7 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
> continue;
> }
> folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
> - &page_allocated, false);
> + &page_allocated);
> if (si)
> put_swap_device(si);
> if (!folio)
> @@ -869,7 +862,7 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
> skip:
> /* The folio was likely read above, so no need for plugging here */
> folio = swap_cache_alloc_folio(targ_entry, gfp_mask, mpol, targ_ilx,
> - &page_allocated, false);
> + &page_allocated);
> if (unlikely(page_allocated))
> swap_read_folio(folio, NULL);
> return folio;
> diff --git a/mm/zswap.c b/mm/zswap.c
> index a7a2443912f4..d8a33db9d3cc 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1015,7 +1015,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
>
> mpol = get_task_policy(current);
> folio = swap_cache_alloc_folio(swpentry, GFP_KERNEL, mpol,
> - NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
> + NO_INTERLEAVE_INDEX, &folio_was_allocated);
> put_swap_device(si);
> if (!folio)
> return -ENOMEM;
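Also double checked the zswap side: with the workaround gone, writeback
still bails out cleanly when it races with a swapin. If I read the
surrounding code correctly, right after this hunk we have roughly:

	if (!folio_was_allocated) {
		folio_put(folio);
		return -EEXIST;
	}

so the caller behaviour is unchanged.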
>
> --
> 2.52.0
>