Message-ID: <CAGsJ_4zhRJdC7MH+3d9KfD1n3t4HiF8-OdWrKXUO7SH_H=1ZUQ@mail.gmail.com>
Date: Sun, 13 Jul 2025 18:53:02 +0800
From: Barry Song <21cnbao@...il.com>
To: Kairui Song <kasong@...cent.com>
Cc: linux-mm@...ck.org, Andrew Morton <akpm@...ux-foundation.org>,
Hugh Dickins <hughd@...gle.com>, Baolin Wang <baolin.wang@...ux.alibaba.com>,
Matthew Wilcox <willy@...radead.org>, Kemeng Shi <shikemeng@...weicloud.com>,
Chris Li <chrisl@...nel.org>, Nhat Pham <nphamcs@...il.com>, Baoquan He <bhe@...hat.com>,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v5 5/8] mm/shmem, swap: never use swap cache and readahead
for SWP_SYNCHRONOUS_IO
On Thu, Jul 10, 2025 at 11:37 AM Kairui Song <ryncsn@...il.com> wrote:
>
> From: Kairui Song <kasong@...cent.com>
>
> For SWP_SYNCHRONOUS_IO devices, if a cache-bypassing THP swapin fails
> due to reasons like memory pressure, a partially conflicting swap cache,
> or ZSWAP being enabled, shmem will fall back to cached order 0 swapin.
>
> Right now the swap cache still has a non-trivial overhead, and readahead
> is not helpful for SWP_SYNCHRONOUS_IO devices, so we should always skip
> the readahead and swap cache even if the swapin falls back to order 0.
>
> So handle the fallback logic without falling back to the cached read.
>
> Signed-off-by: Kairui Song <kasong@...cent.com>
> ---
> mm/shmem.c | 41 ++++++++++++++++++++++++++++-------------
> 1 file changed, 28 insertions(+), 13 deletions(-)
>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 97db1097f7de..847e6f128485 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1982,6 +1982,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
>  	struct shmem_inode_info *info = SHMEM_I(inode);
>  	int nr_pages = 1 << order;
>  	struct folio *new;
> +	gfp_t alloc_gfp;
>  	void *shadow;
>
>  	/*
> @@ -1989,6 +1990,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
>  	 * limit chance of success with further cpuset and node constraints.
>  	 */
>  	gfp &= ~GFP_CONSTRAINT_MASK;
> +	alloc_gfp = gfp;
>  	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
>  		if (WARN_ON_ONCE(order))
>  			return ERR_PTR(-EINVAL);
> @@ -2003,19 +2005,22 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
>  		if ((vma && unlikely(userfaultfd_armed(vma))) ||
>  		    !zswap_never_enabled() ||
>  		    non_swapcache_batch(entry, nr_pages) != nr_pages)
> -			return ERR_PTR(-EINVAL);
> +			goto fallback;
>
> -		gfp = limit_gfp_mask(vma_thp_gfp_mask(vma), gfp);
> +		alloc_gfp = limit_gfp_mask(vma_thp_gfp_mask(vma), gfp);
> +	}
> +retry:
> +	new = shmem_alloc_folio(alloc_gfp, order, info, index);
> +	if (!new) {
> +		new = ERR_PTR(-ENOMEM);
> +		goto fallback;
>  	}
> -
> -	new = shmem_alloc_folio(gfp, order, info, index);
> -	if (!new)
> -		return ERR_PTR(-ENOMEM);
>
>  	if (mem_cgroup_swapin_charge_folio(new, vma ? vma->vm_mm : NULL,
> -					   gfp, entry)) {
> +					   alloc_gfp, entry)) {
>  		folio_put(new);
> -		return ERR_PTR(-ENOMEM);
> +		new = ERR_PTR(-ENOMEM);
> +		goto fallback;
>  	}
>
>  	/*
> @@ -2030,7 +2035,9 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
>  	 */
>  	if (swapcache_prepare(entry, nr_pages)) {
>  		folio_put(new);
> -		return ERR_PTR(-EEXIST);
> +		new = ERR_PTR(-EEXIST);
> +		/* Try smaller folio to avoid cache conflict */
> +		goto fallback;
>  	}
>
>  	__folio_set_locked(new);
> @@ -2044,6 +2051,15 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
>  	folio_add_lru(new);
>  	swap_read_folio(new, NULL);
>  	return new;
> +fallback:
> +	/* Order 0 swapin failed, nothing to fallback to, abort */
> +	if (!order)
> +		return new;
Feels a bit odd to me. Would it be possible to handle this earlier,
like:

	if (!order)
		return ERR_PTR(-ENOMEM);
	goto fallback;

or:

	if (order)
		goto fallback;
	return ERR_PTR(-ENOMEM);
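
That way each failure site decides up front whether there is anything
to fall back to. Just as an untested sketch reusing the names in this
patch, the allocation failure path could then read:

	new = shmem_alloc_folio(alloc_gfp, order, info, index);
	if (!new) {
		/* order 0 has nothing smaller to fall back to */
		if (!order)
			return ERR_PTR(-ENOMEM);
		goto fallback;
	}

and the fallback label would only ever see order > 0.
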
Not strongly opinionated here—totally up to you.
Thanks
Barry