[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAGsJ_4zsUCN2vgN6kgPmWZiORgH2d9g8h9kLBYsL+hVQRZhHCg@mail.gmail.com>
Date: Sat, 3 Aug 2024 18:38:41 +0800
From: Barry Song <baohua@...nel.org>
To: chrisl@...nel.org
Cc: Andrew Morton <akpm@...ux-foundation.org>, Kairui Song <kasong@...cent.com>,
Hugh Dickins <hughd@...gle.com>, Ryan Roberts <ryan.roberts@....com>,
"Huang, Ying" <ying.huang@...el.com>, Kalesh Singh <kaleshsingh@...gle.com>,
linux-kernel@...r.kernel.org, linux-mm@...ck.org
Subject: Re: [PATCH v5 6/9] mm: swap: allow cache reclaim to skip slot cache
On Wed, Jul 31, 2024 at 2:49 PM <chrisl@...nel.org> wrote:
>
> From: Kairui Song <kasong@...cent.com>
>
> Currently we free the reclaimed slots through slot cache even
> if the slot is required to be empty immediately. As a result
> the reclaim caller will see the slot still occupied even after a
> successful reclaim, and need to keep reclaiming until slot cache
> get flushed. This caused ineffective or over reclaim when SWAP is
> under stress.
>
> So introduce a new flag allowing the slot to be emptied bypassing
> the slot cache.
>
> Signed-off-by: Kairui Song <kasong@...cent.com>
> ---
> mm/swapfile.c | 152 +++++++++++++++++++++++++++++++++++++++++-----------------
> 1 file changed, 109 insertions(+), 43 deletions(-)
>
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 9b63b2262cc2..4c0fc0409d3c 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -53,8 +53,15 @@
> static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
> unsigned char);
> static void free_swap_count_continuations(struct swap_info_struct *);
> +static void swap_entry_range_free(struct swap_info_struct *si, swp_entry_t entry,
> + unsigned int nr_pages);
> static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
> unsigned int nr_entries);
> +static bool folio_swapcache_freeable(struct folio *folio);
> +static struct swap_cluster_info *lock_cluster_or_swap_info(
> + struct swap_info_struct *si, unsigned long offset);
> +static void unlock_cluster_or_swap_info(struct swap_info_struct *si,
> + struct swap_cluster_info *ci);
>
> static DEFINE_SPINLOCK(swap_lock);
> static unsigned int nr_swapfiles;
> @@ -129,8 +136,25 @@ static inline unsigned char swap_count(unsigned char ent)
> * corresponding page
> */
> #define TTRS_UNMAPPED 0x2
> -/* Reclaim the swap entry if swap is getting full*/
> +/* Reclaim the swap entry if swap is getting full */
> #define TTRS_FULL 0x4
> +/* Reclaim directly, bypass the slot cache and don't touch device lock */
> +#define TTRS_DIRECT 0x8
> +
> +static bool swap_is_has_cache(struct swap_info_struct *si,
> + unsigned long offset, int nr_pages)
> +{
> + unsigned char *map = si->swap_map + offset;
> + unsigned char *map_end = map + nr_pages;
> +
> + do {
> + VM_BUG_ON(!(*map & SWAP_HAS_CACHE));
> + if (*map != SWAP_HAS_CACHE)
> + return false;
> + } while (++map < map_end);
> +
> + return true;
> +}
>
> /*
> * returns number of pages in the folio that backs the swap entry. If positive,
> @@ -141,12 +165,22 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
> unsigned long offset, unsigned long flags)
> {
> swp_entry_t entry = swp_entry(si->type, offset);
> + struct address_space *address_space = swap_address_space(entry);
> + struct swap_cluster_info *ci;
> struct folio *folio;
> - int ret = 0;
> + int ret, nr_pages;
> + bool need_reclaim;
>
> - folio = filemap_get_folio(swap_address_space(entry), swap_cache_index(entry));
> + folio = filemap_get_folio(address_space, swap_cache_index(entry));
> if (IS_ERR(folio))
> return 0;
> +
> + /* offset could point to the middle of a large folio */
> + entry = folio->swap;
> + offset = swp_offset(entry);
> + nr_pages = folio_nr_pages(folio);
> + ret = -nr_pages;
> +
> /*
> * When this function is called from scan_swap_map_slots() and it's
> * called by vmscan.c at reclaiming folios. So we hold a folio lock
> @@ -154,14 +188,50 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
> * case and you should use folio_free_swap() with explicit folio_lock()
> * in usual operations.
> */
> - if (folio_trylock(folio)) {
> - if ((flags & TTRS_ANYWAY) ||
> - ((flags & TTRS_UNMAPPED) && !folio_mapped(folio)) ||
> - ((flags & TTRS_FULL) && mem_cgroup_swap_full(folio)))
> - ret = folio_free_swap(folio);
> - folio_unlock(folio);
> + if (!folio_trylock(folio))
> + goto out;
> +
> + need_reclaim = ((flags & TTRS_ANYWAY) ||
> + ((flags & TTRS_UNMAPPED) && !folio_mapped(folio)) ||
> + ((flags & TTRS_FULL) && mem_cgroup_swap_full(folio)));
> + if (!need_reclaim || !folio_swapcache_freeable(folio))
> + goto out_unlock;
> +
> + /*
> + * It's safe to delete the folio from swap cache only if the folio's
> + * swap_map is HAS_CACHE only, which means the slots have no page table
> + * reference or pending writeback, and can't be allocated to others.
> + */
> + ci = lock_cluster_or_swap_info(si, offset);
> + need_reclaim = swap_is_has_cache(si, offset, nr_pages);
> + unlock_cluster_or_swap_info(si, ci);
> + if (!need_reclaim)
> + goto out_unlock;
> +
> + if (!(flags & TTRS_DIRECT)) {
> + /* Free through slot cache */
> + delete_from_swap_cache(folio);
> + folio_set_dirty(folio);
> + ret = nr_pages;
> + goto out_unlock;
> }
> - ret = ret ? folio_nr_pages(folio) : -folio_nr_pages(folio);
> +
> + xa_lock_irq(&address_space->i_pages);
> + __delete_from_swap_cache(folio, entry, NULL);
> + xa_unlock_irq(&address_space->i_pages);
> + folio_ref_sub(folio, nr_pages);
> + folio_set_dirty(folio);
> +
> + spin_lock(&si->lock);
> + /* Only sinple page folio can be backed by zswap */
> + if (!nr_pages)
> + zswap_invalidate(entry);
I am trying to figure out if I am mad :-) Does nr_pages == 0 means single
page folio?
> + swap_entry_range_free(si, entry, nr_pages);
> + spin_unlock(&si->lock);
> + ret = nr_pages;
> +out_unlock:
> + folio_unlock(folio);
> +out:
> folio_put(folio);
> return ret;
> }
> @@ -903,7 +973,7 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
> if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
> int swap_was_freed;
> spin_unlock(&si->lock);
> - swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY);
> + swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY | TTRS_DIRECT);
> spin_lock(&si->lock);
> /* entry was freed successfully, try to use this again */
> if (swap_was_freed > 0)
> @@ -1340,9 +1410,6 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
> unsigned long offset = swp_offset(entry);
> struct swap_cluster_info *ci;
> struct swap_info_struct *si;
> - unsigned char *map;
> - unsigned int i, free_entries = 0;
> - unsigned char val;
> int size = 1 << swap_entry_order(folio_order(folio));
>
> si = _swap_info_get(entry);
> @@ -1350,23 +1417,14 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
> return;
>
> ci = lock_cluster_or_swap_info(si, offset);
> - if (size > 1) {
> - map = si->swap_map + offset;
> - for (i = 0; i < size; i++) {
> - val = map[i];
> - VM_BUG_ON(!(val & SWAP_HAS_CACHE));
> - if (val == SWAP_HAS_CACHE)
> - free_entries++;
> - }
> - if (free_entries == size) {
> - unlock_cluster_or_swap_info(si, ci);
> - spin_lock(&si->lock);
> - swap_entry_range_free(si, entry, size);
> - spin_unlock(&si->lock);
> - return;
> - }
> + if (size > 1 && swap_is_has_cache(si, offset, size)) {
> + unlock_cluster_or_swap_info(si, ci);
> + spin_lock(&si->lock);
> + swap_entry_range_free(si, entry, size);
> + spin_unlock(&si->lock);
> + return;
> }
> - for (i = 0; i < size; i++, entry.val++) {
> + for (int i = 0; i < size; i++, entry.val++) {
> if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) {
> unlock_cluster_or_swap_info(si, ci);
> free_swap_slot(entry);
> @@ -1526,16 +1584,7 @@ static bool folio_swapped(struct folio *folio)
> return swap_page_trans_huge_swapped(si, entry, folio_order(folio));
> }
>
> -/**
> - * folio_free_swap() - Free the swap space used for this folio.
> - * @folio: The folio to remove.
> - *
> - * If swap is getting full, or if there are no more mappings of this folio,
> - * then call folio_free_swap to free its swap space.
> - *
> - * Return: true if we were able to release the swap space.
> - */
> -bool folio_free_swap(struct folio *folio)
> +static bool folio_swapcache_freeable(struct folio *folio)
> {
> VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
>
> @@ -1543,8 +1592,6 @@ bool folio_free_swap(struct folio *folio)
> return false;
> if (folio_test_writeback(folio))
> return false;
> - if (folio_swapped(folio))
> - return false;
>
> /*
> * Once hibernation has begun to create its image of memory,
> @@ -1564,6 +1611,25 @@ bool folio_free_swap(struct folio *folio)
> if (pm_suspended_storage())
> return false;
>
> + return true;
> +}
> +
> +/**
> + * folio_free_swap() - Free the swap space used for this folio.
> + * @folio: The folio to remove.
> + *
> + * If swap is getting full, or if there are no more mappings of this folio,
> + * then call folio_free_swap to free its swap space.
> + *
> + * Return: true if we were able to release the swap space.
> + */
> +bool folio_free_swap(struct folio *folio)
> +{
> + if (!folio_swapcache_freeable(folio))
> + return false;
> + if (folio_swapped(folio))
> + return false;
> +
> delete_from_swap_cache(folio);
> folio_set_dirty(folio);
> return true;
> @@ -1640,7 +1706,7 @@ void free_swap_and_cache_nr(swp_entry_t entry, int nr)
> * to the next boundary.
> */
> nr = __try_to_reclaim_swap(si, offset,
> - TTRS_UNMAPPED | TTRS_FULL);
> + TTRS_UNMAPPED | TTRS_FULL);
> if (nr == 0)
> nr = 1;
> else if (nr < 0)
>
> --
> 2.46.0.rc1.232.g9752f9e123-goog
>
Powered by blists - more mailing lists