Message-ID: <9e0fc33a-2e2b-4366-ae1b-231038dfd2be@amd.com>
Date: Wed, 22 Jan 2025 14:29:19 +0530
From: Bharata B Rao <bharata@....com>
To: Vinay Banakar <vny@...gle.com>, linux-mm@...ck.org,
 linux-kernel@...r.kernel.org
Cc: akpm@...ux-foundation.org, willy@...radead.org, mgorman@...e.de,
 Wei Xu <weixugc@...gle.com>, Greg Thelen <gthelen@...gle.com>
Subject: Re: [PATCH] mm: Optimize TLB flushes during page reclaim

On 21-Jan-25 5:35 AM, Vinay Banakar wrote:
> Sorry, the previous patch was unreadable due to damaged whitespace.
> Here is the same patch with fixed indentation.
> 
> Signed-off-by: Vinay Banakar <vny@...gle.com>
> ---
>   mm/vmscan.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
>   1 file changed, 74 insertions(+), 33 deletions(-)
> 
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index bd489c1af..1bd510622 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1035,6 +1035,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
>          struct folio_batch free_folios;
>          LIST_HEAD(ret_folios);
>          LIST_HEAD(demote_folios);
> +       LIST_HEAD(pageout_list);
>          unsigned int nr_reclaimed = 0;
>          unsigned int pgactivate = 0;
>          bool do_demote_pass;
> @@ -1351,39 +1352,9 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
>                          if (!sc->may_writepage)
>                                  goto keep_locked;
> 
> -                       /*
> -                        * Folio is dirty. Flush the TLB if a writable entry
> -                        * potentially exists to avoid CPU writes after I/O
> -                        * starts and then write it out here.
> -                        */
> -                       try_to_unmap_flush_dirty();
> -                       switch (pageout(folio, mapping, &plug)) {
> -                       case PAGE_KEEP:
> -                               goto keep_locked;
> -                       case PAGE_ACTIVATE:
> -                               goto activate_locked;
> -                       case PAGE_SUCCESS:
> -                               stat->nr_pageout += nr_pages;
> -
> -                               if (folio_test_writeback(folio))
> -                                       goto keep;
> -                               if (folio_test_dirty(folio))
> -                                       goto keep;
> -
> -                               /*
> -                                * A synchronous write - probably a ramdisk.  Go
> -                                * ahead and try to reclaim the folio.
> -                                */
> -                               if (!folio_trylock(folio))
> -                                       goto keep;
> -                               if (folio_test_dirty(folio) ||
> -                                   folio_test_writeback(folio))
> -                                       goto keep_locked;
> -                               mapping = folio_mapping(folio);
> -                               fallthrough;
> -                       case PAGE_CLEAN:
> -                               ; /* try to free the folio below */
> -                       }
> +                       /* Add to pageout list for deferred bio submissions */
> +                       list_add(&folio->lru, &pageout_list);
> +                       continue;

The dirty pages are collected in a list here...
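
If I read it right, the new control flow is roughly the following
(an illustrative paraphrase of the patch, not the literal code):

	/*
	 * Pass 1: instead of calling pageout() inline for each
	 * dirty folio, defer it to a private list.
	 */
	list_add(&folio->lru, &pageout_list);

	/*
	 * Pass 2, after the main loop: one batched TLB flush for
	 * everything on the list, then submit the writeback I/O.
	 */
	try_to_unmap_flush_dirty();
	while (!list_empty(&pageout_list)) {
		struct folio *folio = lru_to_folio(&pageout_list);

		list_del(&folio->lru);
		pageout(folio, folio_mapping(folio), &plug);
	}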

>          }
> 
>          /*
> @@ -1494,6 +1465,76 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
>          }
>          /* 'folio_list' is always empty here */
> 
> +       if (!list_empty(&pageout_list)) {
> +               /*
> +                * Batch TLB flushes by flushing once before processing
> +                * all dirty pages. Since we operate on one PMD at a
> +                * time, this batches TLB flushes at PMD granularity
> +                * rather than per-page, reducing IPIs.
> +                */
> +               struct address_space *mapping;
> +               try_to_unmap_flush_dirty();

and one TLB flush is issued for the entire list. Where is the
PMD-level (512-page) batching done? Is that implicit elsewhere in the
flow?
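
For instance, to actually flush at PMD granularity I would have
expected the list to be drained in 512-folio chunks, something like
(hypothetical sketch; PAGEOUT_BATCH and the loop placement are my
invention, not part of this patch):

	#define PAGEOUT_BATCH	512	/* one PMD's worth of base pages */

	struct folio *folio, *next;
	int batch = 0;

	list_for_each_entry_safe(folio, next, &pageout_list, lru) {
		if (batch++ % PAGEOUT_BATCH == 0)
			try_to_unmap_flush_dirty(); /* one IPI per chunk */
		pageout(folio, folio_mapping(folio), &plug);
	}

whereas the patch as posted issues a single try_to_unmap_flush_dirty()
for whatever has accumulated, however many PMDs that spans.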

> +
> +               while (!list_empty(&pageout_list)) {
> +                       struct folio *folio = lru_to_folio(&pageout_list);
> +                       list_del(&folio->lru);
> +
> +                       /* Recheck if page got reactivated */
> +                       if (folio_test_active(folio) ||
> +                           (folio_mapped(folio) && folio_test_young(folio)))
> +                               goto skip_pageout_locked;
> +
> +                       mapping = folio_mapping(folio);
> +                       pageout_t pageout_res = pageout(folio, mapping, &plug);
> +                       switch (pageout_res) {
> +                       case PAGE_KEEP:
> +                               goto skip_pageout_locked;
> +                       case PAGE_ACTIVATE:
> +                               goto skip_pageout_locked;
> +                       case PAGE_SUCCESS:
> +                               stat->nr_pageout += folio_nr_pages(folio);
> +
> +                               if (folio_test_writeback(folio) ||
> +                                   folio_test_dirty(folio))
> +                                       goto skip_pageout;
> +
> +                               /*
> +                                * A synchronous write - probably a ramdisk.  Go
> +                                * ahead and try to reclaim the folio.
> +                                */
> +                               if (!folio_trylock(folio))
> +                                       goto skip_pageout;
> +                               if (folio_test_dirty(folio) ||
> +                                   folio_test_writeback(folio))
> +                                       goto skip_pageout_locked;
> +
> +                               /* Try to free the folio */
> +                               if (!mapping ||
> +                                   !__remove_mapping(mapping, folio, true,
> +                                                   sc->target_mem_cgroup))
> +                                       goto skip_pageout_locked;
> +
> +                               nr_reclaimed += folio_nr_pages(folio);
> +                               folio_unlock(folio);
> +                               continue;
> +
> +                       case PAGE_CLEAN:
> +                               if (!mapping ||
> +                                   !__remove_mapping(mapping, folio, true,
> +                                                   sc->target_mem_cgroup))
> +                                       goto skip_pageout_locked;
> +
> +                               nr_reclaimed += folio_nr_pages(folio);
> +                               folio_unlock(folio);
> +                               continue;
> +                       }
> +
> +skip_pageout_locked:
> +                       folio_unlock(folio);
> +skip_pageout:
> +                       list_add(&folio->lru, &ret_folios);
> +               }
> +       }
> +
>          /* Migrate folios selected for demotion */
>          nr_reclaimed += demote_folio_list(&demote_folios, pgdat);
>          /* Folios that could not be demoted are still in @demote_folios */

Regards,
Bharata.
