[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJD7tkZC8e8ZTBSOGZH-1srTeC=jqxwWchd-BjvNsV2FR0oT8Q@mail.gmail.com>
Date: Tue, 11 Jun 2024 10:51:08 -0700
From: Yosry Ahmed <yosryahmed@...gle.com>
To: Usama Arif <usamaarif642@...il.com>
Cc: 21cnbao@...il.com, akpm@...ux-foundation.org, hannes@...xchg.org,
david@...hat.com, ying.huang@...el.com, hughd@...gle.com, willy@...radead.org,
nphamcs@...il.com, chengming.zhou@...ux.dev, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, kernel-team@...a.com,
Shakeel Butt <shakeel.butt@...ux.dev>
Subject: Re: [PATCH v3 1/2] mm: store zero pages to be swapped out in a bitmap
[..]
> >> I think its better to handle this in Barrys patch. I feel this series is
> >> close to its final state, i.e. the only diff I have for the next
> >> revision is below to remove start/end_writeback for zer_filled case. I
> >> will comment on Barrys patch once the I send out the next revision of this.
> > Sorry I did not make myself clearer. I did not mean that you should
> > handle the large folio swapin here. This needs to be handled at a
> > higher level because as you mentioned, a large folio may be partially
> > in the zeromap, zswap, swapcache, disk, etc.
> >
> > What I meant is that we should probably have a debug check to make
> > sure this doesn't go unhandled. For zswap, I am trying to add a
> > warning and fail the swapin operation if a large folio slips through
> > to zswap. We can do something similar here if folks agree this is the
> > right way in the interim:
> > https://lore.kernel.org/lkml/20240611024516.1375191-3-yosryahmed@google.com/.
> >
> > Maybe I am too paranoid, but I think it's easy to mess up these things
> > when working on large folio swapin imo.
>
> So there is a difference between zswap and this optimization. In this
> optimization, if the zeromap is set for all the folio bits, then we
> should do large folio swapin. There still needs to be a change in Barrys
> patch in alloc_swap_folio, but apart from that does the below diff over
> v3 make it better? I will send a v4 with this if it sounds good.
>
>
> diff --git a/mm/page_io.c b/mm/page_io.c
> index 6400be6e4291..bf01364748a9 100644
> --- a/mm/page_io.c
> +++ b/mm/page_io.c
> @@ -234,18 +234,24 @@ static void swap_zeromap_folio_clear(struct folio
> *folio)
> }
> }
>
> -static bool swap_zeromap_folio_test(struct folio *folio)
> +/*
> + * Return the index of the first subpage which is not zero-filled
> + * according to swap_info_struct->zeromap.
> + * If all pages are zero-filled according to zeromap, it will return
> + * folio_nr_pages(folio).
> + */
> +static long swap_zeromap_folio_test(struct folio *folio)
> {
> struct swap_info_struct *sis = swp_swap_info(folio->swap);
> swp_entry_t entry;
> - unsigned int i;
> + long i;
Why long?
>
> for (i = 0; i < folio_nr_pages(folio); i++) {
> entry = page_swap_entry(folio_page(folio, i));
> if (!test_bit(swp_offset(entry), sis->zeromap))
> - return false;
> + return i;
> }
> - return true;
> + return i;
> }
>
> /*
> @@ -581,6 +587,7 @@ void swap_read_folio(struct folio *folio, bool
> synchronous,
> {
> struct swap_info_struct *sis = swp_swap_info(folio->swap);
> bool workingset = folio_test_workingset(folio);
> + long first_non_zero_page_idx;
> unsigned long pflags;
> bool in_thrashing;
>
> @@ -598,10 +605,19 @@ void swap_read_folio(struct folio *folio, bool
> synchronous,
> psi_memstall_enter(&pflags);
> }
> delayacct_swapin_start();
> - if (swap_zeromap_folio_test(folio)) {
> + first_non_zero_page_idx = swap_zeromap_folio_test(folio);
> + if (first_non_zero_page_idx == folio_nr_pages(folio)) {
> folio_zero_fill(folio);
> folio_mark_uptodate(folio);
> folio_unlock(folio);
> + } else if (first_non_zero_page_idx != 0) {
> + /*
> + * The case for when only *some* of subpages being
> swapped-in were recorded
> + * in sis->zeromap, while the rest are in zswap/disk is
> currently not handled.
> + * WARN in this case and return without marking the
> folio uptodate so that
> + * an IO error is emitted (e.g. do_swap_page() will sigbus).
> + */
> + WARN_ON_ONCE(1);
> } else if (zswap_load(folio)) {
> folio_mark_uptodate(folio);
> folio_unlock(folio);
>
>
This is too much noise for swap_read_folio(). How about adding
swap_read_folio_zeromap() that takes care of this and decides whether
or not to call folio_mark_uptodate()?
-static bool swap_zeromap_folio_test(struct folio *folio)
+/*
+ * Return the index of the first subpage which is not zero-filled according to
+ * swap_info_struct->zeromap. If all pages are zero-filled according to
+ * zeromap, it will return folio_nr_pages(folio).
+ */
+static unsigned int swap_zeromap_folio_test(struct folio *folio)
{
struct swap_info_struct *sis = swp_swap_info(folio->swap);
swp_entry_t entry;
@@ -243,9 +248,9 @@ static bool swap_zeromap_folio_test(struct folio *folio)
for (i = 0; i < folio_nr_pages(folio); i++) {
entry = page_swap_entry(folio_page(folio, i));
if (!test_bit(swp_offset(entry), sis->zeromap))
- return false;
+ return i;
}
- return true;
+ return i;
}
/*
@@ -511,6 +516,25 @@ static void sio_read_complete(struct kiocb *iocb, long ret)
mempool_free(sio, sio_pool);
}
+static bool swap_read_folio_zeromap(struct folio *folio)
+{
+ unsigned int idx = swap_zeromap_folio_test(folio);
+
+ if (idx == 0)
+ return false;
+
+ /*
+ * Swapping in a large folio that is partially in the zeromap is not
+ * currently handled. Return true without marking the folio uptodate so
+ * that an IO error is emitted (e.g. do_swap_page() will sigbus).
+ */
+ if (WARN_ON_ONCE(idx < folio_nr_pages(folio)))
+ return true;
+
+ folio_zero_fill(folio);
+ folio_mark_uptodate(folio);
+ return true
+}
+
static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
{
struct swap_info_struct *sis = swp_swap_info(folio->swap);
@@ -600,9 +624,7 @@ void swap_read_folio(struct folio *folio, bool synchronous,
psi_memstall_enter(&pflags);
}
delayacct_swapin_start();
- if (swap_zeromap_folio_test(folio)) {
- folio_zero_fill(folio);
- folio_mark_uptodate(folio);
+ if (swap_read_folio_zeromap(folio)) {
folio_unlock(folio);
} else if (zswap_load(folio)) {
folio_mark_uptodate(folio);
Powered by blists - more mailing lists