Message-ID: <CAJD7tkYFcxtvD7GEQa3mDzKWURfseVsLvFh6m5yN36B8hefctg@mail.gmail.com>
Date: Mon, 6 Jan 2025 17:16:56 -0800
From: Yosry Ahmed <yosryahmed@...gle.com>
To: Kanchana P Sridhar <kanchana.p.sridhar@...el.com>
Cc: linux-kernel@...r.kernel.org, linux-mm@...ck.org, hannes@...xchg.org,
nphamcs@...il.com, chengming.zhou@...ux.dev, usamaarif642@...il.com,
ryan.roberts@....com, 21cnbao@...il.com, akpm@...ux-foundation.org,
linux-crypto@...r.kernel.org, herbert@...dor.apana.org.au,
davem@...emloft.net, clabbe@...libre.com, ardb@...nel.org,
ebiggers@...gle.com, surenb@...gle.com, kristen.c.accardi@...el.com,
wajdi.k.feghali@...el.com, vinodh.gopal@...el.com
Subject: Re: [PATCH v5 11/12] mm: zswap: Restructure & simplify zswap_store()
to make it amenable for batching.
On Fri, Dec 20, 2024 at 10:31 PM Kanchana P Sridhar
<kanchana.p.sridhar@...el.com> wrote:
>
> This patch introduces zswap_store_folio(), which implements, for all the
> pages in a folio, the computations previously done in zswap_store_page()
> for a single page. This allows us to move the loop over the folio's pages
> from zswap_store() to zswap_store_folio().
>
> A distinct zswap_compress_folio() is also added; it simply calls
> zswap_compress() for each page in the folio it is called with.
The git diff looks funky; it may make things clearer to introduce
zswap_compress_folio() in a separate patch.
>
> zswap_store_folio() starts by allocating all zswap entries required to
> store the folio. Next, it calls zswap_compress_folio() and finally, adds
> the entries to the xarray and LRU.
>
> The error handling and cleanup required for all failure scenarios that can
> occur while storing a folio in zswap are now consolidated under a
> "store_folio_failed" label in zswap_store_folio().
>
> These changes facilitate developing support for compress batching in
> zswap_store_folio().
>
> Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@...el.com>
> ---
> mm/zswap.c | 183 +++++++++++++++++++++++++++++++++--------------------
> 1 file changed, 116 insertions(+), 67 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 99cd78891fd0..1be0f1807bfc 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1467,77 +1467,129 @@ static void shrink_worker(struct work_struct *w)
> * main API
> **********************************/
>
> -static ssize_t zswap_store_page(struct page *page,
> - struct obj_cgroup *objcg,
> - struct zswap_pool *pool)
> +static bool zswap_compress_folio(struct folio *folio,
> + struct zswap_entry *entries[],
> + struct zswap_pool *pool)
> {
> - swp_entry_t page_swpentry = page_swap_entry(page);
> - struct zswap_entry *entry, *old;
> + long index, nr_pages = folio_nr_pages(folio);
>
> - /* allocate entry */
> - entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> - if (!entry) {
> - zswap_reject_kmemcache_fail++;
> - return -EINVAL;
> + for (index = 0; index < nr_pages; ++index) {
> + struct page *page = folio_page(folio, index);
> +
> + if (!zswap_compress(page, entries[index], pool))
> + return false;
> }
>
> - if (!zswap_compress(page, entry, pool))
> - goto compress_failed;
> + return true;
> +}
>
> - old = xa_store(swap_zswap_tree(page_swpentry),
> - swp_offset(page_swpentry),
> - entry, GFP_KERNEL);
> - if (xa_is_err(old)) {
> - int err = xa_err(old);
> +/*
> + * Store all pages in a folio.
> + *
> + * The error handling from all failure points is consolidated to the
> + * "store_folio_failed" label, based on the initialization of the zswap entries'
> + * handles to ERR_PTR(-EINVAL) at allocation time, and the fact that the
> + * entry's handle is subsequently modified only upon a successful zpool_malloc()
> + * after the page is compressed.
> + */
> +static ssize_t zswap_store_folio(struct folio *folio,
> + struct obj_cgroup *objcg,
> + struct zswap_pool *pool)
> +{
> + long index, nr_pages = folio_nr_pages(folio);
> + struct zswap_entry **entries = NULL;
> + int node_id = folio_nid(folio);
> + size_t compressed_bytes = 0;
>
> - WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> - zswap_reject_alloc_fail++;
> - goto store_failed;
> + entries = kmalloc(nr_pages * sizeof(*entries), GFP_KERNEL);
We can probably use kcalloc() here.
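Something like this (untested sketch):

	entries = kcalloc(nr_pages, sizeof(*entries), GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

kcalloc() also checks for multiplication overflow on
nr_pages * sizeof(*entries), which is the main win here; the zeroing is
a side benefit we don't strictly need.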
> + if (!entries)
> + return -ENOMEM;
> +
> + /* allocate entries */
This comment can be dropped.
> + for (index = 0; index < nr_pages; ++index) {
> + entries[index] = zswap_entry_cache_alloc(GFP_KERNEL, node_id);
> +
> + if (!entries[index]) {
> + zswap_reject_kmemcache_fail++;
> + nr_pages = index;
> + goto store_folio_failed;
> + }
> +
> + entries[index]->handle = (unsigned long)ERR_PTR(-EINVAL);
> }
>
> - /*
> - * We may have had an existing entry that became stale when
> - * the folio was redirtied and now the new version is being
> - * swapped out. Get rid of the old.
> - */
> - if (old)
> - zswap_entry_free(old);
> + if (!zswap_compress_folio(folio, entries, pool))
> + goto store_folio_failed;
>
> - /*
> - * The entry is successfully compressed and stored in the tree, there is
> - * no further possibility of failure. Grab refs to the pool and objcg.
> - * These refs will be dropped by zswap_entry_free() when the entry is
> - * removed from the tree.
> - */
> - zswap_pool_get(pool);
> - if (objcg)
> - obj_cgroup_get(objcg);
> + for (index = 0; index < nr_pages; ++index) {
> + swp_entry_t page_swpentry = page_swap_entry(folio_page(folio, index));
> + struct zswap_entry *old, *entry = entries[index];
> +
> + old = xa_store(swap_zswap_tree(page_swpentry),
> + swp_offset(page_swpentry),
> + entry, GFP_KERNEL);
> + if (xa_is_err(old)) {
> + int err = xa_err(old);
> +
> + WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> + zswap_reject_alloc_fail++;
> + goto store_folio_failed;
> + }
>
> - /*
> - * We finish initializing the entry while it's already in xarray.
> - * This is safe because:
> - *
> - * 1. Concurrent stores and invalidations are excluded by folio lock.
> - *
> - * 2. Writeback is excluded by the entry not being on the LRU yet.
> - * The publishing order matters to prevent writeback from seeing
> - * an incoherent entry.
> - */
> - entry->pool = pool;
> - entry->swpentry = page_swpentry;
> - entry->objcg = objcg;
> - entry->referenced = true;
> - if (entry->length) {
> - INIT_LIST_HEAD(&entry->lru);
> - zswap_lru_add(&zswap_list_lru, entry);
> + /*
> + * We may have had an existing entry that became stale when
> + * the folio was redirtied and now the new version is being
> + * swapped out. Get rid of the old.
> + */
> + if (old)
> + zswap_entry_free(old);
> +
> + /*
> + * The entry is successfully compressed and stored in the tree, there is
> + * no further possibility of failure. Grab refs to the pool and objcg.
> + * These refs will be dropped by zswap_entry_free() when the entry is
> + * removed from the tree.
> + */
> + zswap_pool_get(pool);
> + if (objcg)
> + obj_cgroup_get(objcg);
> +
> + /*
> + * We finish initializing the entry while it's already in xarray.
> + * This is safe because:
> + *
> + * 1. Concurrent stores and invalidations are excluded by folio lock.
> + *
> + * 2. Writeback is excluded by the entry not being on the LRU yet.
> + * The publishing order matters to prevent writeback from seeing
> + * an incoherent entry.
> + */
> + entry->pool = pool;
> + entry->swpentry = page_swpentry;
> + entry->objcg = objcg;
> + entry->referenced = true;
> + if (entry->length) {
> + INIT_LIST_HEAD(&entry->lru);
> + zswap_lru_add(&zswap_list_lru, entry);
> + }
> +
> + compressed_bytes += entry->length;
> }
>
> - return entry->length;
> + kfree(entries);
> +
> + return compressed_bytes;
> +
> +store_folio_failed:
> + for (index = 0; index < nr_pages; ++index) {
> + if (!IS_ERR_VALUE(entries[index]->handle))
> + zpool_free(pool->zpool, entries[index]->handle);
> +
> + zswap_entry_cache_free(entries[index]);
> + }
If there is a failure in xa_store() halfway through the entries, this
loop will free all the compressed objects and entries. But some of
the entries are already in the xarray, and zswap_store() will try to
free them again. This seems like a bug, or did I miss something here?
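One possible fix (rough sketch, completely untested; "from_index" is
just a name I made up): remember where xa_store() failed, and only tear
down the entries that were never published. The ones already in the
tree will be freed by the xa_erase()/zswap_entry_free() loop in
zswap_store()'s failure path:

	long from_index = 0;
	...
	if (xa_is_err(old)) {
		int err = xa_err(old);

		WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
		zswap_reject_alloc_fail++;
		from_index = index;
		goto store_folio_failed;
	}
	...
store_folio_failed:
	for (index = from_index; index < nr_pages; ++index) {
		if (!IS_ERR_VALUE(entries[index]->handle))
			zpool_free(pool->zpool, entries[index]->handle);

		zswap_entry_cache_free(entries[index]);
	}

On the allocation and compression failure paths nothing has been
published yet, so from_index stays 0 and the behavior is unchanged
there.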
> +
> + kfree(entries);
>
> -store_failed:
> - zpool_free(pool->zpool, entry->handle);
> -compress_failed:
> - zswap_entry_cache_free(entry);
> return -EINVAL;
> }
>
> @@ -1549,8 +1601,8 @@ bool zswap_store(struct folio *folio)
> struct mem_cgroup *memcg = NULL;
> struct zswap_pool *pool;
> size_t compressed_bytes = 0;
> + ssize_t bytes;
> bool ret = false;
> - long index;
>
> VM_WARN_ON_ONCE(!folio_test_locked(folio));
> VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> @@ -1584,15 +1636,11 @@ bool zswap_store(struct folio *folio)
> mem_cgroup_put(memcg);
> }
>
> - for (index = 0; index < nr_pages; ++index) {
> - struct page *page = folio_page(folio, index);
> - ssize_t bytes;
> + bytes = zswap_store_folio(folio, objcg, pool);
> + if (bytes < 0)
> + goto put_pool;
>
> - bytes = zswap_store_page(page, objcg, pool);
> - if (bytes < 0)
> - goto put_pool;
> - compressed_bytes += bytes;
> - }
> + compressed_bytes = bytes;
What's the point of having both compressed_bytes and bytes now?
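i.e., it seems like we could use bytes directly and drop the local
entirely (sketch):

	bytes = zswap_store_folio(folio, objcg, pool);
	if (bytes < 0)
		goto put_pool;

	if (objcg) {
		obj_cgroup_charge_zswap(objcg, bytes);
		...
	}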
>
> if (objcg) {
> obj_cgroup_charge_zswap(objcg, compressed_bytes);
> @@ -1622,6 +1670,7 @@ bool zswap_store(struct folio *folio)
> pgoff_t offset = swp_offset(swp);
> struct zswap_entry *entry;
> struct xarray *tree;
> + long index;
>
> for (index = 0; index < nr_pages; ++index) {
> tree = swap_zswap_tree(swp_entry(type, offset + index));
> --
> 2.27.0
>