Message-ID: <CAJD7tkYZSWL9WQ9X9UMLNTbDcF0hX=t90Ouf22WWHrcUvXyPRg@mail.gmail.com>
Date: Mon, 2 Dec 2024 11:33:51 -0800
From: Yosry Ahmed <yosryahmed@...gle.com>
To: Kanchana P Sridhar <kanchana.p.sridhar@...el.com>
Cc: linux-kernel@...r.kernel.org, linux-mm@...ck.org, hannes@...xchg.org,
nphamcs@...il.com, chengming.zhou@...ux.dev, usamaarif642@...il.com,
ryan.roberts@....com, 21cnbao@...il.com, akpm@...ux-foundation.org,
wajdi.k.feghali@...el.com, vinodh.gopal@...el.com
Subject: Re: [PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to process
multiple pages in a folio.
On Wed, Nov 27, 2024 at 2:53 PM Kanchana P Sridhar
<kanchana.p.sridhar@...el.com> wrote:
>
> Modified zswap_store() to store the folio in batches of
> SWAP_CRYPTO_BATCH_SIZE pages. Accordingly, refactored zswap_store_page()
> into zswap_store_pages() that processes a range of pages in the folio.
> zswap_store_pages() is a vectorized version of zswap_store_page().
>
> For now, zswap_store_pages() will sequentially compress these pages with
> zswap_compress().
>
> These changes are a follow-up to code review comments received for [1], and
> are intended to set up zswap_store() for batching with Intel IAA.
>
> [1]: https://patchwork.kernel.org/project/linux-mm/patch/20241123070127.332773-11-kanchana.p.sridhar@intel.com/
>
> Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@...el.com>
> ---
> include/linux/zswap.h | 1 +
> mm/zswap.c | 154 ++++++++++++++++++++++++------------------
> 2 files changed, 88 insertions(+), 67 deletions(-)
>
> diff --git a/include/linux/zswap.h b/include/linux/zswap.h
> index d961ead91bf1..05a81e750744 100644
> --- a/include/linux/zswap.h
> +++ b/include/linux/zswap.h
> @@ -7,6 +7,7 @@
>
> struct lruvec;
>
> +#define SWAP_CRYPTO_BATCH_SIZE 8UL
> extern atomic_long_t zswap_stored_pages;
>
> #ifdef CONFIG_ZSWAP
> diff --git a/mm/zswap.c b/mm/zswap.c
> index f6316b66fb23..b09d1023e775 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1409,78 +1409,96 @@ static void shrink_worker(struct work_struct *w)
> * main API
> **********************************/
>
> -static ssize_t zswap_store_page(struct page *page,
> - struct obj_cgroup *objcg,
> - struct zswap_pool *pool)
> +/*
> + * Store multiple pages in @folio, starting from the page at index @si up to
> + * and including the page at index @ei.
> + */
> +static ssize_t zswap_store_pages(struct folio *folio,
> + long si,
> + long ei,
> + struct obj_cgroup *objcg,
> + struct zswap_pool *pool)
> {
> - swp_entry_t page_swpentry = page_swap_entry(page);
> + struct page *page;
> + swp_entry_t page_swpentry;
> struct zswap_entry *entry, *old;
> + size_t compressed_bytes = 0;
> + u8 nr_pages = ei - si + 1;
> + u8 i;
> +
> + for (i = 0; i < nr_pages; ++i) {
> + page = folio_page(folio, si + i);
> + page_swpentry = page_swap_entry(page);
> +
> + /* allocate entry */
> + entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> + if (!entry) {
> + zswap_reject_kmemcache_fail++;
> + return -EINVAL;
> + }
I think this patch is wrong on its own: for example, if an allocation
fails in the above loop, we return without cleaning up the entries
allocated in previous iterations. I believe this is fixed in patch 2,
but we cannot introduce bugs in-between patches. As I mentioned, I
don't think the helpers in patch 2 really help either. Please combine
the changes and keep them in the main series (unless you have a reason
not to).
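
Something along these lines is what I have in mind, completely
untested and only to illustrate the unwind on failure (the entries[]
array, the elided middle part, and the label name are purely
illustrative):

static ssize_t zswap_store_pages(struct folio *folio, long si, long ei,
                                 struct obj_cgroup *objcg,
                                 struct zswap_pool *pool)
{
        struct zswap_entry *entries[SWAP_CRYPTO_BATCH_SIZE];
        size_t compressed_bytes = 0;
        u8 nr_pages = ei - si + 1;
        u8 i;

        /* Allocate all entries up front so a failure is easy to unwind. */
        for (i = 0; i < nr_pages; ++i) {
                struct page *page = folio_page(folio, si + i);

                entries[i] = zswap_entry_cache_alloc(GFP_KERNEL,
                                                     page_to_nid(page));
                if (!entries[i]) {
                        zswap_reject_kmemcache_fail++;
                        goto free_entries;
                }
        }

        /*
         * ... compress and store each page as before; on any failure,
         * free the entries that were not published to the tree ...
         */

        return compressed_bytes;

free_entries:
        /* Free the entries allocated before the failing iteration. */
        while (i-- > 0)
                zswap_entry_cache_free(entries[i]);
        return -EINVAL;
}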
>
> - /* allocate entry */
> - entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> - if (!entry) {
> - zswap_reject_kmemcache_fail++;
> - return -EINVAL;
> - }
> -
> - if (!zswap_compress(page, entry, pool))
> - goto compress_failed;
> + if (!zswap_compress(page, entry, pool))
> + goto compress_failed;
>
> - old = xa_store(swap_zswap_tree(page_swpentry),
> - swp_offset(page_swpentry),
> - entry, GFP_KERNEL);
> - if (xa_is_err(old)) {
> - int err = xa_err(old);
> + old = xa_store(swap_zswap_tree(page_swpentry),
> + swp_offset(page_swpentry),
> + entry, GFP_KERNEL);
> + if (xa_is_err(old)) {
> + int err = xa_err(old);
>
> - WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> - zswap_reject_alloc_fail++;
> - goto store_failed;
> - }
> + WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> + zswap_reject_alloc_fail++;
> + goto store_failed;
> + }
>
> - /*
> - * We may have had an existing entry that became stale when
> - * the folio was redirtied and now the new version is being
> - * swapped out. Get rid of the old.
> - */
> - if (old)
> - zswap_entry_free(old);
> + /*
> + * We may have had an existing entry that became stale when
> + * the folio was redirtied and now the new version is being
> + * swapped out. Get rid of the old.
> + */
> + if (old)
> + zswap_entry_free(old);
>
> - /*
> - * The entry is successfully compressed and stored in the tree, there is
> - * no further possibility of failure. Grab refs to the pool and objcg.
> - * These refs will be dropped by zswap_entry_free() when the entry is
> - * removed from the tree.
> - */
> - zswap_pool_get(pool);
> - if (objcg)
> - obj_cgroup_get(objcg);
> + /*
> + * The entry is successfully compressed and stored in the tree, there is
> + * no further possibility of failure. Grab refs to the pool and objcg.
> + * These refs will be dropped by zswap_entry_free() when the entry is
> + * removed from the tree.
> + */
> + zswap_pool_get(pool);
> + if (objcg)
> + obj_cgroup_get(objcg);
>
> - /*
> - * We finish initializing the entry while it's already in xarray.
> - * This is safe because:
> - *
> - * 1. Concurrent stores and invalidations are excluded by folio lock.
> - *
> - * 2. Writeback is excluded by the entry not being on the LRU yet.
> - * The publishing order matters to prevent writeback from seeing
> - * an incoherent entry.
> - */
> - entry->pool = pool;
> - entry->swpentry = page_swpentry;
> - entry->objcg = objcg;
> - entry->referenced = true;
> - if (entry->length) {
> - INIT_LIST_HEAD(&entry->lru);
> - zswap_lru_add(&zswap_list_lru, entry);
> - }
> + /*
> + * We finish initializing the entry while it's already in xarray.
> + * This is safe because:
> + *
> + * 1. Concurrent stores and invalidations are excluded by folio lock.
> + *
> + * 2. Writeback is excluded by the entry not being on the LRU yet.
> + * The publishing order matters to prevent writeback from seeing
> + * an incoherent entry.
> + */
> + entry->pool = pool;
> + entry->swpentry = page_swpentry;
> + entry->objcg = objcg;
> + entry->referenced = true;
> + if (entry->length) {
> + INIT_LIST_HEAD(&entry->lru);
> + zswap_lru_add(&zswap_list_lru, entry);
> + }
>
> - return entry->length;
> + compressed_bytes += entry->length;
> + continue;
>
> store_failed:
> - zpool_free(pool->zpool, entry->handle);
> + zpool_free(pool->zpool, entry->handle);
> compress_failed:
> - zswap_entry_cache_free(entry);
> - return -EINVAL;
> + zswap_entry_cache_free(entry);
> + return -EINVAL;
> + }
> +
> + return compressed_bytes;
> }
>
> bool zswap_store(struct folio *folio)
> @@ -1492,7 +1510,7 @@ bool zswap_store(struct folio *folio)
> struct zswap_pool *pool;
> size_t compressed_bytes = 0;
> bool ret = false;
> - long index;
> + long si, ei, incr = SWAP_CRYPTO_BATCH_SIZE;
>
> VM_WARN_ON_ONCE(!folio_test_locked(folio));
> VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> @@ -1526,11 +1544,13 @@ bool zswap_store(struct folio *folio)
> mem_cgroup_put(memcg);
> }
>
> - for (index = 0; index < nr_pages; ++index) {
> - struct page *page = folio_page(folio, index);
> + /* Store the folio in batches of SWAP_CRYPTO_BATCH_SIZE pages. */
> + for (si = 0, ei = min(si + incr - 1, nr_pages - 1);
> + ((si < nr_pages) && (ei < nr_pages));
> + si = ei + 1, ei = min(si + incr - 1, nr_pages - 1)) {
> ssize_t bytes;
>
> - bytes = zswap_store_page(page, objcg, pool);
> + bytes = zswap_store_pages(folio, si, ei, objcg, pool);
> if (bytes < 0)
> goto put_pool;
> compressed_bytes += bytes;
> @@ -1565,9 +1585,9 @@ bool zswap_store(struct folio *folio)
> struct zswap_entry *entry;
> struct xarray *tree;
>
> - for (index = 0; index < nr_pages; ++index) {
> - tree = swap_zswap_tree(swp_entry(type, offset + index));
> - entry = xa_erase(tree, offset + index);
> + for (si = 0; si < nr_pages; ++si) {
> + tree = swap_zswap_tree(swp_entry(type, offset + si));
> + entry = xa_erase(tree, offset + si);
> if (entry)
> zswap_entry_free(entry);
> }
> --
> 2.27.0
>