[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAJD7tkbF6D4d2kLvXv3-Tgq=LE5i3O2mXZc5qBvPS9wToFV2rQ@mail.gmail.com>
Date: Tue, 7 Jan 2025 20:22:29 -0800
From: Yosry Ahmed <yosryahmed@...gle.com>
To: "Sridhar, Kanchana P" <kanchana.p.sridhar@...el.com>
Cc: "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>, "linux-mm@...ck.org" <linux-mm@...ck.org>,
"hannes@...xchg.org" <hannes@...xchg.org>, "nphamcs@...il.com" <nphamcs@...il.com>,
"chengming.zhou@...ux.dev" <chengming.zhou@...ux.dev>,
"usamaarif642@...il.com" <usamaarif642@...il.com>, "ryan.roberts@....com" <ryan.roberts@....com>,
"21cnbao@...il.com" <21cnbao@...il.com>, "akpm@...ux-foundation.org" <akpm@...ux-foundation.org>,
"linux-crypto@...r.kernel.org" <linux-crypto@...r.kernel.org>,
"herbert@...dor.apana.org.au" <herbert@...dor.apana.org.au>, "davem@...emloft.net" <davem@...emloft.net>,
"clabbe@...libre.com" <clabbe@...libre.com>, "ardb@...nel.org" <ardb@...nel.org>,
"ebiggers@...gle.com" <ebiggers@...gle.com>, "surenb@...gle.com" <surenb@...gle.com>,
"Accardi, Kristen C" <kristen.c.accardi@...el.com>,
"Feghali, Wajdi K" <wajdi.k.feghali@...el.com>, "Gopal, Vinodh" <vinodh.gopal@...el.com>
Subject: Re: [PATCH v5 11/12] mm: zswap: Restructure & simplify zswap_store()
to make it amenable for batching.
[..]
> > > diff --git a/mm/zswap.c b/mm/zswap.c
> > > index 99cd78891fd0..1be0f1807bfc 100644
> > > --- a/mm/zswap.c
> > > +++ b/mm/zswap.c
> > > @@ -1467,77 +1467,129 @@ static void shrink_worker(struct work_struct *w)
> > > * main API
> > > **********************************/
> > >
> > > -static ssize_t zswap_store_page(struct page *page,
> > > - struct obj_cgroup *objcg,
> > > - struct zswap_pool *pool)
> > > +static bool zswap_compress_folio(struct folio *folio,
> > > + struct zswap_entry *entries[],
> > > + struct zswap_pool *pool)
> > > {
> > > - swp_entry_t page_swpentry = page_swap_entry(page);
> > > - struct zswap_entry *entry, *old;
> > > + long index, nr_pages = folio_nr_pages(folio);
> > >
> > > - /* allocate entry */
> > > - entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> > > - if (!entry) {
> > > - zswap_reject_kmemcache_fail++;
> > > - return -EINVAL;
> > > + for (index = 0; index < nr_pages; ++index) {
> > > + struct page *page = folio_page(folio, index);
> > > +
> > > + if (!zswap_compress(page, entries[index], pool))
> > > + return false;
> > > }
> > >
> > > - if (!zswap_compress(page, entry, pool))
> > > - goto compress_failed;
> > > + return true;
> > > +}
> > >
> > > - old = xa_store(swap_zswap_tree(page_swpentry),
> > > - swp_offset(page_swpentry),
> > > - entry, GFP_KERNEL);
> > > - if (xa_is_err(old)) {
> > > - int err = xa_err(old);
> > > +/*
> > > + * Store all pages in a folio.
> > > + *
> > > + * The error handling from all failure points is consolidated to the
> > > + * "store_folio_failed" label, based on the initialization of the zswap
> > > + * entries' handles to ERR_PTR(-EINVAL) at allocation time, and the fact
> > > + * that the entry's handle is subsequently modified only upon a successful
> > > + * zpool_malloc() after the page is compressed.
> > > + */
> > > +static ssize_t zswap_store_folio(struct folio *folio,
> > > + struct obj_cgroup *objcg,
> > > + struct zswap_pool *pool)
> > > +{
> > > + long index, nr_pages = folio_nr_pages(folio);
> > > + struct zswap_entry **entries = NULL;
> > > + int node_id = folio_nid(folio);
> > > + size_t compressed_bytes = 0;
> > >
> > > - WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n",
> > err);
> > > - zswap_reject_alloc_fail++;
> > > - goto store_failed;
> > > + entries = kmalloc(nr_pages * sizeof(*entries), GFP_KERNEL);
> >
> > We can probably use kcalloc() here.
>
> I am a little worried about the latency penalty of kcalloc() in the reclaim path,
> especially since I am not relying on zero-initialized memory for "entries".
Hmm, good point: for a 2M THP (512 base pages of 4K, i.e. 512 eight-byte
entry pointers) we could be allocating an entire page here.
[..]
> > > @@ -1549,8 +1601,8 @@ bool zswap_store(struct folio *folio)
> > > struct mem_cgroup *memcg = NULL;
> > > struct zswap_pool *pool;
> > > size_t compressed_bytes = 0;
> > > + ssize_t bytes;
> > > bool ret = false;
> > > - long index;
> > >
> > > VM_WARN_ON_ONCE(!folio_test_locked(folio));
> > > VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> > > @@ -1584,15 +1636,11 @@ bool zswap_store(struct folio *folio)
> > > mem_cgroup_put(memcg);
> > > }
> > >
> > > - for (index = 0; index < nr_pages; ++index) {
> > > - struct page *page = folio_page(folio, index);
> > > - ssize_t bytes;
> > > + bytes = zswap_store_folio(folio, objcg, pool);
> > > + if (bytes < 0)
> > > + goto put_pool;
> > >
> > > - bytes = zswap_store_page(page, objcg, pool);
> > > - if (bytes < 0)
> > > - goto put_pool;
> > > - compressed_bytes += bytes;
> > > - }
> > > + compressed_bytes = bytes;
> >
> > What's the point of having both compressed_bytes and bytes now?
>
> The main reason was to cleanly handle a negative error value returned in "bytes"
> (declared as ssize_t), as opposed to a true total "compressed_bytes" (declared as size_t)
> for the folio to use for objcg charging. This is similar to the current mainline
> code where zswap_store() calls zswap_store_page(). I was hoping to avoid potential
> issues with overflow/underflow and to keep the code maintainable. Let me know if this is OK.
It makes sense in the current mainline because we store the return
value of each call to zswap_store_page() in 'bytes', then check if
it's an error value, then add it to 'compressed_bytes'. Now we have a
single call to zswap_store_folio() and a single return value. AFAICT,
there is currently no benefit to storing it in 'bytes', checking it,
then moving it to 'compressed_bytes'. The compiler will probably
optimize the variable away anyway, but it looks weird.
Powered by blists - more mailing lists