[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20241018064101.336232-12-kanchana.p.sridhar@intel.com>
Date: Thu, 17 Oct 2024 23:40:59 -0700
From: Kanchana P Sridhar <kanchana.p.sridhar@...el.com>
To: linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
hannes@...xchg.org,
yosryahmed@...gle.com,
nphamcs@...il.com,
chengming.zhou@...ux.dev,
usamaarif642@...il.com,
ryan.roberts@....com,
ying.huang@...el.com,
21cnbao@...il.com,
akpm@...ux-foundation.org,
linux-crypto@...r.kernel.org,
herbert@...dor.apana.org.au,
davem@...emloft.net,
clabbe@...libre.com,
ardb@...nel.org,
ebiggers@...gle.com,
surenb@...gle.com,
kristen.c.accardi@...el.com,
zanussi@...nel.org,
viro@...iv.linux.org.uk,
brauner@...nel.org,
jack@...e.cz,
mcgrof@...nel.org,
kees@...nel.org,
joel.granados@...nel.org,
bfoster@...hat.com,
willy@...radead.org,
linux-fsdevel@...r.kernel.org
Cc: wajdi.k.feghali@...el.com,
vinodh.gopal@...el.com,
kanchana.p.sridhar@...el.com
Subject: [RFC PATCH v1 11/13] mm: swap: Add IAA batch compression API swap_crypto_acomp_compress_batch().
Add a new API, swap_crypto_acomp_compress_batch(), that performs batch
compression. A system that has Intel IAA can use this API to submit a
batch of compress jobs for parallel compression in hardware, to improve
performance. On a system without IAA, this API processes each compress
job sequentially.
The purpose of this API is to be invocable from any swap module that needs
to compress large folios, or a batch of pages in the general case. For
instance, zswap would batch compress up to SWAP_CRYPTO_SUB_BATCH_SIZE
(i.e. 8 if the system has IAA) pages in the large folio in parallel to
improve zswap_store() performance.
Towards this eventual goal:
1) The definition of "struct crypto_acomp_ctx" is moved to mm/swap.h
so that mm modules like swap_state.c and zswap.c can reference it.
2) The swap_crypto_acomp_compress_batch() interface is implemented in
swap_state.c.
It would be preferable for "struct crypto_acomp_ctx" to be defined in,
and for swap_crypto_acomp_compress_batch() to be exported via,
include/linux/swap.h so that modules outside mm (e.g. zram) can
potentially use the API for batch compression with IAA. I would
appreciate RFC comments on this.
Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@...el.com>
---
mm/swap.h | 45 +++++++++++++++++++
mm/swap_state.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++
mm/zswap.c | 9 ----
3 files changed, 160 insertions(+), 9 deletions(-)
diff --git a/mm/swap.h b/mm/swap.h
index 566616c971d4..4dcb67e2cc33 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -7,6 +7,7 @@ struct mempolicy;
#ifdef CONFIG_SWAP
#include <linux/swapops.h> /* for swp_offset */
#include <linux/blk_types.h> /* for bio_end_io_t */
+#include <linux/crypto.h>
/*
* For IAA compression batching:
@@ -19,6 +20,39 @@ struct mempolicy;
#define SWAP_CRYPTO_SUB_BATCH_SIZE 1UL
#endif
+/* linux/mm/swap_state.c, zswap.c */
+struct crypto_acomp_ctx {
+ struct crypto_acomp *acomp; /* compressor handle; its ->poll member is tested to pick async vs. sync batching */
+ struct acomp_req *req[SWAP_CRYPTO_SUB_BATCH_SIZE]; /* one request per batch slot; only req[0] is used when ->poll is not set */
+ u8 *buffer[SWAP_CRYPTO_SUB_BATCH_SIZE]; /* per-slot buffers; the batch code's comment states each is PAGE_SIZE * 2 bytes */
+ struct crypto_wait wait; /* handed to crypto_wait_req() on the synchronous (no ->poll) path */
+ struct mutex mutex; /* callers of swap_crypto_acomp_compress_batch() are expected to hold this */
+ bool is_sleepable; /* NOTE(review): not referenced by the code in this patch; meaning must be confirmed in zswap.c */
+};
+
+/**
+ * swap_crypto_acomp_compress_batch() - Compress a batch of pages for swap
+ * modules, in parallel when the compressor provides a poll() method.
+ *
+ * @pages: Pages to be compressed.
+ * @dsts: Pre-allocated destination buffers to store results of compression,
+ * one per page.
+ * @dlens: In/out. On entry dlens[i] is handed to the compressor as the
+ * destination length for page i, so the caller must initialize it.
+ * On success it is overwritten with the compressed length.
+ * @errors: Will contain a 0 if the page was successfully compressed, or a
+ * non-0 error value to be processed by the calling function.
+ * @nr_pages: The number of pages, up to SWAP_CRYPTO_SUB_BATCH_SIZE,
+ * to be compressed.
+ * @acomp_ctx: The acomp context for iaa_crypto/other compressor.
+ *
+ * The acomp_ctx mutex should be locked/unlocked before/after calling this
+ * procedure.
+ */
+void swap_crypto_acomp_compress_batch(
+ struct page *pages[],
+ u8 *dsts[],
+ unsigned int dlens[],
+ int errors[],
+ int nr_pages,
+ struct crypto_acomp_ctx *acomp_ctx);
+
/* linux/mm/page_io.c */
int sio_pool_init(void);
struct swap_iocb;
@@ -119,6 +153,17 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
#else /* CONFIG_SWAP */
struct swap_iocb;
+struct crypto_acomp_ctx {}; /* empty placeholder so the stub below still has a complete type without CONFIG_SWAP */
+static inline void swap_crypto_acomp_compress_batch(
+ struct page *pages[],
+ u8 *dsts[],
+ unsigned int dlens[],
+ int errors[],
+ int nr_pages,
+ struct crypto_acomp_ctx *acomp_ctx)
+{ /* No-op stub: neither dlens[] nor errors[] is written. NOTE(review): a caller that reads errors[] afterwards would see whatever it put there; confirm no !CONFIG_SWAP caller exists. */
+}
+
static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
{
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 4669f29cf555..117c3caa5679 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -23,6 +23,8 @@
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
#include <linux/shmem_fs.h>
+#include <linux/scatterlist.h>
+#include <crypto/acompress.h>
#include "internal.h"
#include "swap.h"
@@ -742,6 +744,119 @@ void exit_swap_address_space(unsigned int type)
swapper_spaces[type] = NULL;
}
+#ifdef CONFIG_SWAP
+
+/**
+ * swap_crypto_acomp_compress_batch() - Compress a batch of pages for swap
+ * modules, in parallel when the compressor provides a poll() method.
+ *
+ * @pages: Pages to be compressed.
+ * @dsts: Pre-allocated destination buffers, one per page. Each is described
+ *	   to the compressor as PAGE_SIZE * 2 bytes long.
+ * @dlens: In/out. On entry dlens[i] is handed to the compressor as the
+ *	    destination length for page i, so the caller must initialize it.
+ *	    On success it is overwritten with the compressed length.
+ * @errors: Per-page result: 0 if the page was successfully compressed, or the
+ *	     negative error value reported for that page, to be processed by
+ *	     the calling function.
+ * @nr_pages: The number of pages, up to SWAP_CRYPTO_SUB_BATCH_SIZE,
+ *	       to be compressed.
+ * @acomp_ctx: The acomp context for iaa_crypto/other compressor.
+ *
+ * The acomp_ctx mutex should be locked/unlocked before/after calling this
+ * procedure.
+ *
+ * If the compressor provides poll(), every page is submitted on its own
+ * acomp_ctx->req[i] and the completions are then polled for, in whatever
+ * order they finish. Otherwise each page is compressed synchronously, one
+ * after the other, using acomp_ctx->req[0] and acomp_ctx->wait.
+ */
+void swap_crypto_acomp_compress_batch(
+	struct page *pages[],
+	u8 *dsts[],
+	unsigned int dlens[],
+	int errors[],
+	int nr_pages,
+	struct crypto_acomp_ctx *acomp_ctx)
+{
+	struct scatterlist inputs[SWAP_CRYPTO_SUB_BATCH_SIZE];
+	struct scatterlist outputs[SWAP_CRYPTO_SUB_BATCH_SIZE];
+	/* Which requests were accepted for async processing and not yet reaped. */
+	bool pending[SWAP_CRYPTO_SUB_BATCH_SIZE] = { false };
+	const bool async = !!acomp_ctx->acomp->poll;
+	int nr_pending = 0;
+	int i;
+
+	/*
+	 * An oversized batch is a caller bug, but not one worth taking the
+	 * machine down for: warn once and fail every page in the batch so
+	 * the caller sees a defined error for each of them.
+	 */
+	if (WARN_ON_ONCE(nr_pages < 0 ||
+			 nr_pages > SWAP_CRYPTO_SUB_BATCH_SIZE)) {
+		for (i = 0; i < nr_pages; ++i)
+			errors[i] = -EINVAL;
+		return;
+	}
+
+	/* Prepare and submit one compress request per page. */
+	for (i = 0; i < nr_pages; ++i) {
+		/* Without poll() the requests run back to back on req[0]. */
+		struct acomp_req *req = acomp_ctx->req[async ? i : 0];
+		int ret;
+
+		sg_init_table(&inputs[i], 1);
+		sg_set_page(&inputs[i], pages[i], PAGE_SIZE, 0);
+
+		/*
+		 * Each acomp_ctx->buffer[] is of size (PAGE_SIZE * 2).
+		 * Reflect same in sg_list.
+		 */
+		sg_init_one(&outputs[i], dsts[i], PAGE_SIZE * 2);
+		acomp_request_set_params(req, &inputs[i], &outputs[i],
+					 PAGE_SIZE, dlens[i]);
+
+		ret = crypto_acomp_compress(req);
+
+		if (!async) {
+			/* Synchronous path: wait for this request right away. */
+			errors[i] = crypto_wait_req(ret, &acomp_ctx->wait);
+			if (!errors[i])
+				dlens[i] = req->dlen;
+			continue;
+		}
+
+		if (ret == -EINPROGRESS) {
+			/* Accepted by the driver; reap it in the poll loop. */
+			pending[i] = true;
+			nr_pending++;
+			continue;
+		}
+
+		/*
+		 * The request did not go asynchronous: it either completed
+		 * immediately (0) or was rejected. Report exactly what the
+		 * driver returned instead of folding everything into -EINVAL,
+		 * and pick up the length of an immediate completion.
+		 */
+		errors[i] = ret;
+		if (!ret)
+			dlens[i] = req->dlen;
+	}
+
+	/*
+	 * Poll for and process compress job completions in out-of-order
+	 * manner. Nothing is pending on the synchronous path, so this loop
+	 * is skipped there.
+	 *
+	 * NOTE(review): this spins until every request reports something
+	 * other than -EAGAIN, exactly as before; confirm that busy-waiting
+	 * here without relaxing the CPU is acceptable.
+	 */
+	while (nr_pending) {
+		for (i = 0; i < nr_pages; ++i) {
+			int ret;
+
+			if (!pending[i])
+				continue;
+
+			ret = crypto_acomp_poll(acomp_ctx->req[i]);
+			if (ret == -EAGAIN)
+				continue;	/* still in flight */
+
+			errors[i] = ret;
+			if (!ret)
+				dlens[i] = acomp_ctx->req[i]->dlen;
+
+			pending[i] = false;
+			nr_pending--;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(swap_crypto_acomp_compress_batch);
+
+#endif /* CONFIG_SWAP */
+
static int swap_vma_ra_win(struct vm_fault *vmf, unsigned long *start,
unsigned long *end)
{
diff --git a/mm/zswap.c b/mm/zswap.c
index 579869d1bdf6..cab3114321f9 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -150,15 +150,6 @@ bool zswap_never_enabled(void)
* data structures
**********************************/
-struct crypto_acomp_ctx {
- struct crypto_acomp *acomp;
- struct acomp_req *req[SWAP_CRYPTO_SUB_BATCH_SIZE];
- u8 *buffer[SWAP_CRYPTO_SUB_BATCH_SIZE];
- struct crypto_wait wait;
- struct mutex mutex;
- bool is_sleepable;
-};
-
/*
* The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
* The only case where lru_lock is not acquired while holding tree.lock is
--
2.27.0
Powered by blists - more mailing lists