[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250704042323.10318-17-kanchana.p.sridhar@intel.com>
Date: Thu, 3 Jul 2025 21:23:14 -0700
From: Kanchana P Sridhar <kanchana.p.sridhar@...el.com>
To: linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
hannes@...xchg.org,
yosry.ahmed@...ux.dev,
nphamcs@...il.com,
chengming.zhou@...ux.dev,
usamaarif642@...il.com,
ryan.roberts@....com,
21cnbao@...il.com,
ying.huang@...ux.alibaba.com,
akpm@...ux-foundation.org,
senozhatsky@...omium.org,
linux-crypto@...r.kernel.org,
herbert@...dor.apana.org.au,
davem@...emloft.net,
clabbe@...libre.com,
ardb@...nel.org,
ebiggers@...gle.com,
surenb@...gle.com,
kristen.c.accardi@...el.com,
vinicius.gomes@...el.com
Cc: wajdi.k.feghali@...el.com,
vinodh.gopal@...el.com,
kanchana.p.sridhar@...el.com
Subject: [PATCH v10 16/25] crypto: iaa - Submit the two largest source buffers first in decompress batching.
Find the two largest source buffers in a given decompression batch and
submit them to the IAA decompress engines before the rest of the batch.
This improves decompress batching latency because the hardware gets a
head start on decompressing the highest-latency source buffers in the
batch. Workload performance also improves significantly as a result of
this optimization.
Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@...el.com>
---
drivers/crypto/intel/iaa/iaa_crypto_main.c | 60 +++++++++++++++++++++-
1 file changed, 58 insertions(+), 2 deletions(-)
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index 09d786e85ab66..4ed56a69112a9 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -2375,6 +2375,35 @@ static int iaa_comp_acompress_batch(
return err;
}
+/*
+ * Find the two largest of the @nr_pages source buffer lengths in @slens for a
+ * decompress batch in one pass; pass their indices back in @idx_max and
+ * @idx_next_max (on equal lengths the later index becomes the max). Returns
+ * true only if both indices coincide, i.e. @nr_pages <= 1 (no second buffer).
+ */
+static __always_inline bool decomp_batch_get_max_slens_idx(
+ unsigned int slens[],
+ int nr_pages,
+ int *idx_max,
+ int *idx_next_max)
+{
+ int i, max_i = 0, next_max_i = 0;
+
+ for (i = 0; i < nr_pages; ++i) {
+ if (slens[i] >= slens[max_i]) {
+ next_max_i = max_i; /* old max becomes the runner-up */
+ max_i = i;
+ } else if ((next_max_i == max_i) || (slens[i] > slens[next_max_i])) {
+ next_max_i = i; /* no runner-up yet, or i beats it */
+ }
+ }
+
+ *idx_max = max_i;
+ *idx_next_max = next_max_i;
+
+ return (next_max_i == max_i);
+}
+
/**
* This API provides IAA decompress batching functionality for use by swap
* modules.
@@ -2407,18 +2436,36 @@ static int iaa_comp_adecompress_batch(
{
struct scatterlist inputs[IAA_CRYPTO_MAX_BATCH_SIZE];
struct scatterlist outputs[IAA_CRYPTO_MAX_BATCH_SIZE];
+ bool max_processed = false, next_max_processed = false;
bool decompressions_done = false;
- int i, err = 0;
+ int i, max_i, next_max_i, err = 0;
BUG_ON(nr_reqs > IAA_CRYPTO_MAX_BATCH_SIZE);
iaa_set_req_poll(reqs, nr_reqs, true);
+ /*
+ * Get the indices of the two largest decomp buffers in the batch.
+ * Submit them first. This improves latency of the batch.
+ */
+ next_max_processed = decomp_batch_get_max_slens_idx(slens, nr_reqs,
+ &max_i, &next_max_i);
+
+ i = max_i;
+
/*
* Prepare and submit the batch of iaa_reqs to IAA. IAA will process
* these decompress jobs in parallel.
*/
- for (i = 0; i < nr_reqs; ++i) {
+ for (; i < nr_reqs; ++i) {
+ if ((i == max_i) && max_processed)
+ continue;
+ if ((i == next_max_i) && max_processed && next_max_processed)
+ continue;
+
+ if (max_processed && !next_max_processed)
+ i = next_max_i;
+
reqs[i]->src = &inputs[i];
reqs[i]->dst = &outputs[i];
sg_init_one(reqs[i]->src, srcs[i], slens[i]);
@@ -2437,6 +2484,15 @@ static int iaa_comp_adecompress_batch(
errors[i] = -EAGAIN;
else if (errors[i])
err = -EINVAL;
+
+ if (i == max_i) {
+ max_processed = true;
+ i = -1;
+ }
+ if (i == next_max_i) {
+ next_max_processed = true;
+ i = -1;
+ }
}
/*
--
2.27.0
Powered by blists - more mailing lists