lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250704042323.10318-17-kanchana.p.sridhar@intel.com>
Date: Thu,  3 Jul 2025 21:23:14 -0700
From: Kanchana P Sridhar <kanchana.p.sridhar@...el.com>
To: linux-kernel@...r.kernel.org,
	linux-mm@...ck.org,
	hannes@...xchg.org,
	yosry.ahmed@...ux.dev,
	nphamcs@...il.com,
	chengming.zhou@...ux.dev,
	usamaarif642@...il.com,
	ryan.roberts@....com,
	21cnbao@...il.com,
	ying.huang@...ux.alibaba.com,
	akpm@...ux-foundation.org,
	senozhatsky@...omium.org,
	linux-crypto@...r.kernel.org,
	herbert@...dor.apana.org.au,
	davem@...emloft.net,
	clabbe@...libre.com,
	ardb@...nel.org,
	ebiggers@...gle.com,
	surenb@...gle.com,
	kristen.c.accardi@...el.com,
	vinicius.gomes@...el.com
Cc: wajdi.k.feghali@...el.com,
	vinodh.gopal@...el.com,
	kanchana.p.sridhar@...el.com
Subject: [PATCH v10 16/25] crypto: iaa - Submit the two largest source buffers first in decompress batching.

This patch finds the two largest source buffers in a given decompression
batch, and submits them first to the IAA decompress engines.

This improves decompress batching latency because the hardware gets a
head start on decompressing the highest-latency source buffers in the
batch. Workload performance also improves significantly as a result of
this optimization.

Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@...el.com>
---
 drivers/crypto/intel/iaa/iaa_crypto_main.c | 60 +++++++++++++++++++++-
 1 file changed, 58 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index 09d786e85ab66..4ed56a69112a9 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -2375,6 +2375,35 @@ static int iaa_comp_acompress_batch(
 	return err;
 }
 
+/*
+ * Find the two largest source buffers in @slens for a decompress batch and
+ * pass their indices back in @idx_max and @idx_next_max. Ties for the largest
+ * resolve to the later index. Returns true if there is no distinct second
+ * largest buffer (both indices are 0), which happens when @nr_pages <= 1.
+ */
+static __always_inline bool decomp_batch_get_max_slens_idx(
+	unsigned int slens[],
+	int nr_pages,
+	int *idx_max,
+	int *idx_next_max)
+{
+	int i, max_i = 0, next_max_i = 0;
+
+	for (i = 0; i < nr_pages; ++i) {
+		if (slens[i] >= slens[max_i]) {
+			next_max_i = max_i;
+			max_i = i;
+		} else if ((next_max_i == max_i) || (slens[i] > slens[next_max_i])) {
+			next_max_i = i;
+		}
+	}
+
+	*idx_max = max_i;
+	*idx_next_max = next_max_i;
+
+	return (next_max_i == max_i);
+}
+
 /**
  * This API provides IAA decompress batching functionality for use by swap
  * modules.
@@ -2407,18 +2436,36 @@ static int iaa_comp_adecompress_batch(
 {
 	struct scatterlist inputs[IAA_CRYPTO_MAX_BATCH_SIZE];
 	struct scatterlist outputs[IAA_CRYPTO_MAX_BATCH_SIZE];
+	bool max_processed = false, next_max_processed = false;
 	bool decompressions_done = false;
-	int i, err = 0;
+	int i, max_i, next_max_i, err = 0;
 
 	BUG_ON(nr_reqs > IAA_CRYPTO_MAX_BATCH_SIZE);
 
 	iaa_set_req_poll(reqs, nr_reqs, true);
 
+	/*
+	 * Get the indices of the two largest decomp buffers in the batch.
+	 * Submit them first. This improves latency of the batch.
+	 */
+	next_max_processed = decomp_batch_get_max_slens_idx(slens, nr_reqs,
+							    &max_i, &next_max_i);
+
+	i = max_i;
+
 	/*
 	 * Prepare and submit the batch of iaa_reqs to IAA. IAA will process
 	 * these decompress jobs in parallel.
 	 */
-	for (i = 0; i < nr_reqs; ++i) {
+	for (; i < nr_reqs; ++i) {
+		if ((i == max_i) && max_processed)
+			continue;
+		if ((i == next_max_i) && max_processed && next_max_processed)
+			continue;
+
+		if (max_processed && !next_max_processed)
+			i = next_max_i;
+
 		reqs[i]->src = &inputs[i];
 		reqs[i]->dst = &outputs[i];
 		sg_init_one(reqs[i]->src, srcs[i], slens[i]);
@@ -2437,6 +2484,15 @@ static int iaa_comp_adecompress_batch(
 			errors[i] = -EAGAIN;
 		else if (errors[i])
 			err = -EINVAL;
+
+		if (i == max_i) {
+			max_processed = true;
+			i = -1;
+		}
+		if (i == next_max_i) {
+			next_max_processed = true;
+			i = -1;
+		}
 	}
 
 	/*
-- 
2.27.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ