[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250430205305.22844-9-kanchana.p.sridhar@intel.com>
Date: Wed, 30 Apr 2025 13:52:54 -0700
From: Kanchana P Sridhar <kanchana.p.sridhar@...el.com>
To: linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
hannes@...xchg.org,
yosry.ahmed@...ux.dev,
nphamcs@...il.com,
chengming.zhou@...ux.dev,
usamaarif642@...il.com,
ryan.roberts@....com,
21cnbao@...il.com,
ying.huang@...ux.alibaba.com,
akpm@...ux-foundation.org,
linux-crypto@...r.kernel.org,
herbert@...dor.apana.org.au,
davem@...emloft.net,
clabbe@...libre.com,
ardb@...nel.org,
ebiggers@...gle.com,
surenb@...gle.com,
kristen.c.accardi@...el.com
Cc: wajdi.k.feghali@...el.com,
vinodh.gopal@...el.com,
kanchana.p.sridhar@...el.com
Subject: [PATCH v9 08/19] crypto: iaa - Descriptor allocation timeouts with mitigations in iaa_crypto.
This patch changes descriptor allocation from blocking to non-blocking
with a bounded number of retries ("timeouts").
This is necessary to prevent task blocked errors in high contention
scenarios, for instance, when the platform has only 1 IAA device
enabled. With 1 IAA device enabled per package on a dual-package
Sapphire Rapids with 56 cores/package, there are 112 logical cores
mapped to this single IAA device. In this scenario, the task blocked
errors can occur because idxd_alloc_desc() is called with
IDXD_OP_BLOCK. With batching, multiple descriptors will need to be
allocated per batch. Any process that is able to do so can cause
contention for allocating descriptors for all other processes that share
the same sbitmap_queue. Under IDXD_OP_BLOCK, this can cause
compress/decompress jobs to stall in stress test scenarios
(e.g. zswap_store() of 2M folios).
To make the iaa_crypto driver more fail-safe, this commit
implements the following:
1) Change compress/decompress descriptor allocations to be non-blocking
with retries ("timeouts").
2) Return compress error to zswap if descriptor allocation with timeouts
fails during compress ops. zswap_store() will return an error and the
folio gets stored in the backing swap device.
3) Fall back to software decompress if descriptor allocation with timeouts
fails during decompress ops.
With these fixes, no task blocked errors are seen under stress
testing conditions, and no performance degradation is observed.
This patch also simplifies the success/error return paths in
iaa_[de]compress() and iaa_compress_verify().
Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@...el.com>
---
drivers/crypto/intel/iaa/iaa_crypto.h | 3 +
drivers/crypto/intel/iaa/iaa_crypto_main.c | 84 ++++++++++++----------
2 files changed, 48 insertions(+), 39 deletions(-)
diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h
index 549ac98a9366..b4a94da2c315 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto.h
+++ b/drivers/crypto/intel/iaa/iaa_crypto.h
@@ -21,6 +21,9 @@
#define IAA_COMPLETION_TIMEOUT 1000000
+#define IAA_ALLOC_DESC_COMP_TIMEOUT 1000
+#define IAA_ALLOC_DESC_DECOMP_TIMEOUT 500
+
#define IAA_ANALYTICS_ERROR 0x0a
#define IAA_ERROR_DECOMP_BUF_OVERFLOW 0x0b
#define IAA_ERROR_COMP_BUF_OVERFLOW 0x19
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index 0b821b8b4264..7dab340c4a34 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -1416,6 +1416,7 @@ static int deflate_generic_decompress(struct acomp_req *req)
ACOMP_REQUEST_ON_STACK(fbreq, crypto_acomp_reqtfm(req));
int ret;
+ req->dlen = PAGE_SIZE;
acomp_request_set_callback(fbreq, 0, NULL, NULL);
acomp_request_set_params(fbreq, req->src, req->dst, req->slen,
req->dlen);
@@ -1536,7 +1537,8 @@ static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
struct iaa_device_compression_mode *active_compression_mode;
struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
struct iaa_device *iaa_device;
- struct idxd_desc *idxd_desc;
+ struct idxd_desc *idxd_desc = ERR_PTR(-EAGAIN);
+ u16 alloc_desc_retries = 0;
struct iax_hw_desc *desc;
struct idxd_device *idxd;
struct iaa_wq *iaa_wq;
@@ -1552,7 +1554,11 @@ static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
- idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
+ while ((idxd_desc == ERR_PTR(-EAGAIN)) && (alloc_desc_retries++ < IAA_ALLOC_DESC_DECOMP_TIMEOUT)) {
+ idxd_desc = idxd_alloc_desc(wq, IDXD_OP_NONBLOCK);
+ cpu_relax();
+ }
+
if (IS_ERR(idxd_desc)) {
dev_dbg(dev, "idxd descriptor allocation failed\n");
dev_dbg(dev, "iaa compress failed: ret=%ld\n",
@@ -1604,14 +1610,10 @@ static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
goto err;
}
- idxd_free_desc(wq, idxd_desc);
-out:
- return ret;
err:
idxd_free_desc(wq, idxd_desc);
- dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
- goto out;
+ return ret;
}
static void iaa_desc_complete(struct idxd_desc *idxd_desc,
@@ -1727,7 +1729,8 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
struct iaa_device_compression_mode *active_compression_mode;
struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
struct iaa_device *iaa_device;
- struct idxd_desc *idxd_desc;
+ struct idxd_desc *idxd_desc = ERR_PTR(-EAGAIN);
+ u16 alloc_desc_retries = 0;
struct iax_hw_desc *desc;
struct idxd_device *idxd;
struct iaa_wq *iaa_wq;
@@ -1743,7 +1746,11 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
- idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
+ while ((idxd_desc == ERR_PTR(-EAGAIN)) && (alloc_desc_retries++ < IAA_ALLOC_DESC_COMP_TIMEOUT)) {
+ idxd_desc = idxd_alloc_desc(wq, IDXD_OP_NONBLOCK);
+ cpu_relax();
+ }
+
if (IS_ERR(idxd_desc)) {
dev_dbg(dev, "idxd descriptor allocation failed\n");
dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
@@ -1820,15 +1827,10 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
*compression_crc = idxd_desc->iax_completion->crc;
- if (!ctx->async_mode)
- idxd_free_desc(wq, idxd_desc);
-out:
- return ret;
err:
idxd_free_desc(wq, idxd_desc);
- dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
-
- goto out;
+out:
+ return ret;
}
static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
@@ -1840,7 +1842,8 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
struct iaa_device_compression_mode *active_compression_mode;
struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
struct iaa_device *iaa_device;
- struct idxd_desc *idxd_desc;
+ struct idxd_desc *idxd_desc = ERR_PTR(-EAGAIN);
+ u16 alloc_desc_retries = 0;
struct iax_hw_desc *desc;
struct idxd_device *idxd;
struct iaa_wq *iaa_wq;
@@ -1856,12 +1859,18 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
- idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
+ while ((idxd_desc == ERR_PTR(-EAGAIN)) && (alloc_desc_retries++ < IAA_ALLOC_DESC_DECOMP_TIMEOUT)) {
+ idxd_desc = idxd_alloc_desc(wq, IDXD_OP_NONBLOCK);
+ cpu_relax();
+ }
+
if (IS_ERR(idxd_desc)) {
dev_dbg(dev, "idxd descriptor allocation failed\n");
dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
PTR_ERR(idxd_desc));
- return PTR_ERR(idxd_desc);
+ ret = PTR_ERR(idxd_desc);
+ idxd_desc = NULL;
+ goto fallback_software_decomp;
}
desc = idxd_desc->iax_hw;
@@ -1905,7 +1914,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
ret = idxd_submit_desc(wq, idxd_desc);
if (ret) {
dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
- goto err;
+ goto fallback_software_decomp;
}
/* Update stats */
@@ -1919,40 +1928,37 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
}
ret = check_completion(dev, idxd_desc->iax_completion, false, false);
+
+fallback_software_decomp:
if (ret) {
- dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
- if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
+ dev_dbg(dev, "%s: desc allocation/submission/check_completion failed ret=%d\n", __func__, ret);
+ if (idxd_desc && idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
pr_warn("%s: falling back to deflate-generic decompress, "
"analytics error code %x\n", __func__,
idxd_desc->iax_completion->error_code);
- ret = deflate_generic_decompress(req);
- if (ret) {
- dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
- __func__, ret);
- goto err;
- }
- } else {
+ }
+
+ ret = deflate_generic_decompress(req);
+
+ if (ret) {
+ pr_err("%s: iaa decompress failed: deflate-generic fallback to software decompress error ret=%d\n", __func__, ret);
goto err;
}
} else {
req->dlen = idxd_desc->iax_completion->output_size;
+
+ /* Update stats */
+ update_total_decomp_bytes_in(slen);
+ update_wq_decomp_bytes(wq, slen);
}
*dlen = req->dlen;
- if (!ctx->async_mode || disable_async)
+err:
+ if (idxd_desc)
idxd_free_desc(wq, idxd_desc);
-
- /* Update stats */
- update_total_decomp_bytes_in(slen);
- update_wq_decomp_bytes(wq, slen);
out:
return ret;
-err:
- idxd_free_desc(wq, idxd_desc);
- dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);
-
- goto out;
}
static int iaa_comp_acompress(struct acomp_req *req)
--
2.27.0
Powered by blists - more mailing lists