lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250225213200.729056-1-nphamcs@gmail.com>
Date: Tue, 25 Feb 2025 13:32:00 -0800
From: Nhat Pham <nphamcs@...il.com>
To: akpm@...ux-foundation.org
Cc: hannes@...xchg.org,
	yosryahmed@...gle.com,
	chengming.zhou@...ux.dev,
	linux-mm@...ck.org,
	kernel-team@...a.com,
	linux-kernel@...r.kernel.org
Subject: [PATCH] zswap: do not crash the kernel on decompression failure

Currently, we crash the kernel when a decompression failure occurs in
zswap (either because of memory corruption, or a bug in the compression
algorithm). This is overkill. We should only SIGBUS the unfortunate
process asking for the zswap entry on zswap load, and skip the corrupted
entry in zswap writeback.

See [1] for a recent upstream discussion about this.

[1]: https://lore.kernel.org/all/ZsiLElTykamcYZ6J@casper.infradead.org/

Suggested-by: Matthew Wilcox <willy@...radead.org>
Suggested-by: Yosry Ahmed <yosryahmed@...gle.com>
Signed-off-by: Nhat Pham <nphamcs@...il.com>
---
 mm/zswap.c | 85 +++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 58 insertions(+), 27 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index f6316b66fb23..31d4397eed61 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -62,6 +62,8 @@ static u64 zswap_reject_reclaim_fail;
 static u64 zswap_reject_compress_fail;
 /* Compressed page was too big for the allocator to (optimally) store */
 static u64 zswap_reject_compress_poor;
+/* Load and writeback failed due to decompression failure */
+static u64 zswap_reject_decompress_fail;
 /* Store failed because underlying allocator could not get memory */
 static u64 zswap_reject_alloc_fail;
 /* Store failed because the entry metadata could not be allocated (rare) */
@@ -953,11 +955,12 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
 	return comp_ret == 0 && alloc_ret == 0;
 }
 
-static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
+static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 {
 	struct zpool *zpool = entry->pool->zpool;
 	struct scatterlist input, output;
 	struct crypto_acomp_ctx *acomp_ctx;
+	bool ret = true;
 	u8 *src;
 
 	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
@@ -984,12 +987,19 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 	sg_init_table(&output, 1);
 	sg_set_folio(&output, folio, PAGE_SIZE, 0);
 	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
-	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
-	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
+	if (crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait) ||
+			acomp_ctx->req->dlen != PAGE_SIZE) {
+		ret = false;
+		zswap_reject_decompress_fail++;
+		pr_alert_ratelimited(
+			"decompression failed on zswap entry with offset %08lx\n",
+			entry->swpentry.val);
+	}
 	mutex_unlock(&acomp_ctx->mutex);
 
 	if (src != acomp_ctx->buffer)
 		zpool_unmap_handle(zpool, entry->handle);
+	return ret;
 }
 
 /*********************************
@@ -1018,6 +1028,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_NONE,
 	};
+	int ret = 0;
 
 	/* try to allocate swap cache folio */
 	mpol = get_task_policy(current);
@@ -1034,8 +1045,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 	 * and freed when invalidated by the concurrent shrinker anyway.
 	 */
 	if (!folio_was_allocated) {
-		folio_put(folio);
-		return -EEXIST;
+		ret = -EEXIST;
+		goto put_folio;
 	}
 
 	/*
@@ -1048,14 +1059,17 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 	 * be dereferenced.
 	 */
 	tree = swap_zswap_tree(swpentry);
-	if (entry != xa_cmpxchg(tree, offset, entry, NULL, GFP_KERNEL)) {
-		delete_from_swap_cache(folio);
-		folio_unlock(folio);
-		folio_put(folio);
-		return -ENOMEM;
+	if (entry != xa_load(tree, offset)) {
+		ret = -ENOMEM;
+		goto fail;
 	}
 
-	zswap_decompress(entry, folio);
+	if (!zswap_decompress(entry, folio)) {
+		ret = -EIO;
+		goto fail;
+	}
+
+	xa_erase(tree, offset);
 
 	count_vm_event(ZSWPWB);
 	if (entry->objcg)
@@ -1071,9 +1085,14 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 
 	/* start writeback */
 	__swap_writepage(folio, &wbc);
-	folio_put(folio);
+	goto put_folio;
 
-	return 0;
+fail:
+	delete_from_swap_cache(folio);
+	folio_unlock(folio);
+put_folio:
+	folio_put(folio);
+	return ret;
 }
 
 /*********************************
@@ -1600,6 +1619,29 @@ bool zswap_load(struct folio *folio)
 	if (WARN_ON_ONCE(folio_test_large(folio)))
 		return true;
 
+	/*
+	 * We cannot invalidate the zswap entry before decompressing it. If
+	 * decompression fails, we must keep the entry in the tree so that
+	 * a future read by another process on the same swap entry will also
+	 * have to go through zswap. Otherwise, we risk silently reading
+	 * corrupted data for the other process.
+	 */
+	entry = xa_load(tree, offset);
+	if (!entry)
+		return false;
+
+	/*
+	 * If decompression fails, we return true to notify the caller that the
+	 * folio's data were in zswap, but do not mark the folio as up-to-date.
+	 * This will effectively SIGBUS the calling process.
+	 */
+	if (!zswap_decompress(entry, folio))
+		return true;
+
+	count_vm_event(ZSWPIN);
+	if (entry->objcg)
+		count_objcg_events(entry->objcg, ZSWPIN, 1);
+
 	/*
 	 * When reading into the swapcache, invalidate our entry. The
 	 * swapcache can be the authoritative owner of the page and
@@ -1612,21 +1654,8 @@ bool zswap_load(struct folio *folio)
 	 * files, which reads into a private page and may free it if
 	 * the fault fails. We remain the primary owner of the entry.)
 	 */
-	if (swapcache)
-		entry = xa_erase(tree, offset);
-	else
-		entry = xa_load(tree, offset);
-
-	if (!entry)
-		return false;
-
-	zswap_decompress(entry, folio);
-
-	count_vm_event(ZSWPIN);
-	if (entry->objcg)
-		count_objcg_events(entry->objcg, ZSWPIN, 1);
-
 	if (swapcache) {
+		xa_erase(tree, offset);
 		zswap_entry_free(entry);
 		folio_mark_dirty(folio);
 	}
@@ -1727,6 +1756,8 @@ static int zswap_debugfs_init(void)
 			   zswap_debugfs_root, &zswap_reject_compress_fail);
 	debugfs_create_u64("reject_compress_poor", 0444,
 			   zswap_debugfs_root, &zswap_reject_compress_poor);
+	debugfs_create_u64("reject_decompress_fail", 0444,
+			   zswap_debugfs_root, &zswap_reject_decompress_fail);
 	debugfs_create_u64("written_back_pages", 0444,
 			   zswap_debugfs_root, &zswap_written_back_pages);
 	debugfs_create_file("pool_total_size", 0444,
-- 
2.43.5


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ