Message-ID: <20241118103054.3191944-9-senozhatsky@chromium.org>
Date: Mon, 18 Nov 2024 19:30:39 +0900
From: Sergey Senozhatsky <senozhatsky@...omium.org>
To: Andrew Morton <akpm@...ux-foundation.org>,
	Minchan Kim <minchan@...nel.org>
Cc: linux-kernel@...r.kernel.org,
	Sergey Senozhatsky <senozhatsky@...omium.org>
Subject: [RFC PATCH 8/8] zram: introduce multi-handle entries

zsmalloc size-classes store more than one compressed object per
physical page, so internal fragmentation is expected and quite
common. Internal fragmentation is completely normal: once the
system gets low on memory, zsmalloc attempts to defragment its
pool and release empty zspages. However, even this does not
guarantee a 100% usage ratio of pool memory, due to the nature
of allocators.
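
As a rough illustration of where the leftover bytes come from, here
is a minimal standalone userspace sketch (not kernel code; the
zspage order and size-class below are assumed values for the demo):

  /*
   * zsmalloc packs fixed-size objects back to back across the pages
   * of a zspage, so every zspage of a given size-class carries the
   * same fixed leftover -- that leftover is the internal
   * fragmentation.
   */
  #include <stdio.h>

  #define DEMO_PAGE_SIZE        4096
  #define DEMO_PAGES_PER_ZSPAGE 4    /* assumed zspage order */

  int main(void)
  {
      unsigned int class_size = 2448; /* an example size-class */
      unsigned int zspage_bytes = DEMO_PAGE_SIZE * DEMO_PAGES_PER_ZSPAGE;

      printf("%u objects per zspage, %u bytes unused\n",
             zspage_bytes / class_size, zspage_bytes % class_size);
      return 0;
  }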

ZRAM_HUGE objects, on the other hand, do not share physical pages
with other objects, because each such object is stored raw
(uncompressed) and occupies a whole physical page.

We can, in fact, take advantage of zsmalloc's internal
fragmentation. Instead of allocating a physical page for each huge
object, it is possible to split such objects into smaller objects
and store them in regular size-classes, possibly using allocated
but unused zspage space. Given that huge objects are stored raw,
both writes and reads of such objects require only memcpy() and
don't need any extra temporary storage/buffers.

Split each ZRAM_HUGE object into two roughly 2048-byte parts and
store those parts in regular size-classes.  This now allocates and
tracks two zsmalloc handles for such entries.
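
For reference, the split arithmetic reduces to the following
standalone userspace sketch (PAGE_SIZE and ZS_HANDLE_SIZE are
hardcoded assumptions here; the handle size stands in for the space
zsmalloc adds to each stored object):

  #include <stdio.h>
  #include <string.h>

  #define DEMO_PAGE_SIZE      4096
  #define DEMO_ZS_HANDLE_SIZE 8 /* assumed handle size */
  /* Head lands in the 2048 size-class once the handle is added. */
  #define DEMO_HEAD_SZ (DEMO_PAGE_SIZE / 2 - DEMO_ZS_HANDLE_SIZE)
  #define DEMO_TAIL_SZ (DEMO_PAGE_SIZE - DEMO_HEAD_SZ)

  /* The page is stored raw, so the split is two plain memcpy()s. */
  static void demo_split(const unsigned char *page,
                         unsigned char *head, unsigned char *tail)
  {
      memcpy(head, page, DEMO_HEAD_SZ);
      memcpy(tail, page + DEMO_HEAD_SZ, DEMO_TAIL_SZ);
  }

  int main(void)
  {
      static unsigned char page[DEMO_PAGE_SIZE];
      static unsigned char head[DEMO_HEAD_SZ], tail[DEMO_TAIL_SZ];

      demo_split(page, head, tail);
      printf("head: %d bytes, tail: %d bytes\n",
             DEMO_HEAD_SZ, DEMO_TAIL_SZ);
      return 0;
  }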

Signed-off-by: Sergey Senozhatsky <senozhatsky@...omium.org>
---
 drivers/block/zram/zram_drv.c | 122 ++++++++++++++++++++++++++--------
 drivers/block/zram/zram_drv.h |  15 ++++-
 2 files changed, 108 insertions(+), 29 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 745207597e23..997b9bbf74e9 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -37,6 +37,16 @@
 
 #include "zram_drv.h"
 
+/*
+ * This determines the sizes of the ZRAM_HUGE object split.  Currently we
+ * perform a 2-way split.  One part is stored in the 2048 size-class and the
+ * other one is stored in the size-class above 2048.
+ *
+ * To store an object in a target size-class we need to subtract the zsmalloc
+ * handle size, which is added to each stored object.
+ */
+#define ZRAM_MULTI_HANDLE_HEAD_SZ	((PAGE_SIZE) / 2 - ZS_HANDLE_SIZE)
+
 static DEFINE_IDR(zram_index_idr);
 /* idr index must be protected */
 static DEFINE_MUTEX(zram_index_mutex);
@@ -93,6 +103,18 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
 	zram->table[index].handle = handle;
 }
 
+static struct zram_multi_handle *zram_get_multi_handle(struct zram *zram,
+						       u32 index)
+{
+	return zram->table[index].mhandle;
+}
+
+static void zram_set_multi_handle(struct zram *zram, u32 index,
+				  struct zram_multi_handle *mhandle)
+{
+	zram->table[index].mhandle = mhandle;
+}
+
 /* flag operations require table entry bit_spin_lock() being held */
 static bool zram_test_flag(struct zram *zram, u32 index,
 			enum zram_pageflags flag)
@@ -1479,8 +1501,6 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
  */
 static void zram_free_page(struct zram *zram, size_t index)
 {
-	unsigned long handle;
-
 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
 	zram->table[index].ac_time = 0;
 #endif
@@ -1490,11 +1510,6 @@ static void zram_free_page(struct zram *zram, size_t index)
 	zram_clear_flag(zram, index, ZRAM_PP_SLOT);
 	zram_set_priority(zram, index, 0);
 
-	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
-		zram_clear_flag(zram, index, ZRAM_HUGE);
-		atomic64_dec(&zram->stats.huge_pages);
-	}
-
 	if (zram_test_flag(zram, index, ZRAM_WB)) {
 		zram_clear_flag(zram, index, ZRAM_WB);
 		free_block_bdev(zram, zram_get_handle(zram, index));
@@ -1511,11 +1526,26 @@ static void zram_free_page(struct zram *zram, size_t index)
 		goto out;
 	}
 
-	handle = zram_get_handle(zram, index);
-	if (!handle)
-		return;
+	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
+		struct zram_multi_handle *handle;
+
+		handle = zram_get_multi_handle(zram, index);
+		if (!handle)
+			return;
 
-	zs_free(zram->mem_pool, handle);
+		zs_free(zram->mem_pool, handle->head);
+		zs_free(zram->mem_pool, handle->tail);
+		kfree(handle);
+
+		zram_clear_flag(zram, index, ZRAM_HUGE);
+		atomic64_dec(&zram->stats.huge_pages);
+	} else {
+		unsigned long handle = zram_get_handle(zram, index);
+
+		if (!handle)
+			return;
+		zs_free(zram->mem_pool, handle);
+	}
 
 	atomic64_sub(zram_get_obj_size(zram, index),
 		     &zram->stats.compr_data_size);
@@ -1528,16 +1558,22 @@ static void zram_free_page(struct zram *zram, size_t index)
 static int read_incompressible_page(struct zram *zram, struct page *page,
 				    u32 index)
 {
-	unsigned long handle;
+	struct zram_multi_handle *handle;
 	void *src, *dst;
 
-	handle = zram_get_handle(zram, index);
-	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
+	handle = zram_get_multi_handle(zram, index);
 	dst = kmap_local_page(page);
-	copy_page(dst, src);
-	kunmap_local(dst);
-	zs_unmap_object(zram->mem_pool, handle);
 
+	src = zs_map_object(zram->mem_pool, handle->head, ZS_MM_RO);
+	memcpy(dst, src, ZRAM_MULTI_HANDLE_HEAD_SZ);
+	zs_unmap_object(zram->mem_pool, handle->head);
+
+	src = zs_map_object(zram->mem_pool, handle->tail, ZS_MM_RO);
+	memcpy(dst + ZRAM_MULTI_HANDLE_HEAD_SZ, src,
+	       PAGE_SIZE - ZRAM_MULTI_HANDLE_HEAD_SZ);
+	zs_unmap_object(zram->mem_pool, handle->tail);
+
+	kunmap_local(dst);
 	return 0;
 }
 
@@ -1648,34 +1684,56 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
 static int zram_write_incompressible_page(struct zram *zram, struct page *page,
 					  u32 index)
 {
-	unsigned long handle;
+	struct zram_multi_handle *handle;
 	void *src, *dst;
+	int ret;
 
 	/*
 	 * This function is called from preemptible context so we don't need
 	 * to do optimistic and fallback to pessimistic handle allocation,
 	 * like we do for compressible pages.
 	 */
-	handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
-			   GFP_NOIO | __GFP_HIGHMEM | __GFP_MOVABLE);
-	if (IS_ERR_VALUE(handle))
-		return PTR_ERR((void *)handle);
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (!handle)
+		return -ENOMEM;
+
+	handle->head = zs_malloc(zram->mem_pool, ZRAM_MULTI_HANDLE_HEAD_SZ,
+				 GFP_NOIO | __GFP_HIGHMEM | __GFP_MOVABLE);
+	if (IS_ERR_VALUE(handle->head)) {
+		ret = PTR_ERR((void *)handle->head);
+		goto error;
+	}
+
+	handle->tail = zs_malloc(zram->mem_pool,
+				 PAGE_SIZE - ZRAM_MULTI_HANDLE_HEAD_SZ,
+				 GFP_NOIO | __GFP_HIGHMEM | __GFP_MOVABLE);
+	if (IS_ERR_VALUE(handle->tail)) {
+		ret = PTR_ERR((void *)handle->tail);
+		goto error;
+	}
 
 	if (!zram_can_store_page(zram)) {
 		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
-		zs_free(zram->mem_pool, handle);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto error;
 	}
 
-	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
 	src = kmap_local_page(page);
-	memcpy(dst, src, PAGE_SIZE);
+
+	dst = zs_map_object(zram->mem_pool, handle->head, ZS_MM_WO);
+	memcpy(dst, src, ZRAM_MULTI_HANDLE_HEAD_SZ);
+	zs_unmap_object(zram->mem_pool, handle->head);
+
+	dst = zs_map_object(zram->mem_pool, handle->tail, ZS_MM_WO);
+	memcpy(dst, src + ZRAM_MULTI_HANDLE_HEAD_SZ,
+	       PAGE_SIZE - ZRAM_MULTI_HANDLE_HEAD_SZ);
+	zs_unmap_object(zram->mem_pool, handle->tail);
+
 	kunmap_local(src);
-	zs_unmap_object(zram->mem_pool, handle);
 
 	zram_slot_lock(zram, index);
 	zram_set_flag(zram, index, ZRAM_HUGE);
-	zram_set_handle(zram, index, handle);
+	zram_set_multi_handle(zram, index, handle);
 	zram_set_obj_size(zram, index, PAGE_SIZE);
 	zram_slot_unlock(zram, index);
 
@@ -1685,6 +1743,14 @@ static int zram_write_incompressible_page(struct zram *zram, struct page *page,
 	atomic64_inc(&zram->stats.pages_stored);
 
 	return 0;
+
+error:
+	if (!IS_ERR_VALUE(handle->head))
+		zs_free(zram->mem_pool, handle->head);
+	if (!IS_ERR_VALUE(handle->tail))
+		zs_free(zram->mem_pool, handle->tail);
+	kfree(handle);
+	return ret;
 }
 
 static int zram_write_page(struct zram *zram, struct page *page, u32 index)
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index db78d7c01b9a..7bc7792c2fef 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -60,9 +60,22 @@ enum zram_pageflags {
 
 /*-- Data structures */
 
+/*
+ * Unlike regular zram table entries, ZRAM_HUGE entries are stored in zsmalloc
+ * as smaller objects in multiple locations (size-classes).  This keeps track
+ * of those locations.
+ */
+struct zram_multi_handle {
+	unsigned long head;
+	unsigned long tail;
+};
+
 /* Allocated for each disk page */
 struct zram_table_entry {
-	unsigned long handle;
+	union {
+		unsigned long handle;
+		struct zram_multi_handle *mhandle;
+	};
 	unsigned int flags;
 	spinlock_t lock;
 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
-- 
2.47.0.338.g60cca15819-goog

