lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250127080254.1302026-3-senozhatsky@chromium.org>
Date: Mon, 27 Jan 2025 16:59:27 +0900
From: Sergey Senozhatsky <senozhatsky@...omium.org>
To: Andrew Morton <akpm@...ux-foundation.org>,
	Minchan Kim <minchan@...nel.org>,
	Johannes Weiner <hannes@...xchg.org>,
	Yosry Ahmed <yosry.ahmed@...ux.dev>,
	Nhat Pham <nphamcs@...il.com>
Cc: linux-mm@...ck.org,
	linux-kernel@...r.kernel.org,
	Sergey Senozhatsky <senozhatsky@...omium.org>
Subject: [RFC PATCH 2/6] zsmalloc: make zspage lock preemptible

Switch over from rwlock_t to an atomic_t variable that takes
a negative value when the page is under migration, or positive
values when the page is used by zsmalloc users (object map,
etc.)  Using a rwsem per-zspage is a little too memory heavy,
a simple atomic_t should suffice, after all we only need to
mark zspage as either used-for-write or used-for-read.  This
is needed to make zsmalloc preemptible in the future.

Signed-off-by: Sergey Senozhatsky <senozhatsky@...omium.org>
---
 mm/zsmalloc.c | 112 +++++++++++++++++++++++++++++---------------------
 1 file changed, 66 insertions(+), 46 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 817626a351f8..28a75bfbeaa6 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -257,6 +257,9 @@ static inline void free_zpdesc(struct zpdesc *zpdesc)
 	__free_page(page);
 }
 
+#define ZS_PAGE_UNLOCKED	0	/* no readers and no writer */
+#define ZS_PAGE_WRLOCKED	(-1)	/* held exclusively by a writer */
+
 struct zspage {
 	struct {
 		unsigned int huge:HUGE_BITS;
@@ -269,7 +272,7 @@ struct zspage {
 	struct zpdesc *first_zpdesc;
 	struct list_head list; /* fullness list */
 	struct zs_pool *pool;
-	rwlock_t lock;
+	atomic_t lock;
 };
 
 struct mapping_area {
@@ -290,11 +293,53 @@ static bool ZsHugePage(struct zspage *zspage)
 	return zspage->huge;
 }
 
-static void migrate_lock_init(struct zspage *zspage);
-static void migrate_read_lock(struct zspage *zspage);
-static void migrate_read_unlock(struct zspage *zspage);
-static void migrate_write_lock(struct zspage *zspage);
-static void migrate_write_unlock(struct zspage *zspage);
+/*
+ * zspage->lock: ZS_PAGE_WRLOCKED while a writer holds it, otherwise the
+ * number of readers.  Unlocking uses RELEASE ordering so that accesses made
+ * under the lock cannot be reordered past the store that drops it.
+ */
+static void zspage_lock_init(struct zspage *zspage)
+{
+	atomic_set(&zspage->lock, ZS_PAGE_UNLOCKED);
+}
+
+/* Spin while the zspage is write-locked, then bump the reader count. */
+static void zspage_read_lock(struct zspage *zspage)
+{
+	atomic_t *lock = &zspage->lock;
+	int old;
+
+	while (1) {
+		old = atomic_read(lock);
+		/* cmpxchg is fully ordered on success, which implies ACQUIRE */
+		if (old != ZS_PAGE_WRLOCKED &&
+		    atomic_cmpxchg(lock, old, old + 1) == old)
+			return;
+
+		cpu_relax();
+	}
+}
+
+/* Drop one reader reference taken by zspage_read_lock(). */
+static void zspage_read_unlock(struct zspage *zspage)
+{
+	/* Unlike atomic_dec(), this orders prior accesses before the drop. */
+	atomic_dec_return_release(&zspage->lock);
+}
+
+/* Spin until there are no readers and no writer, then take the lock. */
+static void zspage_write_lock(struct zspage *zspage)
+{
+	while (atomic_cmpxchg(&zspage->lock, ZS_PAGE_UNLOCKED,
+			      ZS_PAGE_WRLOCKED) != ZS_PAGE_UNLOCKED)
+		cpu_relax();
+}
+
+/* Release the write lock; plain atomic_set() would provide no ordering. */
+static void zspage_write_unlock(struct zspage *zspage)
+{
+	atomic_set_release(&zspage->lock, ZS_PAGE_UNLOCKED);
+}
 
 #ifdef CONFIG_COMPACTION
 static void kick_deferred_free(struct zs_pool *pool);
@@ -992,7 +1037,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
 		return NULL;
 
 	zspage->magic = ZSPAGE_MAGIC;
-	migrate_lock_init(zspage);
+	zspage_lock_init(zspage);
 
 	for (i = 0; i < class->pages_per_zspage; i++) {
 		struct zpdesc *zpdesc;
@@ -1217,7 +1262,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	 * zs_unmap_object API so delegate the locking from class to zspage
 	 * which is smaller granularity.
 	 */
-	migrate_read_lock(zspage);
+	zspage_read_lock(zspage);
 	read_unlock(&pool->migrate_lock);
 
 	class = zspage_class(pool, zspage);
@@ -1277,7 +1322,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 	}
 	local_unlock(&zs_map_area.lock);
 
-	migrate_read_unlock(zspage);
+	zspage_read_unlock(zspage);
 }
 EXPORT_SYMBOL_GPL(zs_unmap_object);
 
@@ -1671,18 +1716,18 @@ static void lock_zspage(struct zspage *zspage)
 	/*
 	 * Pages we haven't locked yet can be migrated off the list while we're
 	 * trying to lock them, so we need to be careful and only attempt to
-	 * lock each page under migrate_read_lock(). Otherwise, the page we lock
+	 * lock each page under zspage_read_lock(). Otherwise, the page we lock
 	 * may no longer belong to the zspage. This means that we may wait for
 	 * the wrong page to unlock, so we must take a reference to the page
-	 * prior to waiting for it to unlock outside migrate_read_lock().
+	 * prior to waiting for it to unlock outside zspage_read_lock().
 	 */
 	while (1) {
-		migrate_read_lock(zspage);
+		zspage_read_lock(zspage);
 		zpdesc = get_first_zpdesc(zspage);
 		if (zpdesc_trylock(zpdesc))
 			break;
 		zpdesc_get(zpdesc);
-		migrate_read_unlock(zspage);
+		zspage_read_unlock(zspage);
 		zpdesc_wait_locked(zpdesc);
 		zpdesc_put(zpdesc);
 	}
@@ -1693,41 +1738,16 @@ static void lock_zspage(struct zspage *zspage)
 			curr_zpdesc = zpdesc;
 		} else {
 			zpdesc_get(zpdesc);
-			migrate_read_unlock(zspage);
+			zspage_read_unlock(zspage);
 			zpdesc_wait_locked(zpdesc);
 			zpdesc_put(zpdesc);
-			migrate_read_lock(zspage);
+			zspage_read_lock(zspage);
 		}
 	}
-	migrate_read_unlock(zspage);
+	zspage_read_unlock(zspage);
 }
 #endif /* CONFIG_COMPACTION */
 
-static void migrate_lock_init(struct zspage *zspage)
-{
-	rwlock_init(&zspage->lock);
-}
-
-static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
-{
-	read_lock(&zspage->lock);
-}
-
-static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
-{
-	read_unlock(&zspage->lock);
-}
-
-static void migrate_write_lock(struct zspage *zspage)
-{
-	write_lock(&zspage->lock);
-}
-
-static void migrate_write_unlock(struct zspage *zspage)
-{
-	write_unlock(&zspage->lock);
-}
-
 #ifdef CONFIG_COMPACTION
 
 static const struct movable_operations zsmalloc_mops;
@@ -1803,8 +1823,8 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	 * the class lock protects zpage alloc/free in the zspage.
 	 */
 	spin_lock(&class->lock);
-	/* the migrate_write_lock protects zpage access via zs_map_object */
-	migrate_write_lock(zspage);
+	/* the zspage_write_lock protects zpage access via zs_map_object */
+	zspage_write_lock(zspage);
 
 	offset = get_first_obj_offset(zpdesc);
 	s_addr = kmap_local_zpdesc(zpdesc);
@@ -1835,7 +1855,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	 */
 	write_unlock(&pool->migrate_lock);
 	spin_unlock(&class->lock);
-	migrate_write_unlock(zspage);
+	zspage_write_unlock(zspage);
 
 	zpdesc_get(newzpdesc);
 	if (zpdesc_zone(newzpdesc) != zpdesc_zone(zpdesc)) {
@@ -1971,9 +1991,9 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 		if (!src_zspage)
 			break;
 
-		migrate_write_lock(src_zspage);
+		zspage_write_lock(src_zspage);
 		migrate_zspage(pool, src_zspage, dst_zspage);
-		migrate_write_unlock(src_zspage);
+		zspage_write_unlock(src_zspage);
 
 		fg = putback_zspage(class, src_zspage);
 		if (fg == ZS_INUSE_RATIO_0) {
-- 
2.48.1.262.g85cc9f2d1e-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ