[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240617-zsmalloc-lock-mm-everything-v1-1-5e5081ea11b3@linux.dev>
Date: Mon, 17 Jun 2024 20:57:40 +0800
From: Chengming Zhou <chengming.zhou@...ux.dev>
To: Minchan Kim <minchan@...nel.org>,
Sergey Senozhatsky <senozhatsky@...omium.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Johannes Weiner <hannes@...xchg.org>, Yosry Ahmed <yosryahmed@...gle.com>,
Nhat Pham <nphamcs@...il.com>
Cc: Takero Funaki <flintglass@...il.com>,
Chengming Zhou <zhouchengming@...edance.com>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, Chengming Zhou <chengming.zhou@...ux.dev>
Subject: [PATCH 1/2] mm/zsmalloc: change back to per-size_class lock
From: Chengming Zhou <zhouchengming@...edance.com>
This patch is almost the revert of the commit c0547d0b6a4b ("zsmalloc:
consolidate zs_pool's migrate_lock and size_class's locks"), which
changed to use a global pool->lock instead of per-size_class lock and
pool->migrate_lock, was preparation for suppporting reclaim in zsmalloc.
Then reclaim in zsmalloc had been dropped in favor of LRU reclaim in
zswap.
In theory, per-size_class is more fine-grained than the pool->lock,
since a pool can have many size_classes. As for the additional
pool->migrate_lock, only free() and map() need to grab it to access
stable handle to get zspage, and only in read lock mode.
Signed-off-by: Chengming Zhou <zhouchengming@...edance.com>
---
mm/zsmalloc.c | 85 +++++++++++++++++++++++++++++++++++------------------------
1 file changed, 50 insertions(+), 35 deletions(-)
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 44e0171d6003..fec1a39e5bbe 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -34,7 +34,8 @@
/*
* lock ordering:
* page_lock
- * pool->lock
+ * pool->migrate_lock
+ * class->lock
* zspage->lock
*/
@@ -183,6 +184,7 @@ static struct dentry *zs_stat_root;
static size_t huge_class_size;
struct size_class {
+ spinlock_t lock;
struct list_head fullness_list[NR_FULLNESS_GROUPS];
/*
* Size of objects stored in this class. Must be multiple
@@ -237,7 +239,8 @@ struct zs_pool {
#ifdef CONFIG_COMPACTION
struct work_struct free_work;
#endif
- spinlock_t lock;
+ /* protect page/zspage migration */
+ rwlock_t migrate_lock;
atomic_t compaction_in_progress;
};
@@ -336,7 +339,7 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
kmem_cache_free(pool->zspage_cachep, zspage);
}
-/* pool->lock(which owns the handle) synchronizes races */
+/* class->lock(which owns the handle) synchronizes races */
static void record_obj(unsigned long handle, unsigned long obj)
{
*(unsigned long *)handle = obj;
@@ -431,7 +434,7 @@ static __maybe_unused int is_first_page(struct page *page)
return PagePrivate(page);
}
-/* Protected by pool->lock */
+/* Protected by class->lock */
static inline int get_zspage_inuse(struct zspage *zspage)
{
return zspage->inuse;
@@ -569,7 +572,7 @@ static int zs_stats_size_show(struct seq_file *s, void *v)
if (class->index != i)
continue;
- spin_lock(&pool->lock);
+ spin_lock(&class->lock);
seq_printf(s, " %5u %5u ", i, class->size);
for (fg = ZS_INUSE_RATIO_10; fg < NR_FULLNESS_GROUPS; fg++) {
@@ -580,7 +583,7 @@ static int zs_stats_size_show(struct seq_file *s, void *v)
obj_allocated = zs_stat_get(class, ZS_OBJS_ALLOCATED);
obj_used = zs_stat_get(class, ZS_OBJS_INUSE);
freeable = zs_can_compact(class);
- spin_unlock(&pool->lock);
+ spin_unlock(&class->lock);
objs_per_zspage = class->objs_per_zspage;
pages_used = obj_allocated / objs_per_zspage *
@@ -837,7 +840,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
{
struct page *page, *next;
- assert_spin_locked(&pool->lock);
+ assert_spin_locked(&class->lock);
VM_BUG_ON(get_zspage_inuse(zspage));
VM_BUG_ON(zspage->fullness != ZS_INUSE_RATIO_0);
@@ -1196,19 +1199,19 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
BUG_ON(in_interrupt());
/* It guarantees it can get zspage from handle safely */
- spin_lock(&pool->lock);
+ read_lock(&pool->migrate_lock);
obj = handle_to_obj(handle);
obj_to_location(obj, &page, &obj_idx);
zspage = get_zspage(page);
/*
- * migration cannot move any zpages in this zspage. Here, pool->lock
+ * migration cannot move any zpages in this zspage. Here, class->lock
* is too heavy since callers would take some time until they calls
* zs_unmap_object API so delegate the locking from class to zspage
* which is smaller granularity.
*/
migrate_read_lock(zspage);
- spin_unlock(&pool->lock);
+ read_unlock(&pool->migrate_lock);
class = zspage_class(pool, zspage);
off = offset_in_page(class->size * obj_idx);
@@ -1364,8 +1367,8 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
size += ZS_HANDLE_SIZE;
class = pool->size_class[get_size_class_index(size)];
- /* pool->lock effectively protects the zpage migration */
- spin_lock(&pool->lock);
+ /* class->lock effectively protects the zpage migration */
+ spin_lock(&class->lock);
zspage = find_get_zspage(class);
if (likely(zspage)) {
obj = obj_malloc(pool, zspage, handle);
@@ -1377,7 +1380,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
goto out;
}
- spin_unlock(&pool->lock);
+ spin_unlock(&class->lock);
zspage = alloc_zspage(pool, class, gfp);
if (!zspage) {
@@ -1385,7 +1388,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
return (unsigned long)ERR_PTR(-ENOMEM);
}
- spin_lock(&pool->lock);
+ spin_lock(&class->lock);
obj = obj_malloc(pool, zspage, handle);
newfg = get_fullness_group(class, zspage);
insert_zspage(class, zspage, newfg);
@@ -1397,7 +1400,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
/* We completely set up zspage so mark them as movable */
SetZsPageMovable(pool, zspage);
out:
- spin_unlock(&pool->lock);
+ spin_unlock(&class->lock);
return handle;
}
@@ -1442,14 +1445,16 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
return;
/*
- * The pool->lock protects the race with zpage's migration
+ * The pool->migrate_lock protects the race with zpage's migration
* so it's safe to get the page from handle.
*/
- spin_lock(&pool->lock);
+ read_lock(&pool->migrate_lock);
obj = handle_to_obj(handle);
obj_to_page(obj, &f_page);
zspage = get_zspage(f_page);
class = zspage_class(pool, zspage);
+ spin_lock(&class->lock);
+ read_unlock(&pool->migrate_lock);
class_stat_dec(class, ZS_OBJS_INUSE, 1);
obj_free(class->size, obj);
@@ -1458,7 +1463,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
if (fullness == ZS_INUSE_RATIO_0)
free_zspage(pool, class, zspage);
- spin_unlock(&pool->lock);
+ spin_unlock(&class->lock);
cache_free_handle(pool, handle);
}
EXPORT_SYMBOL_GPL(zs_free);
@@ -1780,12 +1785,16 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
pool = zspage->pool;
/*
- * The pool's lock protects the race between zpage migration
+ * The pool migrate_lock protects the race between zpage migration
* and zs_free.
*/
- spin_lock(&pool->lock);
+ write_lock(&pool->migrate_lock);
class = zspage_class(pool, zspage);
+ /*
+ * the class lock protects zpage alloc/free in the zspage.
+ */
+ spin_lock(&class->lock);
/* the migrate_write_lock protects zpage access via zs_map_object */
migrate_write_lock(zspage);
@@ -1815,9 +1824,10 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
replace_sub_page(class, zspage, newpage, page);
/*
* Since we complete the data copy and set up new zspage structure,
- * it's okay to release the pool's lock.
+ * it's okay to release migration_lock.
*/
- spin_unlock(&pool->lock);
+ write_unlock(&pool->migrate_lock);
+ spin_unlock(&class->lock);
migrate_write_unlock(zspage);
get_page(newpage);
@@ -1861,20 +1871,20 @@ static void async_free_zspage(struct work_struct *work)
if (class->index != i)
continue;
- spin_lock(&pool->lock);
+ spin_lock(&class->lock);
list_splice_init(&class->fullness_list[ZS_INUSE_RATIO_0],
&free_pages);
- spin_unlock(&pool->lock);
+ spin_unlock(&class->lock);
}
list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
list_del(&zspage->list);
lock_zspage(zspage);
- spin_lock(&pool->lock);
class = zspage_class(pool, zspage);
+ spin_lock(&class->lock);
__free_zspage(pool, class, zspage);
- spin_unlock(&pool->lock);
+ spin_unlock(&class->lock);
}
};
@@ -1938,7 +1948,8 @@ static unsigned long __zs_compact(struct zs_pool *pool,
* protect the race between zpage migration and zs_free
* as well as zpage allocation/free
*/
- spin_lock(&pool->lock);
+ write_lock(&pool->migrate_lock);
+ spin_lock(&class->lock);
while (zs_can_compact(class)) {
int fg;
@@ -1964,13 +1975,15 @@ static unsigned long __zs_compact(struct zs_pool *pool,
src_zspage = NULL;
if (get_fullness_group(class, dst_zspage) == ZS_INUSE_RATIO_100
- || spin_is_contended(&pool->lock)) {
+ || rwlock_is_contended(&pool->migrate_lock)) {
putback_zspage(class, dst_zspage);
dst_zspage = NULL;
- spin_unlock(&pool->lock);
+ spin_unlock(&class->lock);
+ write_unlock(&pool->migrate_lock);
cond_resched();
- spin_lock(&pool->lock);
+ write_lock(&pool->migrate_lock);
+ spin_lock(&class->lock);
}
}
@@ -1980,7 +1993,8 @@ static unsigned long __zs_compact(struct zs_pool *pool,
if (dst_zspage)
putback_zspage(class, dst_zspage);
- spin_unlock(&pool->lock);
+ spin_unlock(&class->lock);
+ write_unlock(&pool->migrate_lock);
return pages_freed;
}
@@ -1992,10 +2006,10 @@ unsigned long zs_compact(struct zs_pool *pool)
unsigned long pages_freed = 0;
/*
- * Pool compaction is performed under pool->lock so it is basically
+ * Pool compaction is performed under pool->migrate_lock so it is basically
* single-threaded. Having more than one thread in __zs_compact()
- * will increase pool->lock contention, which will impact other
- * zsmalloc operations that need pool->lock.
+ * will increase pool->migrate_lock contention, which will impact other
+ * zsmalloc operations that need pool->migrate_lock.
*/
if (atomic_xchg(&pool->compaction_in_progress, 1))
return 0;
@@ -2117,7 +2131,7 @@ struct zs_pool *zs_create_pool(const char *name)
return NULL;
init_deferred_free(pool);
- spin_lock_init(&pool->lock);
+ rwlock_init(&pool->migrate_lock);
atomic_set(&pool->compaction_in_progress, 0);
pool->name = kstrdup(name, GFP_KERNEL);
@@ -2189,6 +2203,7 @@ struct zs_pool *zs_create_pool(const char *name)
class->index = i;
class->pages_per_zspage = pages_per_zspage;
class->objs_per_zspage = objs_per_zspage;
+ spin_lock_init(&class->lock);
pool->size_class[i] = class;
fullness = ZS_INUSE_RATIO_0;
--
2.45.2
Powered by blists - more mailing lists