Message-ID: <20250127080254.1302026-6-senozhatsky@chromium.org>
Date: Mon, 27 Jan 2025 16:59:30 +0900
From: Sergey Senozhatsky <senozhatsky@...omium.org>
To: Andrew Morton <akpm@...ux-foundation.org>,
Minchan Kim <minchan@...nel.org>,
Johannes Weiner <hannes@...xchg.org>,
Yosry Ahmed <yosry.ahmed@...ux.dev>,
Nhat Pham <nphamcs@...il.com>
Cc: linux-mm@...ck.org,
linux-kernel@...r.kernel.org,
Sergey Senozhatsky <senozhatsky@...omium.org>
Subject: [RFC PATCH 5/6] zsmalloc: introduce handle mapping API
Introduce a new API to map/unmap zsmalloc handles/objects. The key
difference is that this API does not impose atomicity restrictions on
its users, unlike zs_map_object(), which returns with page faults and
preemption disabled. The handle mapping API does not need a per-CPU
vm-area, because users are required to provide an aux buffer for
objects that span several physical pages.
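
For illustration, a minimal caller sketch (not part of this patch; the
function name, buffer sizing and error handling here are assumptions):

  /* Hypothetical caller: read an object through the new API. */
  static int read_obj(struct zs_pool *pool, unsigned long handle,
                      void *dst, size_t len)
  {
          struct zs_handle_mapping map = {
                  .handle     = handle,
                  .mode       = ZS_MM_RO,
                  /* aux buffer for objects that span two physical pages */
                  .local_copy = kmalloc(PAGE_SIZE, GFP_KERNEL),
          };
          int err;

          if (!map.local_copy)
                  return -ENOMEM;

          err = zs_map_handle(pool, &map);
          if (!err) {
                  /* object data is accessed through map.handle_mem */
                  memcpy(dst, map.handle_mem, len);
                  zs_unmap_handle(pool, &map);
          }

          kfree(map.local_copy);
          return err;
  }
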
Keep zs_map_object()/zs_unmap_object() for the time being, as there are
still users of them, but the old API will eventually be removed.
Signed-off-by: Sergey Senozhatsky <senozhatsky@...omium.org>
---
include/linux/zsmalloc.h | 29 ++++++++
mm/zsmalloc.c | 148 ++++++++++++++++++++++++++++-----------
2 files changed, 138 insertions(+), 39 deletions(-)
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index a48cd0ffe57d..72d84537dd38 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -58,4 +58,33 @@ unsigned long zs_compact(struct zs_pool *pool);
unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size);
void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats);
+
+struct zs_handle_mapping {
+ unsigned long handle;
+ /* Points to the start of the object data, either within local_copy
+ * or within local_mapping. This is what callers should use to
+ * access or modify handle data.
+ */
+ void *handle_mem;
+
+ enum zs_mapmode mode;
+ union {
+ /*
+ * A copy of the handle's object data, used when the object spans
+ * several (non-contiguous) physical pages. This buffer must be
+ * allocated by the zs_map_handle() caller beforehand and should
+ * never be accessed directly.
+ */
+ void *local_copy;
+ /*
+ * Direct mapping of the handle's object. Should never be
+ * accessed directly; use handle_mem instead.
+ */
+ void *local_mapping;
+ };
+};
+
+int zs_map_handle(struct zs_pool *pool, struct zs_handle_mapping *map);
+void zs_unmap_handle(struct zs_pool *pool, struct zs_handle_mapping *map);
+
#endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index a5c1f9852072..281bba4a3277 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1132,18 +1132,14 @@ static inline void __zs_cpu_down(struct mapping_area *area)
area->vm_buf = NULL;
}
-static void *__zs_map_object(struct mapping_area *area,
- struct zpdesc *zpdescs[2], int off, int size)
+static void zs_obj_copyin(void *buf, struct zpdesc *zpdesc, int off, int size)
{
+ struct zpdesc *zpdescs[2];
size_t sizes[2];
- char *buf = area->vm_buf;
-
- /* disable page faults to match kmap_local_page() return conditions */
- pagefault_disable();
- /* no read fastpath */
- if (area->vm_mm == ZS_MM_WO)
- goto out;
+ zpdescs[0] = zpdesc;
+ zpdescs[1] = get_next_zpdesc(zpdesc);
+ BUG_ON(!zpdescs[1]);
sizes[0] = PAGE_SIZE - off;
sizes[1] = size - sizes[0];
@@ -1151,21 +1147,17 @@ static void *__zs_map_object(struct mapping_area *area,
/* copy object to per-cpu buffer */
memcpy_from_page(buf, zpdesc_page(zpdescs[0]), off, sizes[0]);
memcpy_from_page(buf + sizes[0], zpdesc_page(zpdescs[1]), 0, sizes[1]);
-out:
- return area->vm_buf;
}
-static void __zs_unmap_object(struct mapping_area *area,
- struct zpdesc *zpdescs[2], int off, int size)
+static void zs_obj_copyout(void *buf, struct zpdesc *zpdesc, int off, int size)
{
+ struct zpdesc *zpdescs[2];
size_t sizes[2];
- char *buf;
- /* no write fastpath */
- if (area->vm_mm == ZS_MM_RO)
- goto out;
+ zpdescs[0] = zpdesc;
+ zpdescs[1] = get_next_zpdesc(zpdesc);
+ BUG_ON(!zpdescs[1]);
- buf = area->vm_buf;
buf = buf + ZS_HANDLE_SIZE;
size -= ZS_HANDLE_SIZE;
off += ZS_HANDLE_SIZE;
@@ -1176,10 +1168,6 @@ static void __zs_unmap_object(struct mapping_area *area,
/* copy per-cpu buffer to object */
memcpy_to_page(zpdesc_page(zpdescs[0]), off, buf, sizes[0]);
memcpy_to_page(zpdesc_page(zpdescs[1]), 0, buf + sizes[0], sizes[1]);
-
-out:
- /* enable page faults to match kunmap_local() return conditions */
- pagefault_enable();
}
static int zs_cpu_prepare(unsigned int cpu)
@@ -1260,6 +1248,8 @@ EXPORT_SYMBOL_GPL(zs_get_total_pages);
* against nested mappings.
*
* This function returns with preemption and page faults disabled.
+ *
+ * NOTE: this function is deprecated and will be removed.
*/
void *zs_map_object(struct zs_pool *pool, unsigned long handle,
enum zs_mapmode mm)
@@ -1268,10 +1258,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
struct zpdesc *zpdesc;
unsigned long obj, off;
unsigned int obj_idx;
-
struct size_class *class;
struct mapping_area *area;
- struct zpdesc *zpdescs[2];
void *ret;
/*
@@ -1309,12 +1297,14 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
goto out;
}
- /* this object spans two pages */
- zpdescs[0] = zpdesc;
- zpdescs[1] = get_next_zpdesc(zpdesc);
- BUG_ON(!zpdescs[1]);
+ ret = area->vm_buf;
+ /* disable page faults to match kmap_local_page() return conditions */
+ pagefault_disable();
+ if (mm != ZS_MM_WO) {
+ /* this object spans two pages */
+ zs_obj_copyin(area->vm_buf, zpdesc, off, class->size);
+ }
- ret = __zs_map_object(area, zpdescs, off, class->size);
out:
if (likely(!ZsHugePage(zspage)))
ret += ZS_HANDLE_SIZE;
@@ -1323,13 +1313,13 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
}
EXPORT_SYMBOL_GPL(zs_map_object);
+/* NOTE: this function is deprecated and will be removed. */
void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
{
struct zspage *zspage;
struct zpdesc *zpdesc;
unsigned long obj, off;
unsigned int obj_idx;
-
struct size_class *class;
struct mapping_area *area;
@@ -1340,23 +1330,103 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
off = offset_in_page(class->size * obj_idx);
area = this_cpu_ptr(&zs_map_area);
- if (off + class->size <= PAGE_SIZE)
+ if (off + class->size <= PAGE_SIZE) {
kunmap_local(area->vm_addr);
- else {
- struct zpdesc *zpdescs[2];
+ goto out;
+ }
- zpdescs[0] = zpdesc;
- zpdescs[1] = get_next_zpdesc(zpdesc);
- BUG_ON(!zpdescs[1]);
+ if (area->vm_mm != ZS_MM_RO)
+ zs_obj_copyout(area->vm_buf, zpdesc, off, class->size);
+ /* enable page faults to match kunmap_local() return conditions */
+ pagefault_enable();
- __zs_unmap_object(area, zpdescs, off, class->size);
- }
+out:
local_unlock(&zs_map_area.lock);
-
zspage_read_unlock(zspage);
}
EXPORT_SYMBOL_GPL(zs_unmap_object);
+void zs_unmap_handle(struct zs_pool *pool, struct zs_handle_mapping *map)
+{
+ struct zspage *zspage;
+ struct zpdesc *zpdesc;
+ unsigned long obj, off;
+ unsigned int obj_idx;
+ struct size_class *class;
+
+ obj = handle_to_obj(map->handle);
+ obj_to_location(obj, &zpdesc, &obj_idx);
+ zspage = get_zspage(zpdesc);
+ class = zspage_class(pool, zspage);
+ off = offset_in_page(class->size * obj_idx);
+
+ if (off + class->size <= PAGE_SIZE) {
+ kunmap_local(map->local_mapping);
+ goto out;
+ }
+
+ if (map->mode != ZS_MM_RO)
+ zs_obj_copyout(map->local_copy, zpdesc, off, class->size);
+
+out:
+ zspage_read_unlock(zspage);
+}
+EXPORT_SYMBOL_GPL(zs_unmap_handle);
+
+int zs_map_handle(struct zs_pool *pool, struct zs_handle_mapping *map)
+{
+ struct zspage *zspage;
+ struct zpdesc *zpdesc;
+ unsigned long obj, off;
+ unsigned int obj_idx;
+ struct size_class *class;
+
+ WARN_ON(in_interrupt());
+
+ /* Guarantees we can safely get the zspage from the handle */
+ pool_read_lock(pool);
+ obj = handle_to_obj(map->handle);
+ obj_to_location(obj, &zpdesc, &obj_idx);
+ zspage = get_zspage(zpdesc);
+
+ /*
+ * Migration cannot move any zpages in this zspage. Here, class->lock
+ * is too heavy, since callers may take some time until they call
+ * zs_unmap_handle(), so delegate the locking from the class to the
+ * zspage, which is of smaller granularity.
+ */
+ zspage_read_lock(zspage);
+ pool_read_unlock(pool);
+
+ class = zspage_class(pool, zspage);
+ off = offset_in_page(class->size * obj_idx);
+
+ if (off + class->size <= PAGE_SIZE) {
+ /* this object is contained entirely within a page */
+ map->local_mapping = kmap_local_zpdesc(zpdesc);
+ map->handle_mem = map->local_mapping + off;
+ goto out;
+ }
+
+ if (WARN_ON_ONCE(!map->local_copy)) {
+ zspage_read_unlock(zspage);
+ return -EINVAL;
+ }
+
+ map->handle_mem = map->local_copy;
+ if (map->mode != ZS_MM_WO) {
+ /* this object spans two pages */
+ zs_obj_copyin(map->local_copy, zpdesc, off, class->size);
+ }
+
+out:
+ if (likely(!ZsHugePage(zspage)))
+ map->handle_mem += ZS_HANDLE_SIZE;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(zs_map_handle);
+
/**
* zs_huge_class_size() - Returns the size (in bytes) of the first huge
* zsmalloc &size_class.
--
2.48.1.262.g85cc9f2d1e-goog