Message-ID: <2c0036a3678d9cf55045cd215e9959aa92e32cfc.1745528282.git.lorenzo.stoakes@oracle.com>
Date: Thu, 24 Apr 2025 22:15:29 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: "Liam R . Howlett" <Liam.Howlett@...cle.com>,
Vlastimil Babka <vbabka@...e.cz>, Jann Horn <jannh@...gle.com>,
Pedro Falcato <pfalcato@...e.de>, David Hildenbrand <david@...hat.com>,
Kees Cook <kees@...nel.org>, Alexander Viro <viro@...iv.linux.org.uk>,
Christian Brauner <brauner@...nel.org>, Jan Kara <jack@...e.cz>,
Suren Baghdasaryan <surenb@...gle.com>, linux-mm@...ck.org,
linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: [PATCH 4/4] mm: move vm_area_alloc,dup,free() functions to vma.c
Place the core VMA allocation, duplication, and freeing functions in vma.c,
where we can isolate them to mm only, providing proper encapsulation, and
gain the ability to integrate them with the userland tests.
The only users of these functions are now in mmap.c, vma.c or nommu.c. The
former two are not compiled for nommu, so the only nommu change needed here
is to make its local vm_area_dup() static.
Update the tools/testing/vma/* test code accordingly to keep the tests
passing.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
---
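Note for reviewers: after this change the helpers are declared in mm/vma.h
and backed by the cache created in vma_state_init(). A rough sketch of the
resulting usage from within mm/ follows; the caller function is illustrative
only and not part of this series:

	#include "vma.h"

	/* Illustrative only: allocate, duplicate and free detached VMAs. */
	static void vma_alloc_example(struct mm_struct *mm)
	{
		struct vm_area_struct *vma, *copy;

		vma = vm_area_alloc(mm);	/* kmem_cache_alloc() + vma_init() */
		if (!vma)
			return;

		copy = vm_area_dup(vma);	/* vm_area_init_from() + lock/numab init */
		if (copy)
			vm_area_free(copy);	/* asserts the VMA is detached */
		vm_area_free(vma);
	}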
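On the userland side, the tests now exercise the real vm_area_alloc(),
vm_area_dup() and vm_area_free() from mm/vma.c; vma_internal.h only stubs
the slab layer. Roughly, the stubbed calls behave as below (a sketch of the
stub semantics, not code from this series):

	/* The stubs reduce the slab cache to plain calloc()/free(). */
	struct kmem_cache_args args = {
		.use_freeptr_offset = true,
		.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
	};
	struct kmem_cache *cache = kmem_cache_create("vm_area_struct",
			sizeof(struct vm_area_struct), &args, SLAB_PANIC);
	struct vm_area_struct *vma = kmem_cache_alloc(cache, GFP_KERNEL); /* calloc() underneath */

	kmem_cache_free(cache, vma);	/* plain free() */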
include/linux/mm.h | 13 ---
mm/debug_vm_pgtable.c | 2 +
mm/mmap.c | 86 +-----------------
mm/nommu.c | 2 +-
mm/vma.c | 89 ++++++++++++++++++
mm/vma.h | 8 ++
tools/testing/vma/vma.c | 1 +
tools/testing/vma/vma_internal.h | 151 ++++++++++++++++++++++++-------
8 files changed, 221 insertions(+), 131 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index fa84e59a99bb..683df06e4a18 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -222,19 +222,6 @@ static inline struct folio *lru_to_folio(struct list_head *head)
void setup_initial_init_mm(void *start_code, void *end_code,
void *end_data, void *brk);
-/*
- * Linux kernel virtual memory manager primitives.
- * The idea being to have a "virtual" mm in the same way
- * we have a virtual fs - giving a cleaner interface to the
- * mm details, and allowing different kinds of memory mappings
- * (from shared memory to executable loading to arbitrary
- * mmap() functions).
- */
-
-struct vm_area_struct *vm_area_alloc(struct mm_struct *);
-struct vm_area_struct *vm_area_dup(struct vm_area_struct *);
-void vm_area_free(struct vm_area_struct *);
-
#ifndef CONFIG_MMU
extern struct rb_root nommu_region_tree;
extern struct rw_semaphore nommu_region_sem;
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 7731b238b534..556c3f204d1b 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -36,6 +36,8 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
+#include "vma.h"
+
/*
* Please refer Documentation/mm/arch_pgtable_helpers.rst for the semantics
* expectations that are being validated here. All future changes in here
diff --git a/mm/mmap.c b/mm/mmap.c
index 31d2aa690fcc..0059a791cb7d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -77,82 +77,6 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
-/* SLAB cache for vm_area_struct structures */
-static struct kmem_cache *vm_area_cachep;
-
-struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
-{
- struct vm_area_struct *vma;
-
- vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
- if (!vma)
- return NULL;
-
- vma_init(vma, mm);
-
- return vma;
-}
-
-static void vm_area_init_from(const struct vm_area_struct *src,
- struct vm_area_struct *dest)
-{
- dest->vm_mm = src->vm_mm;
- dest->vm_ops = src->vm_ops;
- dest->vm_start = src->vm_start;
- dest->vm_end = src->vm_end;
- dest->anon_vma = src->anon_vma;
- dest->vm_pgoff = src->vm_pgoff;
- dest->vm_file = src->vm_file;
- dest->vm_private_data = src->vm_private_data;
- vm_flags_init(dest, src->vm_flags);
- memcpy(&dest->vm_page_prot, &src->vm_page_prot,
- sizeof(dest->vm_page_prot));
- /*
- * src->shared.rb may be modified concurrently when called from
- * dup_mmap(), but the clone will reinitialize it.
- */
- data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
- memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
- sizeof(dest->vm_userfaultfd_ctx));
-#ifdef CONFIG_ANON_VMA_NAME
- dest->anon_name = src->anon_name;
-#endif
-#ifdef CONFIG_SWAP
- memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
- sizeof(dest->swap_readahead_info));
-#endif
-#ifdef CONFIG_NUMA
- dest->vm_policy = src->vm_policy;
-#endif
-}
-
-struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
-{
- struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-
- if (!new)
- return NULL;
-
- ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
- ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
- vm_area_init_from(orig, new);
- vma_lock_init(new, true);
- INIT_LIST_HEAD(&new->anon_vma_chain);
- vma_numab_state_init(new);
- dup_anon_vma_name(orig, new);
-
- return new;
-}
-
-void vm_area_free(struct vm_area_struct *vma)
-{
- /* The vma should be detached while being destroyed. */
- vma_assert_detached(vma);
- vma_numab_state_free(vma);
- free_anon_vma_name(vma);
- kmem_cache_free(vm_area_cachep, vma);
-}
-
/* Update vma->vm_page_prot to reflect vma->vm_flags. */
void vma_set_page_prot(struct vm_area_struct *vma)
{
@@ -1677,17 +1601,11 @@ static const struct ctl_table mmap_table[] = {
void __init mmap_init(void)
{
int ret;
- struct kmem_cache_args args = {
- .use_freeptr_offset = true,
- .freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
- };
ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
VM_BUG_ON(ret);
- vm_area_cachep = kmem_cache_create("vm_area_struct",
- sizeof(struct vm_area_struct), &args,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
- SLAB_ACCOUNT);
+
+ vma_state_init();
#ifdef CONFIG_SYSCTL
register_sysctl_init("vm", mmap_table);
#endif
diff --git a/mm/nommu.c b/mm/nommu.c
index 79a6b0460622..7d1ced10e08b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -97,7 +97,7 @@ static void vm_area_init_from(const struct vm_area_struct *src,
dest->vm_region = src->vm_region;
}
-struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
+static struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
{
struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
diff --git a/mm/vma.c b/mm/vma.c
index 8a6c5e835759..65507f12b8d3 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -57,6 +57,9 @@ struct mmap_state {
.state = VMA_MERGE_START, \
}
+/* SLAB cache for vm_area_struct structures */
+static struct kmem_cache *vm_area_cachep;
+
/*
* If, at any point, the VMA had unCoW'd mappings from parents, it will maintain
* more than one anon_vma_chain connecting it to more than one anon_vma. A merge
@@ -3052,3 +3055,89 @@ int __vm_munmap(unsigned long start, size_t len, bool unlock)
userfaultfd_unmap_complete(mm, &uf);
return ret;
}
+
+struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+
+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+ if (!vma)
+ return NULL;
+
+ vma_init(vma, mm);
+
+ return vma;
+}
+
+static void vm_area_init_from(const struct vm_area_struct *src,
+ struct vm_area_struct *dest)
+{
+ dest->vm_mm = src->vm_mm;
+ dest->vm_ops = src->vm_ops;
+ dest->vm_start = src->vm_start;
+ dest->vm_end = src->vm_end;
+ dest->anon_vma = src->anon_vma;
+ dest->vm_pgoff = src->vm_pgoff;
+ dest->vm_file = src->vm_file;
+ dest->vm_private_data = src->vm_private_data;
+ vm_flags_init(dest, src->vm_flags);
+ memcpy(&dest->vm_page_prot, &src->vm_page_prot,
+ sizeof(dest->vm_page_prot));
+ /*
+ * src->shared.rb may be modified concurrently when called from
+ * dup_mmap(), but the clone will reinitialize it.
+ */
+ data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
+ memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
+ sizeof(dest->vm_userfaultfd_ctx));
+#ifdef CONFIG_ANON_VMA_NAME
+ dest->anon_name = src->anon_name;
+#endif
+#ifdef CONFIG_SWAP
+ memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
+ sizeof(dest->swap_readahead_info));
+#endif
+#ifdef CONFIG_NUMA
+ dest->vm_policy = src->vm_policy;
+#endif
+}
+
+struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
+{
+ struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+
+ if (!new)
+ return NULL;
+
+ ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
+ ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
+ vm_area_init_from(orig, new);
+ vma_lock_init(new, true);
+ INIT_LIST_HEAD(&new->anon_vma_chain);
+ vma_numab_state_init(new);
+ dup_anon_vma_name(orig, new);
+
+ return new;
+}
+
+void vm_area_free(struct vm_area_struct *vma)
+{
+ /* The vma should be detached while being destroyed. */
+ vma_assert_detached(vma);
+ vma_numab_state_free(vma);
+ free_anon_vma_name(vma);
+ kmem_cache_free(vm_area_cachep, vma);
+}
+
+void __init vma_state_init(void)
+{
+ struct kmem_cache_args args = {
+ .use_freeptr_offset = true,
+ .freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
+ };
+
+ vm_area_cachep = kmem_cache_create("vm_area_struct",
+ sizeof(struct vm_area_struct), &args,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
+ SLAB_ACCOUNT);
+}
diff --git a/mm/vma.h b/mm/vma.h
index 149926e8a6d1..be8597a85b07 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -548,4 +548,12 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address);
int __vm_munmap(unsigned long start, size_t len, bool unlock);
+#ifdef CONFIG_MMU
+struct vm_area_struct *vm_area_alloc(struct mm_struct *);
+void vm_area_free(struct vm_area_struct *);
+struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig);
+#endif
+
+extern void __init vma_state_init(void);
+
#endif /* __MM_VMA_H */
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 7cfd6e31db10..9932ceb2e4b9 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -1667,6 +1667,7 @@ int main(void)
int num_tests = 0, num_fail = 0;
maple_tree_init();
+ vma_state_init();
#define TEST(name) \
do { \
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 572ab2cea763..fbd0412571fb 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -135,6 +135,10 @@ typedef __bitwise unsigned int vm_fault_t;
*/
#define pr_warn_once pr_err
+#define data_race(expr) expr
+
+#define ASSERT_EXCLUSIVE_WRITER(x)
+
struct kref {
refcount_t refcount;
};
@@ -235,6 +239,8 @@ struct file {
#define VMA_LOCK_OFFSET 0x40000000
+typedef struct { unsigned long v; } freeptr_t;
+
struct vm_area_struct {
/* The first cache line has the info for VMA tree walking. */
@@ -244,9 +250,7 @@ struct vm_area_struct {
unsigned long vm_start;
unsigned long vm_end;
};
-#ifdef CONFIG_PER_VMA_LOCK
- struct rcu_head vm_rcu; /* Used for deferred freeing. */
-#endif
+ freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
};
struct mm_struct *vm_mm; /* The address space we belong to. */
@@ -421,6 +425,65 @@ struct vm_unmapped_area_info {
unsigned long start_gap;
};
+struct kmem_cache_args {
+ /**
+ * @align: The required alignment for the objects.
+ *
+ * %0 means no specific alignment is requested.
+ */
+ unsigned int align;
+ /**
+ * @useroffset: Usercopy region offset.
+ *
+ * %0 is a valid offset, when @usersize is non-%0
+ */
+ unsigned int useroffset;
+ /**
+ * @usersize: Usercopy region size.
+ *
+ * %0 means no usercopy region is specified.
+ */
+ unsigned int usersize;
+ /**
+ * @freeptr_offset: Custom offset for the free pointer
+ * in &SLAB_TYPESAFE_BY_RCU caches
+ *
+ * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
+ * outside of the object. This might cause the object to grow in size.
+ * Cache creators that have a reason to avoid this can specify a custom
+ * free pointer offset in their struct where the free pointer will be
+ * placed.
+ *
+ * Note that placing the free pointer inside the object requires the
+ * caller to ensure that no fields are invalidated that are required to
+ * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
+ * details).
+ *
+ * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
+ * is specified, %use_freeptr_offset must be set %true.
+ *
+ * Note that @ctor currently isn't supported with custom free pointers
+ * as a @ctor requires an external free pointer.
+ */
+ unsigned int freeptr_offset;
+ /**
+ * @use_freeptr_offset: Whether a @freeptr_offset is used.
+ */
+ bool use_freeptr_offset;
+ /**
+ * @ctor: A constructor for the objects.
+ *
+ * The constructor is invoked for each object in a newly allocated slab
+ * page. It is the cache user's responsibility to free object in the
+ * same state as after calling the constructor, or deal appropriately
+ * with any differences between a freshly constructed and a reallocated
+ * object.
+ *
+ * %NULL means no constructor.
+ */
+ void (*ctor)(void *);
+};
+
static inline void vma_iter_invalidate(struct vma_iterator *vmi)
{
mas_pause(&vmi->mas);
@@ -496,40 +559,39 @@ extern const struct vm_operations_struct vma_dummy_vm_ops;
extern unsigned long rlimit(unsigned int limit);
-static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
-{
- memset(vma, 0, sizeof(*vma));
- vma->vm_mm = mm;
- vma->vm_ops = &vma_dummy_vm_ops;
- INIT_LIST_HEAD(&vma->anon_vma_chain);
- vma->vm_lock_seq = UINT_MAX;
-}
-static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
-{
- struct vm_area_struct *vma = calloc(1, sizeof(struct vm_area_struct));
+struct kmem_cache {
+ const char *name;
+ size_t object_size;
+ struct kmem_cache_args *args;
+};
- if (!vma)
- return NULL;
+static inline struct kmem_cache *__kmem_cache_create(const char *name,
+ size_t object_size,
+ struct kmem_cache_args *args)
+{
+ struct kmem_cache *ret = malloc(sizeof(struct kmem_cache));
- vma_init(vma, mm);
+ ret->name = name;
+ ret->object_size = object_size;
+ ret->args = args;
- return vma;
+ return ret;
}
-static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
-{
- struct vm_area_struct *new = calloc(1, sizeof(struct vm_area_struct));
+#define kmem_cache_create(__name, __object_size, __args, ...) \
+ __kmem_cache_create((__name), (__object_size), (__args))
- if (!new)
- return NULL;
+static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+{
+ (void)gfpflags;
- memcpy(new, orig, sizeof(*new));
- refcount_set(&new->vm_refcnt, 0);
- new->vm_lock_seq = UINT_MAX;
- INIT_LIST_HEAD(&new->anon_vma_chain);
+ return calloc(s->object_size, 1);
+}
- return new;
+static inline void kmem_cache_free(struct kmem_cache *s, void *x)
+{
+ free(x);
}
/*
@@ -696,11 +758,6 @@ static inline void mpol_put(struct mempolicy *)
{
}
-static inline void vm_area_free(struct vm_area_struct *vma)
-{
- free(vma);
-}
-
static inline void lru_add_drain(void)
{
}
@@ -1240,4 +1297,32 @@ static inline int mapping_map_writable(struct address_space *mapping)
return 0;
}
+static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
+{
+ (void)vma;
+ (void)reset_refcnt;
+}
+
+static inline void vma_numab_state_init(struct vm_area_struct *vma)
+{
+ (void)vma;
+}
+
+static inline void vma_numab_state_free(struct vm_area_struct *vma)
+{
+ (void)vma;
+}
+
+static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
+ struct vm_area_struct *new_vma)
+{
+ (void)orig_vma;
+ (void)new_vma;
+}
+
+static inline void free_anon_vma_name(struct vm_area_struct *vma)
+{
+ (void)vma;
+}
+
#endif /* __MM_VMA_INTERNAL_H */
--
2.49.0