[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b092522043d0f676151350e65be57b5bb5c8d72c.1719584707.git.lstoakes@gmail.com>
Date: Fri, 28 Jun 2024 15:35:24 +0100
From: Lorenzo Stoakes <lstoakes@...il.com>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
"Liam R . Howlett" <Liam.Howlett@...cle.com>,
Vlastimil Babka <vbabka@...e.cz>,
Matthew Wilcox <willy@...radead.org>,
Alexander Viro <viro@...iv.linux.org.uk>,
Christian Brauner <brauner@...nel.org>,
Jan Kara <jack@...e.cz>,
Eric Biederman <ebiederm@...ssion.com>,
Kees Cook <kees@...nel.org>,
Suren Baghdasaryan <surenb@...gle.com>,
Lorenzo Stoakes <lstoakes@...il.com>
Subject: [RFC PATCH v2 3/7] mm: move vma_shrink(), vma_expand() to internal header
The vma_shrink() and vma_expand() functions are internal VMA manipulation
functions which we ought to abstract for use outside of memory management
code.
To achieve this, we abstract the operation performed in fs/exec.c by
shift_arg_pages() into a new relocate_vma() function implemented in
mm/mmap.c, which enables us to also move move_page_tables() and
vma_iter_prev_range() to internal.h.
The purpose of doing this is to isolate key VMA manipulation functions in
order that we can both abstract them and later render them easily testable.
Signed-off-by: Lorenzo Stoakes <lstoakes@...il.com>
---
fs/exec.c | 68 ++------------------------------------
include/linux/mm.h | 17 +---------
mm/internal.h | 18 +++++++++++
mm/mmap.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 102 insertions(+), 82 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index 40073142288f..5cf53e20d8df 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -683,75 +683,11 @@ static int copy_strings_kernel(int argc, const char *const *argv,
/*
* During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once
* the binfmt code determines where the new stack should reside, we shift it to
- * its final location. The process proceeds as follows:
- *
- * 1) Use shift to calculate the new vma endpoints.
- * 2) Extend vma to cover both the old and new ranges. This ensures the
- * arguments passed to subsequent functions are consistent.
- * 3) Move vma's page tables to the new range.
- * 4) Free up any cleared pgd range.
- * 5) Shrink the vma to cover only the new range.
+ * its final location.
*/
static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
{
- struct mm_struct *mm = vma->vm_mm;
- unsigned long old_start = vma->vm_start;
- unsigned long old_end = vma->vm_end;
- unsigned long length = old_end - old_start;
- unsigned long new_start = old_start - shift;
- unsigned long new_end = old_end - shift;
- VMA_ITERATOR(vmi, mm, new_start);
- struct vm_area_struct *next;
- struct mmu_gather tlb;
-
- BUG_ON(new_start > new_end);
-
- /*
- * ensure there are no vmas between where we want to go
- * and where we are
- */
- if (vma != vma_next(&vmi))
- return -EFAULT;
-
- vma_iter_prev_range(&vmi);
- /*
- * cover the whole range: [new_start, old_end)
- */
- if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
- return -ENOMEM;
-
- /*
- * move the page tables downwards, on failure we rely on
- * process cleanup to remove whatever mess we made.
- */
- if (length != move_page_tables(vma, old_start,
- vma, new_start, length, false, true))
- return -ENOMEM;
-
- lru_add_drain();
- tlb_gather_mmu(&tlb, mm);
- next = vma_next(&vmi);
- if (new_end > old_start) {
- /*
- * when the old and new regions overlap clear from new_end.
- */
- free_pgd_range(&tlb, new_end, old_end, new_end,
- next ? next->vm_start : USER_PGTABLES_CEILING);
- } else {
- /*
- * otherwise, clean from old_start; this is done to not touch
- * the address space in [new_end, old_start) some architectures
- * have constraints on va-space that make this illegal (IA64) -
- * for the others its just a little faster.
- */
- free_pgd_range(&tlb, old_start, old_end, new_end,
- next ? next->vm_start : USER_PGTABLES_CEILING);
- }
- tlb_finish_mmu(&tlb);
-
- vma_prev(&vmi);
- /* Shrink the vma to just the new range */
- return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
+ return relocate_vma(vma, shift);
}
/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4d2b5538925b..ab4b70f2ce94 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -998,12 +998,6 @@ static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
return mas_prev(&vmi->mas, 0);
}
-static inline
-struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
-{
- return mas_prev_range(&vmi->mas, 0);
-}
-
static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
{
return vmi->mas.index;
@@ -2523,11 +2517,6 @@ int set_page_dirty_lock(struct page *page);
int get_cmdline(struct task_struct *task, char *buffer, int buflen);
-extern unsigned long move_page_tables(struct vm_area_struct *vma,
- unsigned long old_addr, struct vm_area_struct *new_vma,
- unsigned long new_addr, unsigned long len,
- bool need_rmap_locks, bool for_stack);
-
/*
* Flags used by change_protection(). For now we make it a bitmap so
* that we can pass in multiple flags just like parameters. However
@@ -3273,11 +3262,6 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
/* mmap.c */
extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
-extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
- unsigned long start, unsigned long end, pgoff_t pgoff,
- struct vm_area_struct *next);
-extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
- unsigned long start, unsigned long end, pgoff_t pgoff);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void unlink_file_vma(struct vm_area_struct *);
@@ -3285,6 +3269,7 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
unsigned long addr, unsigned long len, pgoff_t pgoff,
bool *need_rmap_locks);
extern void exit_mmap(struct mm_struct *);
+extern int relocate_vma(struct vm_area_struct *vma, unsigned long shift);
static inline int check_data_rlimit(unsigned long rlim,
unsigned long new,
diff --git a/mm/internal.h b/mm/internal.h
index 164f03c6bce2..8c7aa5860df4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1304,6 +1304,12 @@ static inline struct vm_area_struct
vma_policy(vma), new_ctx, anon_vma_name(vma));
}
+int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, pgoff_t pgoff,
+ struct vm_area_struct *next);
+int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, pgoff_t pgoff);
+
enum {
/* mark page accessed */
FOLL_TOUCH = 1 << 16,
@@ -1527,6 +1533,12 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
return 0;
}
+static inline
+struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
+{
+ return mas_prev_range(&vmi->mas, 0);
+}
+
/*
* VMA lock generalization
*/
@@ -1638,4 +1650,10 @@ void unlink_file_vma_batch_init(struct unlink_vma_file_batch *);
void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *);
void unlink_file_vma_batch_final(struct unlink_vma_file_batch *);
+/* mremap.c */
+unsigned long move_page_tables(struct vm_area_struct *vma,
+ unsigned long old_addr, struct vm_area_struct *new_vma,
+ unsigned long new_addr, unsigned long len,
+ bool need_rmap_locks, bool for_stack);
+
#endif /* __MM_INTERNAL_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index e42d89f98071..d2eebbed87b9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -4058,3 +4058,84 @@ static int __meminit init_reserve_notifier(void)
return 0;
}
subsys_initcall(init_reserve_notifier);
+
+/*
+ * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between
+ * this VMA and its relocated range, which will now reside at [vma->vm_start -
+ * shift, vma->vm_end - shift).
+ *
+ * This function is almost certainly NOT what you want for anything other than
+ * early executable temporary stack relocation.
+ */
+int relocate_vma(struct vm_area_struct *vma, unsigned long shift)
+{
+ /*
+ * The process proceeds as follows:
+ *
+ * 1) Use shift to calculate the new vma endpoints.
+ * 2) Extend vma to cover both the old and new ranges. This ensures the
+ * arguments passed to subsequent functions are consistent.
+ * 3) Move vma's page tables to the new range.
+ * 4) Free up any cleared pgd range.
+ * 5) Shrink the vma to cover only the new range.
+ */
+
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long old_start = vma->vm_start;
+ unsigned long old_end = vma->vm_end;
+ unsigned long length = old_end - old_start;
+ unsigned long new_start = old_start - shift;
+ unsigned long new_end = old_end - shift;
+ VMA_ITERATOR(vmi, mm, new_start);
+ struct vm_area_struct *next;
+ struct mmu_gather tlb;
+
+ BUG_ON(new_start > new_end);
+
+ /*
+ * ensure there are no vmas between where we want to go
+ * and where we are
+ */
+ if (vma != vma_next(&vmi))
+ return -EFAULT;
+
+ vma_iter_prev_range(&vmi);
+ /*
+ * cover the whole range: [new_start, old_end)
+ */
+ if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
+ return -ENOMEM;
+
+ /*
+ * move the page tables downwards, on failure we rely on
+ * process cleanup to remove whatever mess we made.
+ */
+ if (length != move_page_tables(vma, old_start,
+ vma, new_start, length, false, true))
+ return -ENOMEM;
+
+ lru_add_drain();
+ tlb_gather_mmu(&tlb, mm);
+ next = vma_next(&vmi);
+ if (new_end > old_start) {
+ /*
+ * when the old and new regions overlap clear from new_end.
+ */
+ free_pgd_range(&tlb, new_end, old_end, new_end,
+ next ? next->vm_start : USER_PGTABLES_CEILING);
+ } else {
+ /*
+ * otherwise, clean from old_start; this is done to not touch
+ * the address space in [new_end, old_start) some architectures
+ * have constraints on va-space that make this illegal (IA64) -
+ * for the others its just a little faster.
+ */
+ free_pgd_range(&tlb, old_start, old_end, new_end,
+ next ? next->vm_start : USER_PGTABLES_CEILING);
+ }
+ tlb_finish_mmu(&tlb);
+
+ vma_prev(&vmi);
+ /* Shrink the vma to just the new range */
+ return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
+}
--
2.45.1
Powered by blists - more mailing lists