[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8c548bb3d0286bfaef2cd5e67d7bf698967a52a1.1719481836.git.lstoakes@gmail.com>
Date: Thu, 27 Jun 2024 11:39:28 +0100
From: Lorenzo Stoakes <lstoakes@...il.com>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
"Liam R . Howlett" <Liam.Howlett@...cle.com>,
Vlastimil Babka <vbabka@...e.cz>,
Matthew Wilcox <willy@...radead.org>,
Alexander Viro <viro@...iv.linux.org.uk>,
Christian Brauner <brauner@...nel.org>,
Jan Kara <jack@...e.cz>,
Eric Biederman <ebiederm@...ssion.com>,
Kees Cook <kees@...nel.org>,
Suren Baghdasaryan <surenb@...gle.com>,
Lorenzo Stoakes <lstoakes@...il.com>
Subject: [RFC PATCH 3/7] mm: unexport vma_expand() / vma_shrink()
The vma_expand() and vma_shrink() functions are core VMA manipulation
functions which ultimately invoke VMA split/merge. In order to make these
testable, it is convenient to place all such core functions in a header
internal to mm/.
In addition, it is safer to abstract direct access to such functionality so
we can better control how other parts of the kernel use them, which
provides us the freedom to change how this functionality behaves as needed
without having to worry about how this functionality is used elsewhere.
In order to satisfy both of these requirements, we provide abstractions for
the sole external user of these functions, shift_arg_pages() in fs/exec.c.
We provide vma_expand_bottom() and vma_shrink_top() functions which better
match the semantics of what shift_arg_pages() is trying to accomplish by
explicitly wrapping the safe expansion of the bottom of a VMA and the
shrinking of the top of a VMA.
As a result, we place the vma_shrink() and vma_expand() functions into
mm/internal.h to unexport them from use by any other part of the kernel.
Signed-off-by: Lorenzo Stoakes <lstoakes@...il.com>
---
fs/exec.c | 26 +++++--------------
include/linux/mm.h | 9 +++----
mm/internal.h | 6 +++++
mm/mmap.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 82 insertions(+), 24 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index 40073142288f..1cb3bf323e0f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -700,25 +700,14 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
unsigned long length = old_end - old_start;
unsigned long new_start = old_start - shift;
unsigned long new_end = old_end - shift;
- VMA_ITERATOR(vmi, mm, new_start);
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *next;
struct mmu_gather tlb;
+ int ret;
- BUG_ON(new_start > new_end);
-
- /*
- * ensure there are no vmas between where we want to go
- * and where we are
- */
- if (vma != vma_next(&vmi))
- return -EFAULT;
-
- vma_iter_prev_range(&vmi);
- /*
- * cover the whole range: [new_start, old_end)
- */
- if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
- return -ENOMEM;
+ ret = vma_expand_bottom(&vmi, vma, shift, &next);
+ if (ret)
+ return ret;
/*
* move the page tables downwards, on failure we rely on
@@ -730,7 +719,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
lru_add_drain();
tlb_gather_mmu(&tlb, mm);
- next = vma_next(&vmi);
+
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
@@ -749,9 +738,8 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
}
tlb_finish_mmu(&tlb);
- vma_prev(&vmi);
/* Shrink the vma to just the new range */
- return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
+ return vma_shrink_top(&vmi, vma, shift);
}
/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4d2b5538925b..e3220439cf75 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3273,11 +3273,10 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
/* mmap.c */
extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
-extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
- unsigned long start, unsigned long end, pgoff_t pgoff,
- struct vm_area_struct *next);
-extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
- unsigned long start, unsigned long end, pgoff_t pgoff);
+extern int vma_expand_bottom(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long shift, struct vm_area_struct **next);
+extern int vma_shrink_top(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long shift);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void unlink_file_vma(struct vm_area_struct *);
diff --git a/mm/internal.h b/mm/internal.h
index c8177200c943..f7779727bb78 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1305,6 +1305,12 @@ static inline struct vm_area_struct
vma_policy(vma), new_ctx, anon_vma_name(vma));
}
+int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, pgoff_t pgoff,
+ struct vm_area_struct *next);
+int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, pgoff_t pgoff);
+
enum {
/* mark page accessed */
FOLL_TOUCH = 1 << 16,
diff --git a/mm/mmap.c b/mm/mmap.c
index e42d89f98071..574e69a04ebe 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3940,6 +3940,71 @@ void mm_drop_all_locks(struct mm_struct *mm)
mutex_unlock(&mm_all_locks_mutex);
}
+/*
+ * vma_expand_bottom() - Expands the bottom of a VMA downwards. An error will
+ * arise if there is another VMA in the expanded range, or
+ * if the expansion fails. This function leaves the VMA
+ * iterator, vmi, positioned at the newly expanded VMA.
+ * @vmi: The VMA iterator.
+ * @vma: The VMA to modify.
+ * @shift: The number of bytes by which to expand the bottom of the VMA.
+ * @next: Output parameter, pointing at the VMA immediately succeeding the newly
+ * expanded VMA.
+ *
+ * Returns: 0 on success, an error code otherwise.
+ */
+int vma_expand_bottom(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long shift, struct vm_area_struct **next)
+{
+ unsigned long old_start = vma->vm_start;
+ unsigned long old_end = vma->vm_end;
+ unsigned long new_start = old_start - shift;
+ unsigned long new_end = old_end - shift;
+
+ BUG_ON(new_start > new_end);
+
+ vma_iter_set(vmi, new_start);
+
+ /*
+ * ensure there are no vmas between where we want to go
+ * and where we are
+ */
+ if (vma != vma_next(vmi))
+ return -EFAULT;
+
+ vma_iter_prev_range(vmi);
+
+ /*
+ * cover the whole range: [new_start, old_end)
+ */
+ if (vma_expand(vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
+ return -ENOMEM;
+
+ *next = vma_next(vmi);
+ vma_prev(vmi);
+
+ return 0;
+}
+
+/*
+ * vma_shrink_top() - Reduce an existing VMA's memory area by shift bytes from
+ * the top of the VMA.
+ * @vmi: The VMA iterator, must be positioned at the VMA.
+ * @vma: The VMA to modify.
+ * @shift: The number of bytes by which to shrink the VMA.
+ *
+ * Returns: 0 on success, an error code otherwise.
+ */
+int vma_shrink_top(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long shift)
+{
+ if (shift >= vma->vm_end - vma->vm_start)
+ return -EINVAL;
+
+ return vma_shrink(vmi, vma, vma->vm_start, vma->vm_end - shift,
+ vma->vm_pgoff);
+}
+
/*
* initialise the percpu counter for VM
*/
--
2.45.1
Powered by blists - more mailing lists