[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240816111405.11793-6-spasswolf@web.de>
Date: Fri, 16 Aug 2024 13:13:46 +0200
From: Bert Karwatzki <spasswolf@....de>
To: "Liam R . Howlett" <Liam.Howlett@...cle.com>
Cc: Bert Karwatzki <spasswolf@....de>,
Suren Baghdasaryan <surenb@...gle.com>,
Vlastimil Babka <vbabka@...e.cz>,
Lorenzo Stoakes <lstoakes@...il.com>,
Matthew Wilcox <willy@...radead.org>,
sidhartha.kumar@...cle.com,
"Paul E . McKenney" <paulmck@...nel.org>,
Jiri Olsa <olsajiri@...il.com>,
linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
Andrew Morton <akpm@...ux-foundation.org>,
Kees Cook <kees@...nel.org>,
Jeff Xu <jeffxu@...omium.org>,
Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
Subject: [PATCH v5.1 05/19] mm/mmap: Introduce vma_munmap_struct for use in munmap operations
Use a structure to pass along all the necessary information and counters
involved in removing vmas from the mm_struct.
Rename the vmi_ functions to vms_ to reflect the change in the type of
their first argument.
Signed-off-by: Liam R. Howlett <Liam.Howlett@...cle.com>
Reviewed-by: Suren Baghdasaryan <surenb@...gle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
---
mm/vma.c | 140 +++++++++++++++++++++++++++++--------------------------
mm/vma.h | 16 +++++++
2 files changed, 90 insertions(+), 66 deletions(-)
diff --git a/mm/vma.c b/mm/vma.c
index a980837eefd7..9495230df3c3 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -365,6 +365,32 @@ static void __vma_link_file(struct vm_area_struct *vma,
flush_dcache_mmap_unlock(mapping);
}
+/*
+ * init_vma_munmap() - Initializer wrapper for vma_munmap_struct
+ * @vms: The vma munmap struct to fill in; its counters are reset to zero
+ * @vmi: The vma iterator
+ * @vma: The first vm_area_struct to munmap; must be non-NULL (->vm_mm is read)
+ * @start: The aligned start address to munmap
+ * @end: The aligned end address to munmap
+ * @uf: The userfaultfd list_head
+ * @unlock: Unlock after the operation. Only unlocked on success
+ */
+static inline void init_vma_munmap(struct vma_munmap_struct *vms,
+ struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, struct list_head *uf,
+ bool unlock)
+{
+ vms->vmi = vmi;
+ vms->vma = vma;
+ vms->mm = vma->vm_mm; /* the mm is taken from the first vma */
+ vms->start = start;
+ vms->end = end;
+ vms->unlock = unlock;
+ vms->uf = uf;
+ vms->vma_count = 0; /* incremented per vma while gathering */
+ vms->nr_pages = vms->locked_vm = 0; /* page counters start at zero */
+}
+
/*
* vma_prepare() - Helper function for handling locking VMAs prior to altering
* @vp: The initialized vma_prepare struct
@@ -685,81 +711,63 @@ static inline void abort_munmap_vmas(struct ma_state *mas_detach)
}
/*
- * vmi_complete_munmap_vmas() - Finish the munmap() operation
- * @vmi: The vma iterator
- * @vma: The first vma to be munmapped
- * @mm: The mm struct
- * @start: The start address
- * @end: The end address
- * @unlock: Unlock the mm or not
- * @mas_detach: them maple state of the detached vma maple tree
- * @locked_vm: The locked_vm count in the detached vmas
+ * vms_complete_munmap_vmas() - Finish the munmap() operation
+ * @vms: The vma munmap struct
+ * @mas_detach: The maple state of the detached vmas
*
- * This function updates the mm_struct, unmaps the region, frees the resources
+ * This updates the mm_struct, unmaps the region, frees the resources
* used for the munmap() and may downgrade the lock - if requested. Everything
* needed to be done once the vma maple tree is updated.
*/
-static void
-vmi_complete_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
- struct mm_struct *mm, unsigned long start,
- unsigned long end, bool unlock, struct ma_state *mas_detach,
- unsigned long locked_vm)
+
+static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
+ struct ma_state *mas_detach)
{
struct vm_area_struct *prev, *next;
- int count;
+ struct mm_struct *mm;
- count = mas_detach->index + 1;
- mm->map_count -= count;
- mm->locked_vm -= locked_vm;
- if (unlock)
+ mm = vms->mm;
+ mm->map_count -= vms->vma_count;
+ mm->locked_vm -= vms->locked_vm;
+ if (vms->unlock)
mmap_write_downgrade(mm);
- prev = vma_iter_prev_range(vmi);
- next = vma_next(vmi);
+ prev = vma_iter_prev_range(vms->vmi);
+ next = vma_next(vms->vmi);
if (next)
- vma_iter_prev_range(vmi);
+ vma_iter_prev_range(vms->vmi);
/*
* We can free page tables without write-locking mmap_lock because VMAs
* were isolated before we downgraded mmap_lock.
*/
mas_set(mas_detach, 1);
- unmap_region(mm, mas_detach, vma, prev, next, start, end, count,
- !unlock);
+ unmap_region(mm, mas_detach, vms->vma, prev, next, vms->start, vms->end,
+ vms->vma_count, !vms->unlock);
/* Statistics and freeing VMAs */
mas_set(mas_detach, 0);
remove_mt(mm, mas_detach);
validate_mm(mm);
- if (unlock)
+ if (vms->unlock)
mmap_read_unlock(mm);
__mt_destroy(mas_detach->tree);
}
/*
- * vmi_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
+ * vms_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
* for removal at a later date. Handles splitting first and last if necessary
* and marking the vmas as isolated.
*
- * @vmi: The vma iterator
- * @vma: The starting vm_area_struct
- * @mm: The mm_struct
- * @start: The aligned start address to munmap.
- * @end: The aligned end address to munmap.
- * @uf: The userfaultfd list_head
+ * @vms: The vma munmap struct
* @mas_detach: The maple state tracking the detached tree
- * @locked_vm: a pointer to store the VM_LOCKED pages count.
*
* Return: 0 on success
*/
-static int
-vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
- struct mm_struct *mm, unsigned long start,
- unsigned long end, struct list_head *uf,
- struct ma_state *mas_detach, unsigned long *locked_vm)
+static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
+ struct ma_state *mas_detach)
{
struct vm_area_struct *next = NULL;
- int count = 0;
int error = -ENOMEM;
/*
@@ -771,17 +779,18 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
*/
/* Does it split the first one? */
- if (start > vma->vm_start) {
+ if (vms->start > vms->vma->vm_start) {
/*
* Make sure that map_count on return from munmap() will
* not exceed its limit; but let map_count go just above
* its limit temporarily, to help free resources as expected.
*/
- if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
+ if (vms->end < vms->vma->vm_end &&
+ vms->mm->map_count >= sysctl_max_map_count)
goto map_count_exceeded;
- error = __split_vma(vmi, vma, start, 1);
+ error = __split_vma(vms->vmi, vms->vma, vms->start, 1);
if (error)
goto start_split_failed;
}
@@ -790,25 +799,24 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
* Detach a range of VMAs from the mm. Using next as a temp variable as
* it is always overwritten.
*/
- next = vma;
+ next = vms->vma;
do {
/* Does it split the end? */
- if (next->vm_end > end) {
- error = __split_vma(vmi, next, end, 0);
+ if (next->vm_end > vms->end) {
+ error = __split_vma(vms->vmi, next, vms->end, 0);
if (error)
goto end_split_failed;
}
vma_start_write(next);
- mas_set(mas_detach, count++);
+ mas_set(mas_detach, vms->vma_count++);
error = mas_store_gfp(mas_detach, next, GFP_KERNEL);
if (error)
goto munmap_gather_failed;
vma_mark_detached(next, true);
if (next->vm_flags & VM_LOCKED)
- *locked_vm += vma_pages(next);
+ vms->locked_vm += vma_pages(next);
- count++;
- if (unlikely(uf)) {
+ if (unlikely(vms->uf)) {
/*
* If userfaultfd_unmap_prep returns an error the vmas
* will remain split, but userland will get a
@@ -818,16 +826,17 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
* split, despite we could. This is unlikely enough
* failure that it's not worth optimizing it for.
*/
- error = userfaultfd_unmap_prep(next, start, end, uf);
+ error = userfaultfd_unmap_prep(next, vms->start,
+ vms->end, vms->uf);
if (error)
goto userfaultfd_error;
}
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
- BUG_ON(next->vm_start < start);
- BUG_ON(next->vm_start > end);
+ BUG_ON(next->vm_start < vms->start);
+ BUG_ON(next->vm_start > vms->end);
#endif
- } for_each_vma_range(*vmi, next, end);
+ } for_each_vma_range(*(vms->vmi), next, vms->end);
#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
/* Make sure no VMAs are about to be lost. */
@@ -836,21 +845,21 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct vm_area_struct *vma_mas, *vma_test;
int test_count = 0;
- vma_iter_set(vmi, start);
+ vma_iter_set(vms->vmi, vms->start);
rcu_read_lock();
- vma_test = mas_find(&test, count - 1);
- for_each_vma_range(*vmi, vma_mas, end) {
+ vma_test = mas_find(&test, vms->vma_count - 1);
+ for_each_vma_range(*(vms->vmi), vma_mas, vms->end) {
BUG_ON(vma_mas != vma_test);
test_count++;
- vma_test = mas_next(&test, count - 1);
+ vma_test = mas_next(&test, vms->vma_count - 1);
}
rcu_read_unlock();
- BUG_ON(count != test_count);
+ BUG_ON(vms->vma_count != test_count);
}
#endif
- while (vma_iter_addr(vmi) > start)
- vma_iter_prev_range(vmi);
+ while (vma_iter_addr(vms->vmi) > vms->start)
+ vma_iter_prev_range(vms->vmi);
return 0;
@@ -886,11 +895,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
MA_STATE(mas_detach, &mt_detach, 0, 0);
mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
mt_on_stack(mt_detach);
+ struct vma_munmap_struct vms;
int error;
- unsigned long locked_vm = 0;
- error = vmi_gather_munmap_vmas(vmi, vma, mm, start, end, uf,
- &mas_detach, &locked_vm);
+ init_vma_munmap(&vms, vmi, vma, start, end, uf, unlock);
+ error = vms_gather_munmap_vmas(&vms, &mas_detach);
if (error)
goto gather_failed;
@@ -899,8 +908,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
goto clear_tree_failed;
/* Point of no return */
- vmi_complete_munmap_vmas(vmi, vma, mm, start, end, unlock, &mas_detach,
- locked_vm);
+ vms_complete_munmap_vmas(&vms, &mas_detach);
return 0;
clear_tree_failed:
diff --git a/mm/vma.h b/mm/vma.h
index 6efdf1768a0a..f65c739cbd00 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -21,6 +21,22 @@ struct vma_prepare {
struct vm_area_struct *remove2;
};
+/*
+ * vma munmap operation: the information and counters used to remove vmas
+ */
+struct vma_munmap_struct {
+ struct vma_iterator *vmi; /* The vma iterator */
+ struct mm_struct *mm; /* The mm of the first vma (vma->vm_mm) */
+ struct vm_area_struct *vma; /* The first vma to munmap */
+ struct list_head *uf; /* Userfaultfd list_head */
+ unsigned long start; /* Aligned start addr (inclusive) */
+ unsigned long end; /* Aligned end addr (exclusive) */
+ int vma_count; /* Number of vmas that will be removed */
+ unsigned long nr_pages; /* Number of pages being removed */
+ unsigned long locked_vm; /* Number of locked pages */
+ bool unlock; /* Unlock after the munmap */
+};
+
struct unlink_vma_file_batch {
int count;
struct vm_area_struct *vmas[8];
--
2.45.2
Powered by blists - more mailing lists