lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJuCfpGxN6Py8zi9CveO+1xRaXd9+=sBFBjNPCjq=0wEmiHZ7A@mail.gmail.com>
Date: Fri, 7 Jun 2024 07:38:45 -0700
From: Suren Baghdasaryan <surenb@...gle.com>
To: "Liam R. Howlett" <Liam.Howlett@...cle.com>
Cc: Andrii Nakryiko <andrii.nakryiko@...il.com>, Vlastimil Babka <vbabka@...e.cz>, 
	sidhartha.kumar@...cle.com, Matthew Wilcox <willy@...radead.org>, 
	Lorenzo Stoakes <lstoakes@...il.com>, linux-fsdevel@...r.kernel.org, bpf@...r.kernel.org, 
	linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH 3/5] mm/mmap: Introduce vma_munmap_struct for use in
 munmap operations

On Fri, May 31, 2024 at 9:33 AM Liam R. Howlett <Liam.Howlett@...cle.com> wrote:
>
> Use a structure to pass along all the necessary information and counters
> involved in removing vmas from the mm_struct.
>
> Signed-off-by: Liam R. Howlett <Liam.Howlett@...cle.com>

Reviewed-by: Suren Baghdasaryan <surenb@...gle.com>

> ---
>  mm/internal.h |  16 ++++++
>  mm/mmap.c     | 133 +++++++++++++++++++++++++++++---------------------
>  2 files changed, 94 insertions(+), 55 deletions(-)
>
> diff --git a/mm/internal.h b/mm/internal.h
> index b2c75b12014e..6ebf77853d68 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1428,6 +1428,22 @@ struct vma_prepare {
>         struct vm_area_struct *remove2;
>  };
>
> +/*
> + * vma munmap operation
> + */
> +struct vma_munmap_struct {
> +       struct vma_iterator *vmi;
> +       struct mm_struct *mm;
> +       struct vm_area_struct *vma;     /* The first vma to munmap */
> +       struct list_head *uf;           /* Userfaultfd list_head */
> +       unsigned long start;            /* Aligned start addr */
> +       unsigned long end;              /* Aligned end addr */
> +       int vma_count;                  /* Number of vmas that will be removed */
> +       unsigned long nr_pages;         /* Number of pages being removed */
> +       unsigned long locked_vm;        /* Number of locked pages */
> +       bool unlock;                    /* Unlock after the munmap */
> +};
> +
>  void __meminit __init_single_page(struct page *page, unsigned long pfn,
>                                 unsigned long zone, int nid);
>
> diff --git a/mm/mmap.c b/mm/mmap.c
> index fad40d604c64..57f2383245ea 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -459,6 +459,31 @@ static inline void init_vma_prep(struct vma_prepare *vp,
>         init_multi_vma_prep(vp, vma, NULL, NULL, NULL);
>  }
>
> +/*
> + * init_vma_munmap() - Initializer wrapper for vma_munmap_struct
> + * @vms: The vma munmap struct
> + * @vmi: The vma iterator
> + * @vma: The first vm_area_struct to munmap
> + * @start: The aligned start address to munmap
> + * @end: The aligned end address to munmap
> + * @uf: The userfaultfd list_head
> + * @unlock: Unlock after the operation.  Only unlocked on success
> + */
> +static inline void init_vma_munmap(struct vma_munmap_struct *vms,
> +               struct vma_iterator *vmi, struct vm_area_struct *vma,
> +               unsigned long start, unsigned long end, struct list_head *uf,
> +               bool unlock)
> +{
> +       vms->vmi = vmi;
> +       vms->vma = vma;
> +       vms->mm = vma->vm_mm;
> +       vms->start = start;
> +       vms->end = end;
> +       vms->unlock = unlock;
> +       vms->uf = uf;
> +       vms->vma_count = 0;
> +       vms->nr_pages = vms->locked_vm = 0;
> +}
>
>  /*
>   * vma_prepare() - Helper function for handling locking VMAs prior to altering
> @@ -2340,7 +2365,6 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
>
>                 if (vma->vm_flags & VM_ACCOUNT)
>                         nr_accounted += nrpages;
> -
>                 vm_stat_account(mm, vma->vm_flags, -nrpages);
>                 remove_vma(vma, false);
>         }
> @@ -2562,29 +2586,20 @@ static inline void abort_munmap_vmas(struct ma_state *mas_detach)
>  }
>
>  /*
> - * vmi_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
> + * vms_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
>   * for removal at a later date.  Handles splitting first and last if necessary
>   * and marking the vmas as isolated.
>   *
> - * @vmi: The vma iterator
> - * @vma: The starting vm_area_struct
> - * @mm: The mm_struct
> - * @start: The aligned start address to munmap.
> - * @end: The aligned end address to munmap.
> - * @uf: The userfaultfd list_head
> + * @vms: The vma munmap struct
>   * @mas_detach: The maple state tracking the detached tree
>   *
>   * Return: 0 on success
>   */
> -static int
> -vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
> -                   struct mm_struct *mm, unsigned long start,
> -                   unsigned long end, struct list_head *uf,
> -                   struct ma_state *mas_detach, unsigned long *locked_vm)
> +static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
> +               struct ma_state *mas_detach)
>  {
>         struct vm_area_struct *next = NULL;
>         int error = -ENOMEM;
> -       int count = 0;
>
>         /*
>          * If we need to split any vma, do it now to save pain later.
> @@ -2595,17 +2610,18 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
>          */
>
>         /* Does it split the first one? */
> -       if (start > vma->vm_start) {
> +       if (vms->start > vms->vma->vm_start) {
>
>                 /*
>                  * Make sure that map_count on return from munmap() will
>                  * not exceed its limit; but let map_count go just above
>                  * its limit temporarily, to help free resources as expected.
>                  */
> -               if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
> +               if (vms->end < vms->vma->vm_end &&
> +                   vms->mm->map_count >= sysctl_max_map_count)
>                         goto map_count_exceeded;
>
> -               error = __split_vma(vmi, vma, start, 1);
> +               error = __split_vma(vms->vmi, vms->vma, vms->start, 1);
>                 if (error)
>                         goto start_split_failed;
>         }
> @@ -2614,24 +2630,24 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
>          * Detach a range of VMAs from the mm. Using next as a temp variable as
>          * it is always overwritten.
>          */
> -       next = vma;
> +       next = vms->vma;
>         do {
>                 /* Does it split the end? */
> -               if (next->vm_end > end) {
> -                       error = __split_vma(vmi, next, end, 0);
> +               if (next->vm_end > vms->end) {
> +                       error = __split_vma(vms->vmi, next, vms->end, 0);
>                         if (error)
>                                 goto end_split_failed;
>                 }
>                 vma_start_write(next);
> -               mas_set(mas_detach, count++);
> +               mas_set(mas_detach, vms->vma_count++);
>                 if (next->vm_flags & VM_LOCKED)
> -                       *locked_vm += vma_pages(next);
> +                       vms->locked_vm += vma_pages(next);
>
>                 error = mas_store_gfp(mas_detach, next, GFP_KERNEL);
>                 if (error)
>                         goto munmap_gather_failed;
>                 vma_mark_detached(next, true);
> -               if (unlikely(uf)) {
> +               if (unlikely(vms->uf)) {
>                         /*
>                          * If userfaultfd_unmap_prep returns an error the vmas
>                          * will remain split, but userland will get a
> @@ -2641,16 +2657,17 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
>                          * split, despite we could. This is unlikely enough
>                          * failure that it's not worth optimizing it for.
>                          */
> -                       error = userfaultfd_unmap_prep(next, start, end, uf);
> +                       error = userfaultfd_unmap_prep(next, vms->start,
> +                                                      vms->end, vms->uf);
>
>                         if (error)
>                                 goto userfaultfd_error;
>                 }
>  #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
> -               BUG_ON(next->vm_start < start);
> -               BUG_ON(next->vm_start > end);
> +               BUG_ON(next->vm_start < vms->start);
> +               BUG_ON(next->vm_start > vms->end);
>  #endif
> -       } for_each_vma_range(*vmi, next, end);
> +       } for_each_vma_range(*(vms->vmi), next, vms->end);
>
>  #if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
>         /* Make sure no VMAs are about to be lost. */
> @@ -2659,21 +2676,21 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
>                 struct vm_area_struct *vma_mas, *vma_test;
>                 int test_count = 0;
>
> -               vma_iter_set(vmi, start);
> +               vma_iter_set(vms->vmi, vms->start);
>                 rcu_read_lock();
> -               vma_test = mas_find(&test, count - 1);
> -               for_each_vma_range(*vmi, vma_mas, end) {
> +               vma_test = mas_find(&test, vms->vma_count - 1);
> +               for_each_vma_range(*(vms->vmi), vma_mas, vms->end) {
>                         BUG_ON(vma_mas != vma_test);
>                         test_count++;
> -                       vma_test = mas_next(&test, count - 1);
> +                       vma_test = mas_next(&test, vms->vma_count - 1);
>                 }
>                 rcu_read_unlock();
> -               BUG_ON(count != test_count);
> +               BUG_ON(vms->vma_count != test_count);
>         }
>  #endif
>
> -       while (vma_iter_addr(vmi) > start)
> -               vma_iter_prev_range(vmi);
> +       while (vma_iter_addr(vms->vmi) > vms->start)
> +               vma_iter_prev_range(vms->vmi);
>
>         return 0;
>
> @@ -2686,38 +2703,44 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
>         return error;
>  }
>
> -static void
> -vmi_complete_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
> -               struct mm_struct *mm, unsigned long start,
> -               unsigned long end, bool unlock, struct ma_state *mas_detach,
> -               unsigned long locked_vm)
> +/*
> + * vms_complete_munmap_vmas() - Update mm counters, unlock if directed, and free
> + * all VMA resources.
> + *
> + * Called by do_vmi_align_munmap() to finish the munmap of the gathered vmas.
> + * @vms: The vma munmap struct
> + * @mas_detach: The maple state of the detached vmas
> + *
> + */
> +static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
> +               struct ma_state *mas_detach)
>  {
>         struct vm_area_struct *prev, *next;
> -       int count;
> +       struct mm_struct *mm;
>
> -       count = mas_detach->index + 1;
> -       mm->map_count -= count;
> -       mm->locked_vm -= locked_vm;
> -       if (unlock)
> +       mm = vms->mm;
> +       mm->map_count -= vms->vma_count;
> +       mm->locked_vm -= vms->locked_vm;
> +       if (vms->unlock)
>                 mmap_write_downgrade(mm);
>
> -       prev = vma_iter_prev_range(vmi);
> -       next = vma_next(vmi);
> +       prev = vma_iter_prev_range(vms->vmi);
> +       next = vma_next(vms->vmi);
>         if (next)
> -               vma_iter_prev_range(vmi);
> +               vma_iter_prev_range(vms->vmi);
>
>         /*
>          * We can free page tables without write-locking mmap_lock because VMAs
>          * were isolated before we downgraded mmap_lock.
>          */
>         mas_set(mas_detach, 1);
> -       unmap_region(mm, mas_detach, vma, prev, next, start, end, count,
> -                    !unlock);
> +       unmap_region(mm, mas_detach, vms->vma, prev, next, vms->start, vms->end,
> +                    vms->vma_count, !vms->unlock);
>         /* Statistics and freeing VMAs */
>         mas_set(mas_detach, 0);
>         remove_mt(mm, mas_detach);
>         validate_mm(mm);
> -       if (unlock)
> +       if (vms->unlock)
>                 mmap_read_unlock(mm);
>
>         __mt_destroy(mas_detach->tree);
> @@ -2746,11 +2769,12 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
>         MA_STATE(mas_detach, &mt_detach, 0, 0);
>         mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
>         mt_on_stack(mt_detach);
> +       struct vma_munmap_struct vms;
>         int error;
> -       unsigned long locked_vm = 0;
>
> -       error = vmi_gather_munmap_vmas(vmi, vma, mm, start, end, uf,
> -                                      &mas_detach, &locked_vm);
> +       init_vma_munmap(&vms, vmi, vma, start, end, uf, unlock);
> +
> +       error = vms_gather_munmap_vmas(&vms, &mas_detach);
>         if (error)
>                 goto gather_failed;
>
> @@ -2758,8 +2782,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
>         if (error)
>                 goto clear_area_failed;
>
> -       vmi_complete_munmap_vmas(vmi, vma, mm, start, end, unlock, &mas_detach,
> -                                locked_vm);
> +       vms_complete_munmap_vmas(&vms, &mas_detach);
>         return 0;
>
>  clear_area_failed:
> --
> 2.43.0
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ