linux-kernel - Re: [PATCH v2 8/8] binder: use per-vma lock in page installation

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJuCfpHM8J0S4dXhxmVuFSTUV0czg1CTFpf_C=k7M57T9qh-VQ@mail.gmail.com>
Date: Thu, 7 Nov 2024 08:16:39 -0800
From: Suren Baghdasaryan <surenb@...gle.com>
To: Carlos Llamas <cmllamas@...gle.com>
Cc: Greg Kroah-Hartman <gregkh@...uxfoundation.org>, Arve Hjønnevåg <arve@...roid.com>, 
	Todd Kjos <tkjos@...roid.com>, Martijn Coenen <maco@...roid.com>, 
	Joel Fernandes <joel@...lfernandes.org>, Christian Brauner <brauner@...nel.org>, 
	linux-kernel@...r.kernel.org, kernel-team@...roid.com, 
	Nhat Pham <nphamcs@...il.com>, Johannes Weiner <hannes@...xchg.org>, 
	Barry Song <v-songbaohua@...o.com>, Hillf Danton <hdanton@...a.com>, 
	Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
Subject: Re: [PATCH v2 8/8] binder: use per-vma lock in page installation

On Wed, Nov 6, 2024 at 8:03 PM Carlos Llamas <cmllamas@...gle.com> wrote:
>
> Use per-vma locking for concurrent page installations, this minimizes
> contention with unrelated vmas improving performance. The mmap_lock is
> still acquired when needed though, e.g. before get_user_pages_remote().
>
> Many thanks to Barry Song who posted a similar approach [1].
>
> Link: https://lore.kernel.org/all/20240902225009.34576-1-21cnbao@gmail.com/ [1]
> Cc: Nhat Pham <nphamcs@...il.com>
> Cc: Johannes Weiner <hannes@...xchg.org>
> Cc: Barry Song <v-songbaohua@...o.com>
> Cc: Suren Baghdasaryan <surenb@...gle.com>
> Cc: Hillf Danton <hdanton@...a.com>
> Cc: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
> Signed-off-by: Carlos Llamas <cmllamas@...gle.com>
> ---
>  drivers/android/binder_alloc.c | 85 +++++++++++++++++++++++-----------
>  1 file changed, 57 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
> index 814435a2601a..debfa541e01b 100644
> --- a/drivers/android/binder_alloc.c
> +++ b/drivers/android/binder_alloc.c
> @@ -233,6 +233,56 @@ static inline bool binder_alloc_is_mapped(struct binder_alloc *alloc)
>         return smp_load_acquire(&alloc->mapped);
>  }
>
> +static struct page *binder_page_lookup(struct mm_struct *mm,

Maybe pass "struct binder_alloc" to both binder_page_lookup() and
binder_page_insert()?
I liked how the previous code stabilized mm with mmget_not_zero() once,
whereas now binder_page_lookup() and binder_page_insert() each have to
mmget/mmput individually. Not a big deal, but the old way looked cleaner.

> +                                      unsigned long addr)
> +{
> +       struct page *page;
> +       long ret;
> +
> +       if (!mmget_not_zero(mm))
> +               return NULL;
> +
> +       mmap_read_lock(mm);
> +       ret = get_user_pages_remote(mm, addr, 1, 0, &page, NULL);
> +       mmap_read_unlock(mm);
> +       mmput_async(mm);
> +
> +       return ret > 0 ? page : NULL;
> +}
> +
> +static int binder_page_insert(struct binder_alloc *alloc,
> +                             unsigned long addr,
> +                             struct page *page)
> +{
> +       struct mm_struct *mm = alloc->mm;
> +       struct vm_area_struct *vma;
> +       int ret = -ESRCH;
> +
> +       if (!mmget_not_zero(mm))
> +               return -ESRCH;
> +
> +       /* attempt per-vma lock first */
> +       vma = lock_vma_under_rcu(mm, addr);
> +       if (!vma)
> +               goto lock_mmap;
> +
> +       if (binder_alloc_is_mapped(alloc))

I don't think you need this check here. lock_vma_under_rcu() ensures
that the VMA was not detached from the tree after locking it, so
if you got a VMA, it's in the tree and it can't be removed (because
it's locked). remove_vma()->vma_close()->vma->vm_ops->close() is
called after the VMA gets detached from the tree, and that won't happen
while the VMA is locked. So, if lock_vma_under_rcu() returns a VMA,
binder_alloc_is_mapped() always has to return true. A WARN_ON() check
here to ensure that might be a better option.

> +               ret = vm_insert_page(vma, addr, page);
> +       vma_end_read(vma);
> +       goto done;

I think the code would be more readable without these jumps:

        vma = lock_vma_under_rcu(mm, addr);
        if (vma) {
               if (!WARN_ON(!binder_alloc_is_mapped(alloc)))
                       ret = vm_insert_page(vma, addr, page);
               vma_end_read(vma);
        } else {
               /* fall back to mmap_lock */
               mmap_read_lock(mm);
               vma = vma_lookup(mm, addr);
               if (vma && binder_alloc_is_mapped(alloc))
                       ret = vm_insert_page(vma, addr, page);
               mmap_read_unlock(mm);
        }
        mmput_async(mm);
        return ret;


> +
> +lock_mmap:
> +       /* fall back to mmap_lock */
> +       mmap_read_lock(mm);
> +       vma = vma_lookup(mm, addr);
> +       if (vma && binder_alloc_is_mapped(alloc))
> +               ret = vm_insert_page(vma, addr, page);
> +       mmap_read_unlock(mm);
> +done:
> +       mmput_async(mm);
> +       return ret;
> +}
> +
>  static struct page *binder_page_alloc(struct binder_alloc *alloc,
>                                       unsigned long index,
>                                       unsigned long addr)
> @@ -254,31 +304,14 @@ static int binder_install_single_page(struct binder_alloc *alloc,
>                                       unsigned long index,
>                                       unsigned long addr)
>  {
> -       struct vm_area_struct *vma;
>         struct page *page;
> -       long npages;
>         int ret;
>
> -       if (!mmget_not_zero(alloc->mm))
> -               return -ESRCH;
> -
>         page = binder_page_alloc(alloc, index, addr);
> -       if (!page) {
> -               ret = -ENOMEM;
> -               goto out;
> -       }
> -
> -       mmap_read_lock(alloc->mm);
> -       vma = vma_lookup(alloc->mm, addr);
> -       if (!vma || !binder_alloc_is_mapped(alloc)) {
> -               mmap_read_unlock(alloc->mm);
> -               __free_page(page);
> -               pr_err("%d: %s failed, no vma\n", alloc->pid, __func__);
> -               ret = -ESRCH;
> -               goto out;
> -       }
> +       if (!page)
> +               return -ENOMEM;
>
> -       ret = vm_insert_page(vma, addr, page);
> +       ret = binder_page_insert(alloc, addr, page);
>         switch (ret) {
>         case -EBUSY:
>                 /*
> @@ -288,12 +321,11 @@ static int binder_install_single_page(struct binder_alloc *alloc,
>                  */
>                 ret = 0;
>                 __free_page(page);
> -               npages = get_user_pages_remote(alloc->mm, addr, 1, 0, &page, NULL);
> -               if (npages <= 0) {
> +               page = binder_page_lookup(alloc->mm, addr);
> +               if (!page) {
>                         pr_err("%d: failed to find page at offset %lx\n",
>                                alloc->pid, addr - alloc->vm_start);
> -                       ret = -ESRCH;
> -                       break;
> +                       return -ESRCH;
>                 }
>                 fallthrough;
>         case 0:
> @@ -304,12 +336,9 @@ static int binder_install_single_page(struct binder_alloc *alloc,
>                 __free_page(page);
>                 pr_err("%d: %s failed to insert page at offset %lx with %d\n",
>                        alloc->pid, __func__, addr - alloc->vm_start, ret);
> -               ret = -ENOMEM;
>                 break;
>         }
> -       mmap_read_unlock(alloc->mm);
> -out:
> -       mmput_async(alloc->mm);
> +
>         return ret;
>  }
>
> --
> 2.47.0.199.ga7371fff76-goog
>