[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJuCfpGAr=Tm=JJtnrJ0ipFo2yqYSEVAMtx5aUU-k1F6prjYjQ@mail.gmail.com>
Date: Thu, 7 Nov 2024 07:10:28 -0800
From: Suren Baghdasaryan <surenb@...gle.com>
To: Carlos Llamas <cmllamas@...gle.com>
Cc: Greg Kroah-Hartman <gregkh@...uxfoundation.org>, Arve Hjønnevåg <arve@...roid.com>,
Todd Kjos <tkjos@...roid.com>, Martijn Coenen <maco@...roid.com>,
Joel Fernandes <joel@...lfernandes.org>, Christian Brauner <brauner@...nel.org>,
linux-kernel@...r.kernel.org, kernel-team@...roid.com,
David Hildenbrand <david@...hat.com>, Barry Song <v-songbaohua@...o.com>,
"Liam R. Howlett" <Liam.Howlett@...cle.com>
Subject: Re: [PATCH v2 2/8] binder: concurrent page installation
On Wed, Nov 6, 2024 at 8:02 PM Carlos Llamas <cmllamas@...gle.com> wrote:
>
> Allow multiple callers to install pages simultaneously by downgrading
> the mmap_sem to non-exclusive mode. Races to the same PTE are handled
> using get_user_pages_remote() to retrieve the already installed page.
> This method significantly reduces contention in the mmap semaphore.
>
> To ensure safety, vma_lookup() is used (instead of alloc->vma) to avoid
> operating on an isolated VMA. In addition, zap_page_range_single() is
> called under the alloc->mutex to avoid racing with the shrinker.
>
> Many thanks to Barry Song who posted a similar approach [1].
>
> Link: https://lore.kernel.org/all/20240902225009.34576-1-21cnbao@gmail.com/ [1]
> Cc: David Hildenbrand <david@...hat.com>
> Cc: Barry Song <v-songbaohua@...o.com>
> Cc: Suren Baghdasaryan <surenb@...gle.com>
> Cc: Liam R. Howlett <Liam.Howlett@...cle.com>
> Signed-off-by: Carlos Llamas <cmllamas@...gle.com>
> ---
> drivers/android/binder_alloc.c | 64 +++++++++++++++++++++-------------
> 1 file changed, 40 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
> index 7241bf4a3ff2..acdc05552603 100644
> --- a/drivers/android/binder_alloc.c
> +++ b/drivers/android/binder_alloc.c
> @@ -221,26 +221,14 @@ static int binder_install_single_page(struct binder_alloc *alloc,
> struct binder_lru_page *lru_page,
> unsigned long addr)
> {
> + struct vm_area_struct *vma;
> struct page *page;
> - int ret = 0;
> + long npages;
> + int ret;
>
> if (!mmget_not_zero(alloc->mm))
> return -ESRCH;
>
> - /*
> - * Protected with mmap_sem in write mode as multiple tasks
> - * might race to install the same page.
> - */
> - mmap_write_lock(alloc->mm);
> - if (binder_get_installed_page(lru_page))
> - goto out;
> -
> - if (!alloc->vma) {
> - pr_err("%d: %s failed, no vma\n", alloc->pid, __func__);
> - ret = -ESRCH;
> - goto out;
> - }
> -
> page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
> if (!page) {
> pr_err("%d: failed to allocate page\n", alloc->pid);
> @@ -248,19 +236,47 @@ static int binder_install_single_page(struct binder_alloc *alloc,
> goto out;
> }
>
> - ret = vm_insert_page(alloc->vma, addr, page);
> - if (ret) {
> - pr_err("%d: %s failed to insert page at offset %lx with %d\n",
> - alloc->pid, __func__, addr - alloc->buffer, ret);
> + mmap_read_lock(alloc->mm);
> + vma = vma_lookup(alloc->mm, addr);
> + if (!vma || vma != alloc->vma) {
> + mmap_read_unlock(alloc->mm);
nit: instead of unlocking here, you could add another label just before the
mmap_read_unlock() at the end and jump to it.
> __free_page(page);
> - ret = -ENOMEM;
> + pr_err("%d: %s failed, no vma\n", alloc->pid, __func__);
> + ret = -ESRCH;
> goto out;
> }
>
> - /* Mark page installation complete and safe to use */
> - binder_set_installed_page(lru_page, page);
> + ret = vm_insert_page(vma, addr, page);
> + switch (ret) {
> + case -EBUSY:
> + /*
> + * EBUSY is ok. Someone installed the pte first but the
> + * lru_page->page_ptr has not been updated yet. Discard
> + * our page and look up the one already installed.
> + */
> + ret = 0;
> + __free_page(page);
> + npages = get_user_pages_remote(alloc->mm, addr, 1, 0, &page, NULL);
> + if (npages <= 0) {
> + pr_err("%d: failed to find page at offset %lx\n",
> + alloc->pid, addr - alloc->buffer);
> + ret = -ESRCH;
> + break;
> + }
> + fallthrough;
> + case 0:
> + /* Mark page installation complete and safe to use */
> + binder_set_installed_page(lru_page, page);
> + break;
> + default:
> + __free_page(page);
> + pr_err("%d: %s failed to insert page at offset %lx with %d\n",
> + alloc->pid, __func__, addr - alloc->buffer, ret);
> + ret = -ENOMEM;
vm_insert_page() can return -EINVAL (see
validate_page_before_insert()). Instead of converting other error codes
into -ENOMEM, why not return "ret" as is?
> + break;
> + }
> + mmap_read_unlock(alloc->mm);
> out:
> - mmap_write_unlock(alloc->mm);
> mmput_async(alloc->mm);
> return ret;
> }
> @@ -1091,7 +1107,6 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
> trace_binder_unmap_kernel_end(alloc, index);
>
> list_lru_isolate(lru, item);
> - mutex_unlock(&alloc->mutex);
> spin_unlock(lock);
>
> if (vma) {
> @@ -1102,6 +1117,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
> trace_binder_unmap_user_end(alloc, index);
> }
>
> + mutex_unlock(&alloc->mutex);
> mmap_read_unlock(mm);
> mmput_async(mm);
> __free_page(page_to_free);
> --
> 2.47.0.199.ga7371fff76-goog
>
Powered by blists - more mailing lists