[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAJuCfpEWwdnNs458wSb__C0eHSEvbvJ9nK3ryQWLTjA3+b3BmQ@mail.gmail.com>
Date: Mon, 29 Dec 2025 14:17:53 -0800
From: Suren Baghdasaryan <surenb@...gle.com>
To: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>, "Liam R . Howlett" <Liam.Howlett@...cle.com>,
Vlastimil Babka <vbabka@...e.cz>, Shakeel Butt <shakeel.butt@...ux.dev>,
David Hildenbrand <david@...nel.org>, Rik van Riel <riel@...riel.com>, Harry Yoo <harry.yoo@...cle.com>,
Jann Horn <jannh@...gle.com>, Mike Rapoport <rppt@...nel.org>, Michal Hocko <mhocko@...e.com>,
Pedro Falcato <pfalcato@...e.de>, Chris Li <chriscli@...gle.com>,
Barry Song <v-songbaohua@...o.com>, linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 3/8] mm/rmap: remove unnecessary root lock dance in
anon_vma clone, unmap
On Wed, Dec 17, 2025 at 4:27 AM Lorenzo Stoakes
<lorenzo.stoakes@...cle.com> wrote:
>
> The root anon_vma of all anon_vma's linked to a VMA must by definition be
> the same - a VMA and all of its descendants/ancestors must exist in the
> same CoW chain.
>
> Commit bb4aa39676f7 ("mm: avoid repeated anon_vma lock/unlock sequences in
> anon_vma_clone()") introduced paranoid checking of the root anon_vma
> remaining the same throughout all AVC's in 2011.
>
> I think 15 years later we can safely assume that this is always the case.
>
> Additionally, since unfaulted VMAs being cloned from or unlinked are
> no-op's, we can simply lock the anon_vma's associated with this rather than
> doing any specific dance around this.
>
> This removes unnecessary checks and makes it clear that the root anon_vma
> is shared between all anon_vma's in a given VMA's anon_vma_chain.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
> ---
> mm/rmap.c | 48 ++++++++++++------------------------------------
> 1 file changed, 12 insertions(+), 36 deletions(-)
>
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 9332d1cbc643..60134a566073 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -231,32 +231,6 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
> return -ENOMEM;
> }
>
> -/*
> - * This is a useful helper function for locking the anon_vma root as
> - * we traverse the vma->anon_vma_chain, looping over anon_vma's that
> - * have the same vma.
> - *
> - * Such anon_vma's should have the same root, so you'd expect to see
> - * just a single mutex_lock for the whole traversal.
> - */
> -static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
> -{
> - struct anon_vma *new_root = anon_vma->root;
> - if (new_root != root) {
> - if (WARN_ON_ONCE(root))
> - up_write(&root->rwsem);
> - root = new_root;
> - down_write(&root->rwsem);
> - }
> - return root;
> -}
> -
> -static inline void unlock_anon_vma_root(struct anon_vma *root)
> -{
> - if (root)
> - up_write(&root->rwsem);
> -}
> -
> static void check_anon_vma_clone(struct vm_area_struct *dst,
> struct vm_area_struct *src)
> {
> @@ -307,26 +281,25 @@ static void check_anon_vma_clone(struct vm_area_struct *dst,
> int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> {
> struct anon_vma_chain *avc, *pavc;
> - struct anon_vma *root = NULL;
>
> if (!src->anon_vma)
> return 0;
>
> check_anon_vma_clone(dst, src);
>
> + anon_vma_lock_write(src->anon_vma);
> list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
> struct anon_vma *anon_vma;
>
> avc = anon_vma_chain_alloc(GFP_NOWAIT);
> if (unlikely(!avc)) {
> - unlock_anon_vma_root(root);
> - root = NULL;
> + anon_vma_unlock_write(src->anon_vma);
> avc = anon_vma_chain_alloc(GFP_KERNEL);
> if (!avc)
> goto enomem_failure;
> + anon_vma_lock_write(src->anon_vma);
So, we drop and then reacquire src->anon_vma->root->rwsem here,
expecting src->anon_vma->root to remain the same across the
unlock/relock window. And IIUC it is src->vm_mm's mmap lock that
guarantees this stability. If so, could you please add a clarifying
comment here?
> }
> anon_vma = pavc->anon_vma;
> - root = lock_anon_vma_root(root, anon_vma);
> anon_vma_chain_link(dst, avc, anon_vma);
>
> /*
> @@ -343,7 +316,8 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> }
> if (dst->anon_vma)
> dst->anon_vma->num_active_vmas++;
> - unlock_anon_vma_root(root);
> +
> + anon_vma_unlock_write(src->anon_vma);
> return 0;
>
> enomem_failure:
> @@ -438,15 +412,17 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
> void unlink_anon_vmas(struct vm_area_struct *vma)
> {
> struct anon_vma_chain *avc, *next;
> - struct anon_vma *root = NULL;
> + struct anon_vma *active_anon_vma = vma->anon_vma;
>
> /* Always hold mmap lock, read-lock on unmap possibly. */
> mmap_assert_locked(vma->vm_mm);
>
> /* Unfaulted is a no-op. */
> - if (!vma->anon_vma)
> + if (!active_anon_vma)
> return;
>
> + anon_vma_lock_write(active_anon_vma);
> +
> /*
> * Unlink each anon_vma chained to the VMA. This list is ordered
> * from newest to oldest, ensuring the root anon_vma gets freed last.
> @@ -454,7 +430,6 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
> list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
> struct anon_vma *anon_vma = avc->anon_vma;
>
> - root = lock_anon_vma_root(root, anon_vma);
> anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
>
> /*
> @@ -470,13 +445,14 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
> anon_vma_chain_free(avc);
> }
>
> - vma->anon_vma->num_active_vmas--;
> + active_anon_vma->num_active_vmas--;
> /*
> * vma would still be needed after unlink, and anon_vma will be prepared
> * when handle fault.
> */
> vma->anon_vma = NULL;
> - unlock_anon_vma_root(root);
> + anon_vma_unlock_write(active_anon_vma);
> +
>
> /*
> * Iterate the list once more, it now only contains empty and unlinked
> --
> 2.52.0
>
Powered by blists - more mailing lists