[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <405f6c44b4214ff466743ed94d16cb2fbea1b7f3@linux.dev>
Date: Sat, 06 Sep 2025 14:59:29 +0000
From: "Yajun Deng" <yajun.deng@...ux.dev>
To: "Liam R. Howlett" <Liam.Howlett@...cle.com>
Cc: akpm@...ux-foundation.org, david@...hat.com, lorenzo.stoakes@...cle.com,
riel@...riel.com, vbabka@...e.cz, harry.yoo@...cle.com,
linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] mm/rmap: make num_children and num_active_vmas update in
internally
September 5, 2025 at 11:16 PM, "Liam R. Howlett" <Liam.Howlett@...cle.com mailto:Liam.Howlett@...cle.com?to=%22Liam%20R.%20Howlett%22%20%3CLiam.Howlett%40oracle.com%3E > wrote:
>
> * Yajun Deng <yajun.deng@...ux.dev> [250905 09:21]:
>
> >
> > If the anon_vma_alloc() is called, the num_children of the parent of
> > the anon_vma will be updated. But this operation occurs outside of
> > anon_vma_alloc().
> >
> > The num_active_vmas are also updated outside of anon_vma.
> >
> > Pass the parent of anon_vma to the anon_vma_alloc() and update the
> > num_children inside it.
> >
> > Introduce anon_vma_attach() and anon_vma_detach() to update
> > num_active_vmas with the anon_vma.
> >
> > Signed-off-by: Yajun Deng <yajun.deng@...ux.dev>
> > ---
> > mm/rmap.c | 63 ++++++++++++++++++++++++++++---------------------------
> > 1 file changed, 32 insertions(+), 31 deletions(-)
> >
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index 34333ae3bd80..2a28edfa5734 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
> > @@ -86,15 +86,21 @@
> > static struct kmem_cache *anon_vma_cachep;
> > static struct kmem_cache *anon_vma_chain_cachep;
> >
> > -static inline struct anon_vma *anon_vma_alloc(void)
> > +static inline struct anon_vma *anon_vma_alloc(struct anon_vma *parent)
> > {
> > struct anon_vma *anon_vma;
> >
> > anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
> > - if (anon_vma) {
> > - atomic_set(&anon_vma->refcount, 1);
> > - anon_vma->num_children = 0;
> > - anon_vma->num_active_vmas = 0;
> > + if (!anon_vma)
> > + return NULL;
> > +
> > + atomic_set(&anon_vma->refcount, 1);
> > + anon_vma->num_children = 0;
> > + anon_vma->num_active_vmas = 0;
> > + if (parent) {
> > + anon_vma->parent = parent;
> > + anon_vma->root = parent->root;
> > + } else {
> > anon_vma->parent = anon_vma;
> > /*
> > * Initialise the anon_vma root to point to itself. If called
> > @@ -102,6 +108,7 @@ static inline struct anon_vma *anon_vma_alloc(void)
> > */
> > anon_vma->root = anon_vma;
> > }
> > + anon_vma->parent->num_children++;
> >
> > return anon_vma;
> > }
> > @@ -146,6 +153,19 @@ static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
> > kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
> > }
> >
> > +static inline void anon_vma_attach(struct vm_area_struct *vma,
> > + struct anon_vma *anon_vma)
> > +{
> > + vma->anon_vma = anon_vma;
> > + vma->anon_vma->num_active_vmas++;
> > +}
> > +
> > +static inline void anon_vma_detach(struct vm_area_struct *vma)
> > +{
> > + vma->anon_vma->num_active_vmas--;
> > + vma->anon_vma = NULL;
> > +}
> > +
> >
> It is a bit odd that you are setting a vma value with the prefix of
> anon_vma. Surely there is a better name: vma_attach_anon() ? And since
> this is editing the vma, should it be in rmap.c or vma.h?
>
I will move them to vma.h.
> >
> > static void anon_vma_chain_link(struct vm_area_struct *vma,
> > struct anon_vma_chain *avc,
> > struct anon_vma *anon_vma)
> > @@ -198,10 +218,9 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
> > anon_vma = find_mergeable_anon_vma(vma);
> > allocated = NULL;
> > if (!anon_vma) {
> > - anon_vma = anon_vma_alloc();
> > + anon_vma = anon_vma_alloc(NULL);
> >
> I don't love passing NULL for parent, it's two if statements to do the
> same work as before - we already know that parent is NULL by this point,
> but we call a function to check it again.
>
I will add a wapper function.
> >
> > if (unlikely(!anon_vma))
> > goto out_enomem_free_avc;
> > - anon_vma->num_children++; /* self-parent link for new root */
> > allocated = anon_vma;
> > }
> >
> > @@ -209,9 +228,8 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
> > /* page_table_lock to protect against threads */
> > spin_lock(&mm->page_table_lock);
> > if (likely(!vma->anon_vma)) {
> > - vma->anon_vma = anon_vma;
> > + anon_vma_attach(vma, anon_vma);
> > anon_vma_chain_link(vma, avc, anon_vma);
> > - anon_vma->num_active_vmas++;
> > allocated = NULL;
> > avc = NULL;
> > }
> > @@ -306,10 +324,8 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> > if (!dst->anon_vma && src->anon_vma &&
> > anon_vma->num_children < 2 &&
> > anon_vma->num_active_vmas == 0)
> > - dst->anon_vma = anon_vma;
> > + anon_vma_attach(dst, anon_vma);
> > }
> > - if (dst->anon_vma)
> > - dst->anon_vma->num_active_vmas++;
> > unlock_anon_vma_root(root);
> > return 0;
> >
> anon_vma_clone() has a goto label of enomem_failure that needs to be
> handled correctly. Looks like you have to avoid zeroing dst before
> unlink_anon_vmas(vma) there.
>
Yes, it's an error.
> >
> > @@ -356,31 +372,22 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
> > return 0;
> >
> > /* Then add our own anon_vma. */
> > - anon_vma = anon_vma_alloc();
> > + anon_vma = anon_vma_alloc(pvma->anon_vma);
> > if (!anon_vma)
> > goto out_error;
> > - anon_vma->num_active_vmas++;
> > avc = anon_vma_chain_alloc(GFP_KERNEL);
> > if (!avc)
> > goto out_error_free_anon_vma;
> >
> At this point anon_vma has a parent set and the parent->num_children++,
> but vma->anon_vma != anon_vma yet. If avc fails here, we will put the
> anon_vma but leave the parent with num_children incremented, since
> unlink_anon_vmas() will not find anything.
>
Yes, it's an error.
> >
> > - /*
> > - * The root anon_vma's rwsem is the lock actually used when we
> > - * lock any of the anon_vmas in this anon_vma tree.
> > - */
> >
> This information is lost when adding the parent passthrough.
>
I'll add it back.
> >
> > - anon_vma->root = pvma->anon_vma->root;
> > - anon_vma->parent = pvma->anon_vma;
> > /*
> > * With refcounts, an anon_vma can stay around longer than the
> > * process it belongs to. The root anon_vma needs to be pinned until
> > * this anon_vma is freed, because the lock lives in the root.
> > */
> > get_anon_vma(anon_vma->root);
> > - /* Mark this anon_vma as the one where our new (COWed) pages go. */
> > - vma->anon_vma = anon_vma;
> > + anon_vma_attach(vma, anon_vma);
> >
> So now we are in the same situation, we know what we need to do with the
> parent, but we have to run through another if statement to get it to
> happen instead of assigning it.
>
Some code like it.
init_tg_rt_entry() has two callers. One has a parent, the other does not.
> >
> > anon_vma_lock_write(anon_vma);
> > anon_vma_chain_link(vma, avc, anon_vma);
> > - anon_vma->parent->num_children++;
> > anon_vma_unlock_write(anon_vma);
> >
> > return 0;
> > @@ -419,15 +426,9 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
> > list_del(&avc->same_vma);
> > anon_vma_chain_free(avc);
> > }
> > - if (vma->anon_vma) {
> > - vma->anon_vma->num_active_vmas--;
> > + if (vma->anon_vma)
> > + anon_vma_detach(vma);
> >
> > - /*
> > - * vma would still be needed after unlink, and anon_vma will be prepared
> > - * when handle fault.
> > - */
> >
> It is still worth keeping the comment here too.
>
Okay.
> >
> > - vma->anon_vma = NULL;
> > - }
> > unlock_anon_vma_root(root);
> >
> > /*
> > --
> > 2.25.1
> >
>
Powered by blists - more mailing lists