Message-ID: <20220126202857.y53bz24zom2znb5i@revolver>
Date: Wed, 26 Jan 2022 20:29:10 +0000
From: Liam Howlett <liam.howlett@...cle.com>
To: Vlastimil Babka <vbabka@...e.cz>
CC: "maple-tree@...ts.infradead.org" <maple-tree@...ts.infradead.org>,
"linux-mm@...ck.org" <linux-mm@...ck.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Song Liu <songliubraving@...com>,
Davidlohr Bueso <dave@...olabs.net>,
"Paul E . McKenney" <paulmck@...nel.org>,
Matthew Wilcox <willy@...radead.org>,
Laurent Dufour <ldufour@...ux.ibm.com>,
David Rientjes <rientjes@...gle.com>,
Axel Rasmussen <axelrasmussen@...gle.com>,
Suren Baghdasaryan <surenb@...gle.com>,
Rik van Riel <riel@...riel.com>,
Peter Zijlstra <peterz@...radead.org>,
Michel Lespinasse <walken.cr@...il.com>,
Jerome Glisse <jglisse@...hat.com>,
Minchan Kim <minchan@...gle.com>,
Joel Fernandes <joelaf@...gle.com>,
Rom Lemarchand <romlem@...gle.com>
Subject: Re: [PATCH v4 65/66] mm: Remove the vma linked list

* Vlastimil Babka <vbabka@...e.cz> [220120 12:41]:
> On 12/1/21 15:30, Liam Howlett wrote:
> > From: "Liam R. Howlett" <Liam.Howlett@...cle.com>
> >
> > Replace any vm_next use with vma_find().
> >
> > Update free_pgtables(), unmap_vmas(), and zap_page_range() to use the
> > maple tree.
>
> > Use the new free_pgtables() and unmap_vmas() in do_mas_align_munmap().
> > At the same time, alter the loop to be more compact.
> >
> > Now that free_pgtables() and unmap_vmas() take a maple tree as an
> > argument, rearrange do_mas_align_munmap() to use the new table to hold
> > the lock
>
> table or tree?
tree, thanks.
>
> > Remove __vma_link_list() and __vma_unlink_list() as they are exclusively
> > used to update the linked list
> >
> > Rework validation of tree as it was depending on the linked list.
> >
> > Signed-off-by: Liam R. Howlett <Liam.Howlett@...cle.com>
>
> git grep shows that some usages of 'vm_next' and 'vm_prev' remain after this
> patch, including some exotic arch code.
I must have missed them being added during the development cycle of
the maple tree... except parisc; parisc has a block of code left in an
#if 0, so it's not lost. Good thing it's in CVS now so it's safe :)

Thanks. riscv will require a new patch.

The damon test code will also require a new patch - I will add this to
the damon conversion patch.
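
For reference, the vm_next -> vma_find() replacement the commit
message describes is mechanical; a minimal sketch, using vma_find() as
this series defines it (see the expand_upwards() hunk below):

	struct vm_area_struct *next;

	/* before: next = vma->vm_next; */
	next = vma_find(mm, vma->vm_end);  /* first VMA at or above vm_end */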
>
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -398,12 +398,21 @@ void free_pgd_range(struct mmu_gather *tlb,
> > } while (pgd++, addr = next, addr != end);
> > }
> >
> > -void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
> > - unsigned long floor, unsigned long ceiling)
> > +void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
> > + struct vm_area_struct *vma, unsigned long floor,
> > + unsigned long ceiling)
> > {
> > - while (vma) {
> > - struct vm_area_struct *next = vma->vm_next;
> > + MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
> > +
> > + do {
> > unsigned long addr = vma->vm_start;
> > + struct vm_area_struct *next;
> > +
> > + /*
> > + * Note: USER_PGTABLES_CEILING may be passed as ceiling and may
> > + * be 0. This will underflow and is okay.
> > + */
> > + next = mas_find(&mas, ceiling - 1);
> >
> > /*
> > * Hide vma from rmap and truncate_pagecache before freeing
> > @@ -422,7 +431,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
> > while (next && next->vm_start <= vma->vm_end + PMD_SIZE
> > && !is_vm_hugetlb_page(next)) {
> > vma = next;
> > - next = vma->vm_next;
> > + next = mas_find(&mas, ceiling - 1);
> > unlink_anon_vmas(vma);
> > unlink_file_vma(vma);
> > }
> > @@ -430,7 +439,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
> > floor, next ? next->vm_start : ceiling);
> > }
> > vma = next;
> > - }
> > + } while (vma);
> > }
> >
> > void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
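
The underflow note deserves spelling out: mas_find() takes an
inclusive max, and when ceiling is 0 (USER_PGTABLES_CEILING can be 0)
"ceiling - 1" wraps to ULONG_MAX, covering the rest of the address
space. A standalone userspace demo of the (well-defined) unsigned
wraparound:

	#include <limits.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned long ceiling = 0;        /* the "no ceiling" case */
		unsigned long max = ceiling - 1;  /* wraps, not undefined */

		printf("%d\n", max == ULONG_MAX); /* prints 1 */
		return 0;
	}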
> > @@ -1602,17 +1611,19 @@ static void unmap_single_vma(struct mmu_gather *tlb,
> > * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
> > * drops the lock and schedules.
> > */
> > -void unmap_vmas(struct mmu_gather *tlb,
> > +void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
> > struct vm_area_struct *vma, unsigned long start_addr,
> > unsigned long end_addr)
> > {
> > struct mmu_notifier_range range;
> > + MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
> >
> > mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
> > start_addr, end_addr);
> > mmu_notifier_invalidate_range_start(&range);
> > - for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
> > + do {
> > unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
> > + } while ((vma = mas_find(&mas, end_addr - 1)) != NULL);
> > mmu_notifier_invalidate_range_end(&range);
> > }
> >
> > @@ -1627,8 +1638,11 @@ void unmap_vmas(struct mmu_gather *tlb,
> > void zap_page_range(struct vm_area_struct *vma, unsigned long start,
> > unsigned long size)
> > {
> > + struct maple_tree *mt = &vma->vm_mm->mm_mt;
>
> Well looks like that's also an option to avoid a new parameter :)
>
> > + unsigned long end = start + size;
> > struct mmu_notifier_range range;
> > struct mmu_gather tlb;
> > + MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
> >
> > lru_add_drain();
> > mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
> > @@ -1636,8 +1650,9 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
> > tlb_gather_mmu(&tlb, vma->vm_mm);
> > update_hiwater_rss(vma->vm_mm);
> > mmu_notifier_invalidate_range_start(&range);
> > - for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next)
> > + do {
> > unmap_single_vma(&tlb, vma, start, range.end, NULL);
> > + } while ((vma = mas_find(&mas, end - 1)) != NULL);
> > mmu_notifier_invalidate_range_end(&range);
> > tlb_finish_mmu(&tlb);
> > }
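
Since every VMA carries vm_mm, callers can always derive the tree
locally rather than widening signatures; a sketch of that alternative
(vma_mt() is a hypothetical helper, not in the patch):

	static inline struct maple_tree *vma_mt(struct vm_area_struct *vma)
	{
		return &vma->vm_mm->mm_mt;  /* tree embedded in mm_struct */
	}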
> > diff --git a/mm/mmap.c b/mm/mmap.c
> > index dde74e0b195d..e13c6ef76697 100644
> > --- a/mm/mmap.c
> > +++ b/mm/mmap.c
> > @@ -74,9 +74,10 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
> > static bool ignore_rlimit_data;
> > core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
> >
> > -static void unmap_region(struct mm_struct *mm,
> > +static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
> > struct vm_area_struct *vma, struct vm_area_struct *prev,
> > - unsigned long start, unsigned long end);
> > + struct vm_area_struct *next, unsigned long start,
> > + unsigned long end);
> >
> > /* description of effects of mapping type and prot in current implementation.
> > * this is due to the limited x86 page protection hardware. The expected
> > @@ -173,10 +174,8 @@ void unlink_file_vma(struct vm_area_struct *vma)
> > /*
> > * Close a vm structure and free it, returning the next.
>
> No longer returning the next.
ack
>
> > */
> > -static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
> > +static void remove_vma(struct vm_area_struct *vma)
> > {
> > - struct vm_area_struct *next = vma->vm_next;
> > -
> > might_sleep();
> > if (vma->vm_ops && vma->vm_ops->close)
> > vma->vm_ops->close(vma);
>
> <snip>
>
> > */
> > struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
> > {
> > + MA_STATE(mas, &vma->vm_mm->mm_mt, vma->vm_end, vma->vm_end);
> > struct anon_vma *anon_vma = NULL;
> > + struct vm_area_struct *prev, *next;
> >
> > /* Try next first. */
> > - if (vma->vm_next) {
> > - anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
> > + next = mas_walk(&mas);
> > + if (next) {
> > + anon_vma = reusable_anon_vma(next, vma, next);
> > if (anon_vma)
> > return anon_vma;
> > }
> >
> > + prev = mas_prev(&mas, 0);
> > + VM_BUG_ON_VMA(prev != vma, vma);
> > + prev = mas_prev(&mas, 0);
> > /* Try prev next. */
> > - if (vma->vm_prev)
> > - anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
> > + if (prev)
> > + anon_vma = reusable_anon_vma(prev, prev, vma);
> >
> > /*
> > * We might reach here with anon_vma == NULL if we can't find
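
The two mas_prev() calls are the subtle part; a hedged sketch of how
they recover the old vma->vm_prev (the helper name is illustrative
only):

	static struct vm_area_struct *vma_prev_of(struct vm_area_struct *vma)
	{
		MA_STATE(mas, &vma->vm_mm->mm_mt, vma->vm_end, vma->vm_end);

		mas_walk(&mas);            /* index vm_end: following VMA or gap */
		mas_prev(&mas, 0);         /* first step back lands on vma itself */
		return mas_prev(&mas, 0);  /* second step: the old vm_prev */
	}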
> > @@ -1906,10 +1825,10 @@ struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
> > unsigned long start_addr,
> > unsigned long end_addr)
> > {
> > - MA_STATE(mas, &mm->mm_mt, start_addr, start_addr);
> > + unsigned long index = start_addr;
> >
> > mmap_assert_locked(mm);
> > - return mas_find(&mas, end_addr - 1);
> > + return mt_find(&mm->mm_mt, &index, end_addr - 1);
>
> Why is this now changed again?
I found this through one of your previous comments; I have a fix.
>
> > }
> > EXPORT_SYMBOL(find_vma_intersection);
> >
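
Whichever of mas_find()/mt_find() ends up here, the external contract
is unchanged; a usage sketch under the mmap lock (end address
exclusive):

	struct vm_area_struct *v;

	mmap_read_lock(mm);
	v = find_vma_intersection(mm, start, start + len);
	if (v)
		pr_debug("overlap: %lx-%lx\n", v->vm_start, v->vm_end);
	mmap_read_unlock(mm);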
> > @@ -1923,8 +1842,10 @@ EXPORT_SYMBOL(find_vma_intersection);
> > */
> > inline struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
> > {
> > - // Note find_vma_intersection will decrease 0 to underflow to ULONG_MAX
> > - return find_vma_intersection(mm, addr, 0);
> > + unsigned long index = addr;
> > +
> > + mmap_assert_locked(mm);
> > + return mt_find(&mm->mm_mt, &index, ULONG_MAX);
>
> And here.
Ditto.
>
> > }
> > EXPORT_SYMBOL(find_vma);
> >
> > @@ -2026,7 +1947,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
> > if (gap_addr < address || gap_addr > TASK_SIZE)
> > gap_addr = TASK_SIZE;
> >
> > - next = vma->vm_next;
> > + next = vma_find(mm, vma->vm_end);
> > if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
> > if (!(next->vm_flags & VM_GROWSUP))
> > return -ENOMEM;
> > @@ -2072,8 +1993,6 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
> > vma->vm_end = address;
> > vma_store(mm, vma);
> > anon_vma_interval_tree_post_update_vma(vma);
> > - if (!vma->vm_next)
> > - mm->highest_vm_end = vm_end_gap(vma);
> > spin_unlock(&mm->page_table_lock);
> >
> > perf_event_mmap(vma);
> > @@ -2100,7 +2019,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
> > return -EPERM;
> >
> > /* Enforce stack_guard_gap */
> > - prev = vma->vm_prev;
> > + find_vma_prev(mm, vma->vm_start, &prev);
> > /* Check that both stack segments have the same anon_vma? */
> > if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
> > vma_is_accessible(prev)) {
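
For readers following the vm_prev removal: find_vma_prev() returns the
VMA covering (or following) the address and fills in the preceding
one, so the hunk above is equivalent to the old pointer chase; a
sketch:

	struct vm_area_struct *prev, *cur;

	cur = find_vma_prev(mm, vma->vm_start, &prev);
	/* prev now holds what vma->vm_prev used to point at, or NULL */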
> > @@ -2235,20 +2154,22 @@ EXPORT_SYMBOL_GPL(find_extend_vma);
> > *
> > * Called with the mm semaphore held.
>
> Above this, the comment talks about vma list, update?
I will update the comment.
>
> > */
> > -static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
> > +static inline void remove_mt(struct mm_struct *mm, struct maple_tree *detached)
> > {
> > unsigned long nr_accounted = 0;
> > + unsigned long index = 0;
> > + struct vm_area_struct *vma;
> >
> > /* Update high watermark before we lower total_vm */
> > update_hiwater_vm(mm);
> > - do {
> > + mt_for_each(detached, vma, index, ULONG_MAX) {
> > long nrpages = vma_pages(vma);
> >
> > if (vma->vm_flags & VM_ACCOUNT)
> > nr_accounted += nrpages;
> > vm_stat_account(mm, vma->vm_flags, -nrpages);
> > - vma = remove_vma(vma);
> > - } while (vma);
> > + remove_vma(vma);
> > + }
> > vm_unacct_memory(nr_accounted);
> > validate_mm(mm);
> > }
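
mt_for_each() is the generic iterator being leaned on here; a minimal
sketch of draining the detached tree the same way remove_mt() does
(pr_debug() stands in for the real per-VMA work):

	unsigned long index = 0;
	struct vm_area_struct *vma;

	mt_for_each(detached, vma, index, ULONG_MAX)
		pr_debug("detached: %lx-%lx\n", vma->vm_start, vma->vm_end);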