[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220717130134.njpljof5ebeofsx6@revolver>
Date: Sun, 17 Jul 2022 13:01:42 +0000
From: Liam Howlett <liam.howlett@...cle.com>
To: Andrew Morton <akpm@...ux-foundation.org>
CC: "maple-tree@...ts.infradead.org" <maple-tree@...ts.infradead.org>,
"linux-mm@...ck.org" <linux-mm@...ck.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Yu Zhao <yuzhao@...gle.com>, Hugh Dickins <hughd@...gle.com>
Subject: Re: [PATCH v11 00/69] Introducing the Maple Tree
* Andrew Morton <akpm@...ux-foundation.org> [220717 00:21]:
> On Sun, 17 Jul 2022 02:46:32 +0000 Liam Howlett <liam.howlett@...cle.com> wrote:
>
> > This is the v10 + fixes and a few clean ups.
>
> I'm seeing quite a lot of differences between this and what we had in
> mm-unstable. A surprising amount.
>
> Please check it all?
Sorry, I tried to make checkscript happy and some requested cleanups.
>
>
> --- include/linux/maple_tree.h 2022-07-16 21:05:04.697041964 -0700
> +++ ../25-pre-maple/include/linux/maple_tree.h 2022-07-16 21:05:34.509477799 -0700
> @@ -225,9 +225,9 @@
> * @flags: The maple tree flags
> *
> */
> -#define MTREE_INIT(name, __flags) { \
> - .ma_lock = __SPIN_LOCK_UNLOCKED((name).ma_lock), \
> - .ma_flags = __flags, \
> +#define MTREE_INIT(name, flags) { \
> + .ma_lock = __SPIN_LOCK_UNLOCKED(name.ma_lock), \
> + .ma_flags = flags, \
> .ma_root = NULL, \
> }
>
> @@ -238,13 +238,13 @@
> * @lock: The external lock
> */
> #ifdef CONFIG_LOCKDEP
> -#define MTREE_INIT_EXT(name, __flags, __lock) { \
> - .ma_external_lock = &(__lock).dep_map, \
> - .ma_flags = (__flags), \
> +#define MTREE_INIT_EXT(name, flags, lock) { \
> + .ma_external_lock = &(lock).dep_map, \
> + .ma_flags = flags, \
> .ma_root = NULL, \
> }
> #else
> -#define MTREE_INIT_EXT(name, __flags, __lock) MTREE_INIT(name, __flags)
> +#define MTREE_INIT_EXT(name, flags, lock) MTREE_INIT(name, flags)
> #endif
>
> #define DEFINE_MTREE(name) \
> @@ -520,8 +520,8 @@
> * Note: may return the zero entry.
> *
> */
> -#define mas_for_each(__mas, __entry, __max) \
> - while (((__entry) = mas_find((__mas), (__max))) != NULL)
> +#define mas_for_each(mas, entry, max) \
> + while (((entry) = mas_find((mas), (max))) != NULL)
>
>
> /**
> @@ -654,9 +654,9 @@
> *
> * Note: Will not return the zero entry.
> */
> -#define mt_for_each(__tree, __entry, __index, __max) \
> - for (__entry = mt_find(__tree, &(__index), __max); \
> - __entry; __entry = mt_find_after(__tree, &(__index), __max))
> +#define mt_for_each(tree, entry, index, max) \
> + for (entry = mt_find(tree, &(index), max); \
> + entry; entry = mt_find_after(tree, &(index), max))
>
>
> #ifdef CONFIG_DEBUG_MAPLE_TREE
> @@ -665,12 +665,12 @@
>
> void mt_dump(const struct maple_tree *mt);
> void mt_validate(struct maple_tree *mt);
> -#define MT_BUG_ON(__tree, __x) do { \
> +#define MT_BUG_ON(tree, x) do { \
> atomic_inc(&maple_tree_tests_run); \
> - if (__x) { \
> + if (x) { \
> pr_info("BUG at %s:%d (%u)\n", \
> - __func__, __LINE__, __x); \
> - mt_dump(__tree); \
> + __func__, __LINE__, x); \
> + mt_dump(tree); \
> pr_info("Pass: %u Run:%u\n", \
> atomic_read(&maple_tree_tests_passed), \
> atomic_read(&maple_tree_tests_run)); \
> @@ -680,7 +680,7 @@
> } \
> } while (0)
> #else
> -#define MT_BUG_ON(__tree, __x) BUG_ON(__x)
> +#define MT_BUG_ON(tree, x) BUG_ON(x)
> #endif /* CONFIG_DEBUG_MAPLE_TREE */
>
> #endif /*_LINUX_MAPLE_TREE_H */
> --- include/linux/mm.h 2022-07-16 21:05:07.625084770 -0700
> +++ ../25-pre-maple/include/linux/mm.h 2022-07-16 21:05:37.847526599 -0700
> @@ -683,12 +683,11 @@
> return vmi->mas.index;
> }
>
> -#define for_each_vma(__vmi, __vma) \
> - while (((__vma) = vma_next(&(__vmi))) != NULL)
> +#define for_each_vma(vmi, vma) while ((vma = vma_next(&(vmi))) != NULL)
>
> /* The MM code likes to work with exclusive end addresses */
> -#define for_each_vma_range(__vmi, __vma, __end) \
> - while (((__vma) = vma_find(&(__vmi), (__end) - 1)) != NULL)
> +#define for_each_vma_range(vmi, vma, end) \
> + while ((vma = vma_find(&(vmi), (end) - 1)) != NULL)
>
> #ifdef CONFIG_SHMEM
> /*
> --- include/linux/mm_types.h 2022-07-16 21:05:07.625084770 -0700
> +++ ../25-pre-maple/include/linux/mm_types.h 2022-07-16 21:05:37.848526614 -0700
> @@ -681,11 +681,11 @@
> struct ma_state mas;
> };
>
> -#define VMA_ITERATOR(name, __mm, __addr) \
> +#define VMA_ITERATOR(name, mm, addr) \
> struct vma_iterator name = { \
> .mas = { \
> - .tree = &(__mm)->mm_mt, \
> - .index = __addr, \
> + .tree = &mm->mm_mt, \
> + .index = addr, \
> .node = MAS_START, \
> }, \
> }
>
>
>
All of the above changes are attempting to avoid issues with define
using variable names that may be used and potential dereferencing
issues.
>
> --- mm/memory.c 2022-07-16 21:05:07.627084799 -0700
> +++ ../25-pre-maple/mm/memory.c 2022-07-16 21:05:37.980528543 -0700
> @@ -1699,6 +1699,7 @@
> /**
> * unmap_vmas - unmap a range of memory covered by a list of vma's
> * @tlb: address of the caller's struct mmu_gather
> + * @mt: The maple tree pointer for the VMAs
> * @vma: the starting vma
> * @start_addr: virtual address at which to start unmapping
> * @end_addr: virtual address at which to end unmapping
> --- mm/mmap.c 2022-07-16 21:05:07.716086100 -0700
> +++ ../25-pre-maple/mm/mmap.c 2022-07-16 21:05:38.104530356 -0700
> @@ -341,27 +341,28 @@
> MA_STATE(mas, mt, 0, 0);
>
> mt_validate(&mm->mm_mt);
> +
> mas_for_each(&mas, vma_mt, ULONG_MAX) {
> if ((vma_mt->vm_start != mas.index) ||
> (vma_mt->vm_end - 1 != mas.last)) {
> pr_emerg("issue in %s\n", current->comm);
> dump_stack();
> dump_vma(vma_mt);
> - pr_emerg("mt piv: %p %lu - %lu\n", vma_mt,
> + pr_emerg("mt piv: %px %lu - %lu\n", vma_mt,
> mas.index, mas.last);
> - pr_emerg("mt vma: %p %lu - %lu\n", vma_mt,
> + pr_emerg("mt vma: %px %lu - %lu\n", vma_mt,
> vma_mt->vm_start, vma_mt->vm_end);
>
> mt_dump(mas.tree);
> if (vma_mt->vm_end != mas.last + 1) {
> - pr_err("vma: %p vma_mt %lu-%lu\tmt %lu-%lu\n",
> + pr_err("vma: %px vma_mt %lu-%lu\tmt %lu-%lu\n",
> mm, vma_mt->vm_start, vma_mt->vm_end,
> mas.index, mas.last);
> mt_dump(mas.tree);
> }
> VM_BUG_ON_MM(vma_mt->vm_end != mas.last + 1, mm);
> if (vma_mt->vm_start != mas.index) {
> - pr_err("vma: %p vma_mt %p %lu - %lu doesn't match\n",
> + pr_err("vma: %px vma_mt %px %lu - %lu doesn't match\n",
> mm, vma_mt, vma_mt->vm_start, vma_mt->vm_end);
> mt_dump(mas.tree);
> }
I removed px here to avoid leaking pointers - although this is in debug,
so maybe it's not that critical.
> @@ -448,7 +449,7 @@
> unsigned long vm_start = max(addr, vma->vm_start);
> unsigned long vm_end = min(end, vma->vm_end);
>
> - nr_pages += PHYS_PFN(vm_end - vm_start);
> + nr_pages += (vm_end - vm_start) / PAGE_SIZE;
> }
This was suggested by David, I should have sent it out as a patch before
v11.
>
> return nr_pages;
> @@ -710,11 +711,11 @@
> * remove_next == 1 is case 1 or 7.
> */
> remove_next = 1 + (end > next->vm_end);
> - if (remove_next == 2)
> - next_next = find_vma(mm, next->vm_end);
> -
> + next_next = find_vma(mm, next->vm_end);
> VM_WARN_ON(remove_next == 2 &&
> end != next_next->vm_end);
> + /* trim end to next, for case 6 first pass */
> + end = next->vm_end;
> }
>
> exporter = next;
> @@ -725,7 +726,7 @@
> * next, if the vma overlaps with it.
> */
> if (remove_next == 2 && !next->anon_vma)
> - exporter = next_next;
> + exporter = find_vma(mm, next->vm_end);
>
> } else if (end > next->vm_start) {
> /*
> @@ -762,6 +763,7 @@
> return error;
> }
> }
> +again:
> vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
>
> if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
> @@ -852,8 +854,6 @@
>
> if (remove_next && file) {
> __remove_shared_vm_struct(next, file, mapping);
> - if (remove_next == 2)
> - __remove_shared_vm_struct(next_next, file, mapping);
> } else if (insert) {
> /*
> * split_vma has split insert from vma, and needs
> @@ -881,7 +881,6 @@
> }
>
> if (remove_next) {
> -again:
> if (file) {
> uprobe_munmap(next, next->vm_start, next->vm_end);
> fput(file);
> @@ -890,22 +889,45 @@
> anon_vma_merge(vma, next);
> mm->map_count--;
> mpol_put(vma_policy(next));
> - if (remove_next != 2)
> - BUG_ON(vma->vm_end < next->vm_end);
> + BUG_ON(vma->vm_end < next->vm_end);
> vm_area_free(next);
>
> /*
> * In mprotect's case 6 (see comments on vma_merge),
> - * we must remove next_next too.
> + * we must remove another next too. It would clutter
> + * up the code too much to do both in one go.
> */
> + if (remove_next != 3) {
> + /*
> + * If "next" was removed and vma->vm_end was
> + * expanded (up) over it, in turn
> + * "next->prev->vm_end" changed and the
> + * "vma->next" gap must be updated.
> + */
> + next = next_next;
> + } else {
> + /*
> + * For the scope of the comment "next" and
> + * "vma" considered pre-swap(): if "vma" was
> + * removed, next->vm_start was expanded (down)
> + * over it and the "next" gap must be updated.
> + * Because of the swap() the post-swap() "vma"
> + * actually points to pre-swap() "next"
> + * (post-swap() "next" as opposed is now a
> + * dangling pointer).
> + */
> + next = vma;
> + }
> if (remove_next == 2) {
> + mas_reset(&mas);
> remove_next = 1;
> - next = next_next;
> + end = next->vm_end;
> goto again;
> }
> }
> - if (insert && file)
> + if (insert && file) {
> uprobe_mmap(insert);
> + }
>
> mas_destroy(&mas);
> validate_mm(mm);
> @@ -1613,8 +1635,8 @@
> return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
> }
This is fallout from the syzbot bug from the patch I sent Hugh. This is
the expected difference that had to change a few patches in the series
as it went through. I should say that it's slightly different than
Hugh's copy. This copy searches for the required VMA only when needed.
I actually really like how this turned out since it was able to simplify
__vma_adjust().
>
> -/**
> - * unmapped_area() - Find an area between the low_limit and the high_limit with
> +/*
> + * unmapped_area() Find an area between the low_limit and the high_limit with
> * the correct alignment and offset, all from @info. Note: current->mm is used
> * for the search.
> *
> @@ -1640,15 +1662,12 @@
>
> gap = mas.index;
> gap += (info->align_offset - gap) & info->align_mask;
> - VM_BUG_ON(gap + info->length > info->high_limit);
> - VM_BUG_ON(gap + info->length > mas.last);
> return gap;
> }
>
> -/**
> - * unmapped_area_topdown() - Find an area between the low_limit and the
> +/* unmapped_area_topdown() Find an area between the low_limit and the
> * high_limit with * the correct alignment and offset at the highest available
> - * address, all from @info. Note: current->mm is used for the search.
> + * address, all from * @info. Note: current->mm is used for the search.
> *
> * @info: The unmapped area information including the range (low_limit -
> * hight_limit), the alignment offset and mask.
> @@ -1671,8 +1690,6 @@
>
> gap = mas.last + 1 - info->length;
> gap -= (gap - info->align_offset) & info->align_mask;
> - VM_BUG_ON(gap < info->low_limit);
> - VM_BUG_ON(gap < mas.index);
> return gap;
> }
These were added in an attempt to restore the previous VM_BUG_ON(). I
must have errors in them. These should be dropped until better tested.
>
> @@ -1884,12 +1901,12 @@
> EXPORT_SYMBOL(find_vma_intersection);
>
> /**
> - * find_vma() - Find the VMA for a given address, or the next VMA.
> + * find_vma() - Find the VMA for a given address, or the next vma.
> * @mm: The mm_struct to check
> * @addr: The address
> *
> - * Returns: The VMA associated with addr, or the next VMA.
> - * May return %NULL in the case of no VMA at addr or above.
> + * Returns: The VMA associated with addr, or the next vma.
> + * May return %NULL in the case of no vma at addr or above.
> */
> struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
> {
> @@ -2642,7 +2659,7 @@
> (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file,
> pgoff, vma->vm_userfaultfd_ctx, NULL) :
> can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,
> - NULL_VM_UFFD_CTX, NULL))) {
> + NULL_VM_UFFD_CTX , NULL))) {
> merge_start = prev->vm_start;
> vma = prev;
> vm_pgoff = prev->vm_pgoff;
> @@ -3036,7 +3053,6 @@
> unsigned long addr, unsigned long len, unsigned long flags)
> {
> struct mm_struct *mm = current->mm;
> -
> validate_mm_mt(mm);
>
> /*
> @@ -3092,7 +3108,7 @@
> vma->vm_flags = flags;
> vma->vm_page_prot = vm_get_page_prot(flags);
> mas_set_range(mas, vma->vm_start, addr + len - 1);
> - if (mas_store_gfp(mas, vma, GFP_KERNEL))
> + if ( mas_store_gfp(mas, vma, GFP_KERNEL))
> goto mas_store_fail;
>
> mm->map_count++;
> @@ -3155,7 +3171,7 @@
> vma = mas_prev(&mas, 0);
> if (!vma || vma->vm_end != addr || vma_policy(vma) ||
> !can_vma_merge_after(vma, flags, NULL, NULL,
> - addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL))
> + addr >> PAGE_SHIFT,NULL_VM_UFFD_CTX, NULL))
> vma = NULL;
>
> ret = do_brk_flags(&mas, vma, addr, len, flags);
These are all cosmetic fixes for checkscript. I now regret making these
changes as I increased confusion for very little gain - plus the second
from the last is incorrect.
I should have prioritized stability over cleaner code this late in the
cycle. Let me know if you'd like me to respin with just the addition of
the one fix sent to Hugh.
Thanks,
Liam
Powered by blists - more mailing lists