[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <654dd596-3875-4ab4-acdf-9e5f547b5551@lucifer.local>
Date: Tue, 6 Aug 2024 15:20:49 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Petr Tesařík <petr@...arici.cz>
Cc: linux-mm@...ck.org, linux-kernel@...r.kernel.org,
Andrew Morton <akpm@...ux-foundation.org>,
"Liam R . Howlett" <Liam.Howlett@...cle.com>,
Vlastimil Babka <vbabka@...e.cz>
Subject: Re: [PATCH 02/10] mm: introduce vma_merge_struct and abstract merge
parameters
On Tue, Aug 06, 2024 at 04:06:50PM GMT, Petr Tesařík wrote:
> On Tue, 6 Aug 2024 14:43:48 +0100
> Lorenzo Stoakes <lorenzo.stoakes@...cle.com> wrote:
>
> > On Tue, Aug 06, 2024 at 02:47:54PM GMT, Petr Tesařík wrote:
> > > Hi Lorenzo!
> > >
> > > On Mon, 5 Aug 2024 13:13:49 +0100
> > > Lorenzo Stoakes <lorenzo.stoakes@...cle.com> wrote:
> > >
> > > > Rather than passing around huge numbers of parameters to numerous helper
> > > > functions, abstract them into a single struct that we thread through the
> > > > operation.
> > > >
> > > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
> > > > ---
> > > > mm/mmap.c | 76 ++++++++------
> > > > mm/vma.c | 297 ++++++++++++++++++++++++++++++++++++++----------------
> > > > mm/vma.h | 92 ++++++++---------
> > > > 3 files changed, 294 insertions(+), 171 deletions(-)
> > > >
> > > > diff --git a/mm/mmap.c b/mm/mmap.c
> > > > index 4a9c2329b09a..f931000c561f 100644
> > > > --- a/mm/mmap.c
> > > > +++ b/mm/mmap.c
> > > > @@ -1369,9 +1369,16 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > > > unsigned long end = addr + len;
> > > > unsigned long merge_start = addr, merge_end = end;
> > > > bool writable_file_mapping = false;
> > > > - pgoff_t vm_pgoff;
> > > > int error;
> > > > VMA_ITERATOR(vmi, mm, addr);
> > > > + struct vma_merge_struct vmg = {
> > > > + .vmi = &vmi,
> > > > + .start = addr,
> > > > + .end = end,
> > > > + .flags = vm_flags,
> > > > + .pgoff = pgoff,
> > > > + .file = file,
> > > > + };
> > > >
> > > > /* Check against address space limit. */
> > > > if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
> > > > @@ -1405,8 +1412,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > > > vm_flags |= VM_ACCOUNT;
> > > > }
> > > >
> > > > - next = vma_next(&vmi);
> > > > - prev = vma_prev(&vmi);
> > > > + next = vmg.next = vma_next(&vmi);
> > > > + prev = vmg.prev = vma_prev(&vmi);
> > >
> > > So, next is now a shortcut for vmg.next, and prev is a shortcut for
> > > vmg.prev. ATM there is only one assignment, so no big deal, but I
> > > wonder if next and prev could be removed instead, same as you replaced
> > > vm_pgoff with vmg.pgoff.
> >
> > It's simply to avoid repeatedly referencing vmg.xxx / at least reduce
> > _some_ churn. Also this will get moved shortly, so it's worth looking at in
> > final form.
>
> I'm not a MM maintainer, so my comments may not be relevant, but my
> experience shows that pointer aliases have a potential to introduce all
> kinds of subtle bugs. That's the reason I generally try to avoid them.
Right, I understand, but I don't want to get too deep into a distracting
bikeshed when this series is doing something quite major.
If you feel this is absolutely critical, I can adjust this code that I
later delete; if not, I suggest leaving it as it is.
>
> >
> > >
> > > Is the resulting code _too_ ugly?
> > >
> > > > if (vm_flags & VM_SPECIAL) {
> > > > if (prev)
> > > > vma_iter_next_range(&vmi);
> > > > @@ -1416,29 +1423,30 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > > > /* Attempt to expand an old mapping */
> > > > /* Check next */
> > > > if (next && next->vm_start == end && !vma_policy(next) &&
> > > > - can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen,
> > > > - NULL_VM_UFFD_CTX, NULL)) {
> > > > + can_vma_merge_before(&vmg)) {
> > > > merge_end = next->vm_end;
> > > > vma = next;
> > > > - vm_pgoff = next->vm_pgoff - pglen;
> > > > + vmg.pgoff = next->vm_pgoff - pglen;
> > > > + }
> > > > +
> > > > + if (vma) {
> > > > + vmg.anon_vma = vma->anon_vma;
> > > > + vmg.uffd_ctx = vma->vm_userfaultfd_ctx;
> > > > }
> > > >
> > > > /* Check prev */
> > > > if (prev && prev->vm_end == addr && !vma_policy(prev) &&
> > > > - (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file,
> > > > - pgoff, vma->vm_userfaultfd_ctx, NULL) :
> > > > - can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,
> > > > - NULL_VM_UFFD_CTX, NULL))) {
> > > > + can_vma_merge_after(&vmg)) {
> > > > merge_start = prev->vm_start;
> > > > vma = prev;
> > > > - vm_pgoff = prev->vm_pgoff;
> > > > + vmg.pgoff = prev->vm_pgoff;
> > > > } else if (prev) {
> > > > vma_iter_next_range(&vmi);
> > > > }
> > > >
> > > > /* Actually expand, if possible */
> > > > if (vma &&
> > > > - !vma_expand(&vmi, vma, merge_start, merge_end, vm_pgoff, next)) {
> > > > + !vma_expand(&vmi, vma, merge_start, merge_end, vmg.pgoff, next)) {
> > > > khugepaged_enter_vma(vma, vm_flags);
> > > > goto expanded;
> > > > }
> > > > @@ -1790,25 +1798,31 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
> > > > * Expand the existing vma if possible; Note that singular lists do not
> > > > * occur after forking, so the expand will only happen on new VMAs.
> > > > */
> > > > - if (vma && vma->vm_end == addr && !vma_policy(vma) &&
> > > > - can_vma_merge_after(vma, flags, NULL, NULL,
> > > > - addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) {
> > > > - vma_iter_config(vmi, vma->vm_start, addr + len);
> > > > - if (vma_iter_prealloc(vmi, vma))
> > > > - goto unacct_fail;
> > > > -
> > > > - vma_start_write(vma);
> > > > -
> > > > - init_vma_prep(&vp, vma);
> > > > - vma_prepare(&vp);
> > > > - vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0);
> > > > - vma->vm_end = addr + len;
> > > > - vm_flags_set(vma, VM_SOFTDIRTY);
> > > > - vma_iter_store(vmi, vma);
> > > > -
> > > > - vma_complete(&vp, vmi, mm);
> > > > - khugepaged_enter_vma(vma, flags);
> > > > - goto out;
> > > > + if (vma && vma->vm_end == addr && !vma_policy(vma)) {
> > > > + struct vma_merge_struct vmg = {
> > > > + .prev = vma,
> > > > + .flags = flags,
> > > > + .pgoff = addr >> PAGE_SHIFT,
> > > > + };
> > > > +
> > > > + if (can_vma_merge_after(&vmg)) {
> > > > + vma_iter_config(vmi, vma->vm_start, addr + len);
> > > > + if (vma_iter_prealloc(vmi, vma))
> > > > + goto unacct_fail;
> > > > +
> > > > + vma_start_write(vma);
> > > > +
> > > > + init_vma_prep(&vp, vma);
> > > > + vma_prepare(&vp);
> > > > + vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0);
> > > > + vma->vm_end = addr + len;
> > > > + vm_flags_set(vma, VM_SOFTDIRTY);
> > > > + vma_iter_store(vmi, vma);
> > > > +
> > > > + vma_complete(&vp, vmi, mm);
> > > > + khugepaged_enter_vma(vma, flags);
> > > > + goto out;
> > > > + }
> > > > }
> > > >
> > > > if (vma)
> > > > diff --git a/mm/vma.c b/mm/vma.c
> > > > index bf0546fe6eab..20c4ce7712c0 100644
> > > > --- a/mm/vma.c
> > > > +++ b/mm/vma.c
> > > > @@ -7,16 +7,18 @@
> > > > #include "vma_internal.h"
> > > > #include "vma.h"
> > > >
> > > > -/*
> > > > - * If the vma has a ->close operation then the driver probably needs to release
> > > > - * per-vma resources, so we don't attempt to merge those if the caller indicates
> > > > - * the current vma may be removed as part of the merge.
> > > > - */
> > > > -static inline bool is_mergeable_vma(struct vm_area_struct *vma,
> > > > - struct file *file, unsigned long vm_flags,
> > > > - struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
> > > > - struct anon_vma_name *anon_name, bool may_remove_vma)
> > > > +static inline bool is_mergeable_vma(struct vma_merge_struct *vmg, bool merge_next)
> > > > {
> > > > + struct vm_area_struct *vma = merge_next ? vmg->next : vmg->prev;
> > > > + /*
> > > > + * If the vma has a ->close operation then the driver probably needs to
> > > > + * release per-vma resources, so we don't attempt to merge those if the
> > > > + * caller indicates the current vma may be removed as part of the merge,
> > > > + * which is the case if we are attempting to merge the next VMA into
> > > > + * this one.
> > > > + */
> > > > + bool may_remove_vma = merge_next;
> > > > +
> > >
> > > This variable is used only once. If you want to clarify the double
> > > meaning of the merge_next parameter, consider moving this comment
> > > further down to the conditional and merely renaming the parameter.
> > >
> > > > /*
> > > > * VM_SOFTDIRTY should not prevent from VMA merging, if we
> > > > * match the flags but dirty bit -- the caller should mark
> > > > @@ -25,15 +27,15 @@ static inline bool is_mergeable_vma(struct vm_area_struct *vma,
> > > > * the kernel to generate new VMAs when old one could be
> > > > * extended instead.
> > > > */
> > > > - if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
> > > > + if ((vma->vm_flags ^ vmg->flags) & ~VM_SOFTDIRTY)
> > > > return false;
> > > > - if (vma->vm_file != file)
> > > > + if (vma->vm_file != vmg->file)
> > > > return false;
> > > > if (may_remove_vma && vma->vm_ops && vma->vm_ops->close)
> > >
> > > AFAICS this is the only place where may_remove_vma is used.
> >
> > Yes it is, but the point is to document what we're doing. The compiler
> > simplifies all this in the generated code.
>
> What's wrong with moving the comment for this variable before this
> conditional?
Because in kernel-style C you have to put declarations at the top. Also, the
parameter was originally called may_remove_vma, and the variable is
self-documenting by having that name.
Do note that I ultimately remove this code in patch 10.
This feels very bike-sheddy.
>
> > >
> > > > return false;
> > > > - if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
> > > > + if (!is_mergeable_vm_userfaultfd_ctx(vma, vmg->uffd_ctx))
> > > > return false;
> > > > - if (!anon_vma_name_eq(anon_vma_name(vma), anon_name))
> > > > + if (!anon_vma_name_eq(anon_vma_name(vma), vmg->anon_name))
> > > > return false;
> > > > return true;
> > > > }
> > > > @@ -94,16 +96,16 @@ static void init_multi_vma_prep(struct vma_prepare *vp,
> > > > * We assume the vma may be removed as part of the merge.
> > > > */
> > > > bool
> > > > -can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
> > > > - struct anon_vma *anon_vma, struct file *file,
> > > > - pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
> > > > - struct anon_vma_name *anon_name)
> > > > +can_vma_merge_before(struct vma_merge_struct *vmg)
> > > > {
> > > > - if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name, true) &&
> > > > - is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
> > > > - if (vma->vm_pgoff == vm_pgoff)
> > > > + pgoff_t pglen = PHYS_PFN(vmg->end - vmg->start);
> > > > +
> > > > + if (is_mergeable_vma(vmg, true) &&
> > > > + is_mergeable_anon_vma(vmg->anon_vma, vmg->next->anon_vma, vmg->next)) {
> > > > + if (vmg->next->vm_pgoff == vmg->pgoff + pglen)
> > > > return true;
> > > > }
> > > > +
> > > > return false;
> > > > }
> > > >
> > > > @@ -116,18 +118,11 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
> > > > *
> > > > * We assume that vma is not removed as part of the merge.
> > > > */
> > > > -bool
> > > > -can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
> > > > - struct anon_vma *anon_vma, struct file *file,
> > > > - pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
> > > > - struct anon_vma_name *anon_name)
> > > > +bool can_vma_merge_after(struct vma_merge_struct *vmg)
> > > > {
> > > > - if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name, false) &&
> > > > - is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
> > > > - pgoff_t vm_pglen;
> > > > -
> > > > - vm_pglen = vma_pages(vma);
> > > > - if (vma->vm_pgoff + vm_pglen == vm_pgoff)
> > > > + if (is_mergeable_vma(vmg, false) &&
> > > > + is_mergeable_anon_vma(vmg->anon_vma, vmg->prev->anon_vma, vmg->prev)) {
> > > > + if (vmg->prev->vm_pgoff + vma_pages(vmg->prev) == vmg->pgoff)
> > > > return true;
> > > > }
> > > > return false;
> > > > @@ -180,7 +175,7 @@ void unmap_region(struct mm_struct *mm, struct ma_state *mas,
> > > > * VMA Iterator will point to the end VMA.
> > > > */
> > > > static int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
> > > > - unsigned long addr, int new_below)
> > > > + unsigned long addr, bool new_below)
> > > > {
> > > > struct vma_prepare vp;
> > > > struct vm_area_struct *new;
> > > > @@ -261,13 +256,14 @@ static int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
> > > > * Split a vma into two pieces at address 'addr', a new vma is allocated
> > > > * either for the first part or the tail.
> > > > */
> > > > -static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
> > > > - unsigned long addr, int new_below)
> > > > +static int split_vma(struct vma_merge_struct *vmg, bool new_below)
> > >
> > > IMHO this patch is already long enough. Maybe the type change from int
> > > to bool could be split out to a separate patch to reduce churn here?
> >
> > I don't really understand this comment. This reduces the number of lines of
> > code, and it's a line I have to change anyway, so there'd be _more_ churn
> > to split this out?
> >
> > I don't think this is really all that important, but it'd be very silly to
> > split this out in my opinion.
>
> Possibly a matter of taste. The churn is further down:
>
> >
> > >
> > > > {
> > > > - if (vma->vm_mm->map_count >= sysctl_max_map_count)
> > > > + if (vmg->vma->vm_mm->map_count >= sysctl_max_map_count)
> > > > return -ENOMEM;
> > > >
> > > > - return __split_vma(vmi, vma, addr, new_below);
> > > > + return __split_vma(vmg->vmi, vmg->vma,
> > > > + new_below ? vmg->start : vmg->end,
> > > > + new_below);
> > > > }
> > > >
> > > > /*
> > > > @@ -712,7 +708,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
> > > > if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
> > > > goto map_count_exceeded;
> > > >
> > > > - error = __split_vma(vmi, vma, start, 1);
> > > > + error = __split_vma(vmi, vma, start, true);
>
> Churn here.
>
> > > > if (error)
> > > > goto start_split_failed;
> > > > }
> > > > @@ -725,7 +721,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
> > > > do {
> > > > /* Does it split the end? */
> > > > if (next->vm_end > end) {
> > > > - error = __split_vma(vmi, next, end, 0);
> > > > + error = __split_vma(vmi, next, end, false);
>
> Churn here.
>
Right, this is extremely silly. Churn isn't a synonym for 'literally any
change that you don't think has immediate value'. It implies _significant_
changes made for little to no value.
This is an absolutely tiny change that improves the code quality, made
while I was already changing the signature.
> But you're right, no big deal.
>
I'm glad we agree on that :)
> > > > if (error)
> > > > goto end_split_failed;
> > > > }
> > > > @@ -934,16 +930,10 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
> > > > * **** is not represented - it will be merged and the vma containing the
> > > > * area is returned, or the function will return NULL
> > > > */
> > > > -static struct vm_area_struct
> > > > -*vma_merge(struct vma_iterator *vmi, struct vm_area_struct *prev,
> > > > - struct vm_area_struct *src, unsigned long addr, unsigned long end,
> > > > - unsigned long vm_flags, pgoff_t pgoff, struct mempolicy *policy,
> > > > - struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
> > > > - struct anon_vma_name *anon_name)
> > > > +static struct vm_area_struct *vma_merge(struct vma_merge_struct *vmg)
> > > > {
> > > > - struct mm_struct *mm = src->vm_mm;
> > > > - struct anon_vma *anon_vma = src->anon_vma;
> > > > - struct file *file = src->vm_file;
> > > > + struct mm_struct *mm = container_of(vmg->vmi->mas.tree, struct mm_struct, mm_mt);
> > > > + struct vm_area_struct *prev = vmg->prev;
> > > > struct vm_area_struct *curr, *next, *res;
> > > > struct vm_area_struct *vma, *adjust, *remove, *remove2;
> > > > struct vm_area_struct *anon_dup = NULL;
> > > > @@ -953,16 +943,18 @@ static struct vm_area_struct
> > > > bool merge_prev = false;
> > > > bool merge_next = false;
> > > > bool vma_expanded = false;
> > > > + unsigned long addr = vmg->start;
> > > > + unsigned long end = vmg->end;
> > > > unsigned long vma_start = addr;
> > > > unsigned long vma_end = end;
> > > > - pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
> > > > + pgoff_t pglen = PHYS_PFN(end - addr);
> > > > long adj_start = 0;
> > > >
> > > > /*
> > > > * We later require that vma->vm_flags == vm_flags,
> > > > * so this tests vma->vm_flags & VM_SPECIAL, too.
> > > > */
> > > > - if (vm_flags & VM_SPECIAL)
> > > > + if (vmg->flags & VM_SPECIAL)
> > > > return NULL;
> > > >
> > > > /* Does the input range span an existing VMA? (cases 5 - 8) */
> > > > @@ -970,27 +962,26 @@ static struct vm_area_struct
> > > >
> > > > if (!curr || /* cases 1 - 4 */
> > > > end == curr->vm_end) /* cases 6 - 8, adjacent VMA */
> > > > - next = vma_lookup(mm, end);
> > > > + next = vmg->next = vma_lookup(mm, end);
> > > > else
> > > > - next = NULL; /* case 5 */
> > > > + next = vmg->next = NULL; /* case 5 */
> > >
> > > Again, is it worth keeping the "next" variable, or could we replace it
> > > with "vmg->next" everywhere?
> >
> > I already responded previously but equally, I'm explicitly using a local
> > variable to keep the code relatively simple and to not be constantly
> > ostensibly dereferencing vmg.
>
> Yeah, sure. OTOH whoever looks at the code may ask why there is both
> "vmg->next" and "next" and if they're really (supposed to be) the same
> thing or if there's a subtle difference.
Again, I ultimately delete this code, so this is not really worth spending
much time on.
>
> Petr T
Powered by blists - more mailing lists