[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240806155555.1125554c@mordecai.tesarici.cz>
Date: Tue, 6 Aug 2024 15:55:55 +0200
From: Petr Tesařík <petr@...arici.cz>
To: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
Cc: linux-mm@...ck.org, linux-kernel@...r.kernel.org, Andrew Morton
<akpm@...ux-foundation.org>, "Liam R . Howlett" <Liam.Howlett@...cle.com>,
Vlastimil Babka <vbabka@...e.cz>
Subject: Re: [PATCH 10/10] mm: rework vm_ops->close() handling on VMA merge
On Mon, 5 Aug 2024 13:13:57 +0100
Lorenzo Stoakes <lorenzo.stoakes@...cle.com> wrote:
> In commit 714965ca8252 ("mm/mmap: start distinguishing if vma can be
> removed in mergeability test") we relaxed the VMA merge rules for VMAs
> possessing a vm_ops->close() hook, permitting this operation in instances
> where we wouldn't delete the VMA as part of the merge operation.
>
> This was later corrected in commit fc0c8f9089c2 ("mm, mmap: fix vma_merge()
> case 7 with vma_ops->close") to account for a subtle case that the previous
> commit had not taken into account.
>
> In both instances, we first rely on is_mergeable_vma() to determine whether
> we might be dealing with a VMA that might be removed, taking advantage of
> the fact that a 'previous' VMA will never be deleted, only VMAs that follow
> it.
>
> The second patch corrects the instance where a merge of the previous VMA
> into a subsequent one did not correctly check whether the subsequent VMA
> had a vm_ops->close() handler.
>
> Both changes prevent merge cases that are actually permissible (for
> instance a merge of a VMA into a following VMA with a vm_ops->close(), but
> with no previous VMA, which would result in the next VMA being extended,
> not deleted).
>
> In addition, both changes fail to consider the case where a VMA that would
> otherwise be merged with the previous and next VMA might have
> vm_ops->close(), on the assumption that for this to be the case, all three
> would have to have the same vma->vm_file to be mergeable and thus the same
> vm_ops.
>
> And in addition both changes operate at 50,000 feet, trying to guess
> whether a VMA will be deleted.
>
> As we have majorly refactored the VMA merge operation and de-duplicated
> code to the point where we know precisely where deletions will occur, this
> patch removes the aforementioned checks altogether and instead explicitly
> checks whether a VMA will be deleted.
>
> In cases where a reduced merge is still possible (where we merge both
> previous and next VMA but the next VMA has a vm_ops->close hook, meaning we
> could just merge the previous and current VMA), we do so, otherwise the
> merge is not permitted.
>
> We take advantage of our userland testing to assert that this functions
> correctly - replacing the previous limited vm_ops->close() tests with tests
> for every single case where we delete a VMA.
>
> We also update all testing for both new and modified VMAs to set
> vma->vm_ops->close() in every single instance where this would not prevent
> the merge, to assert that we never do so.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
> ---
> mm/vma.c | 69 ++++++++-----
> tools/testing/vma/vma.c | 213 ++++++++++++++++++++++++----------------
> 2 files changed, 173 insertions(+), 109 deletions(-)
>
> diff --git a/mm/vma.c b/mm/vma.c
> index c55ae035f5d6..9c779fc65ba8 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -10,14 +10,6 @@
> static inline bool is_mergeable_vma(struct vma_merge_struct *vmg, bool merge_next)
> {
> struct vm_area_struct *vma = merge_next ? vmg->next : vmg->prev;
> - /*
> - * If the vma has a ->close operation then the driver probably needs to
> - * release per-vma resources, so we don't attempt to merge those if the
> - * caller indicates the current vma may be removed as part of the merge,
> - * which is the case if we are attempting to merge the next VMA into
> - * this one.
> - */
> - bool may_remove_vma = merge_next;
See my comment on PATCH 02/10. You're removing the local variable here,
so maybe it need not be introduced in the first place?
> if (!mpol_equal(vmg->policy, vma_policy(vma)))
> return false;
> @@ -33,8 +25,6 @@ static inline bool is_mergeable_vma(struct vma_merge_struct *vmg, bool merge_nex
> return false;
> if (vma->vm_file != vmg->file)
> return false;
> - if (may_remove_vma && vma->vm_ops && vma->vm_ops->close)
> - return false;
> if (!is_mergeable_vm_userfaultfd_ctx(vma, vmg->uffd_ctx))
> return false;
> if (!anon_vma_name_eq(anon_vma_name(vma), vmg->anon_name))
> @@ -606,6 +596,12 @@ static int commit_merge(struct vma_merge_struct *vmg,
> return 0;
> }
>
> +/* We can only remove VMAs when merging if they do not have a close hook. */
> +static bool can_merge_remove_vma(struct vm_area_struct *vma)
> +{
> + return !vma->vm_ops || !vma->vm_ops->close;
> +}
> +
> /*
> * vma_merge_modified - Attempt to merge VMAs based on a VMA having its
> * attributes modified.
> @@ -710,9 +706,30 @@ static struct vm_area_struct *vma_merge_modified(struct vma_merge_struct *vmg)
>
> /* If we span the entire VMA, a merge implies it will be deleted. */
> merge_will_delete_vma = left_side && right_side;
> - /* If we merge both VMAs, then next is also deleted. */
> +
> + /*
> + * If we need to remove vma in its entirety but are unable to do so,
> + * we have no sensible recourse but to abort the merge.
> + */
> + if (merge_will_delete_vma && !can_merge_remove_vma(vma))
> + return NULL;
> +
> + /*
> + * If we merge both VMAs, then next is also deleted. This implies
> + * merge_will_delete_vma also.
> + */
> merge_will_delete_next = merge_both;
>
> + /*
> + * If we cannot delete next, then we can reduce the operation to merging
> + * prev and vma (thereby deleting vma).
> + */
> + if (merge_will_delete_next && !can_merge_remove_vma(next)) {
> + merge_will_delete_next = false;
> + merge_right = false;
> + merge_both = false;
> + }
> +
> /* No matter what happens, we will be adjusting vma. */
> vma_start_write(vma);
>
> @@ -756,21 +773,12 @@ static struct vm_area_struct *vma_merge_modified(struct vma_merge_struct *vmg)
> vmg->start = prev->vm_start;
> vmg->pgoff = prev->vm_pgoff;
>
> - if (merge_will_delete_vma) {
> - /*
> - * can_vma_merge_after() assumed we would not be
> - * removing vma, so it skipped the check for
> - * vm_ops->close, but we are removing vma.
> - */
> - if (vma->vm_ops && vma->vm_ops->close)
> - err = -EINVAL;
> - } else {
> + if (!merge_will_delete_vma) {
> adjust = vma;
> adj_start = end - vma->vm_start;
> }
>
> - if (!err)
> - err = dup_anon_vma(prev, vma, &anon_dup);
> + err = dup_anon_vma(prev, vma, &anon_dup);
> } else { /* merge_right */
> /*
> * |<----->| OR
> @@ -886,6 +894,8 @@ struct vm_area_struct *vma_merge_new_vma(struct vma_merge_struct *vmg)
> unsigned long end = vmg->end;
> pgoff_t pgoff = vmg->pgoff;
> pgoff_t pglen = PHYS_PFN(end - start);
> + bool merge_next = false;
> + struct anon_vma *anon_vma = vmg->anon_vma;
Calling this "anon_vma" feels a bit too generic. IIUC you want to save
the original vmg->anon_vma in case the VMA turns out to be ummergeable
with the next VMA after vmg->anon_vma has already been modified.
What about calling it "orig_anon_vma"?
Petr T
>
> VM_WARN_ON(vmg->vma);
>
> @@ -916,8 +926,9 @@ struct vm_area_struct *vma_merge_new_vma(struct vma_merge_struct *vmg)
> vmg->end = next->vm_end;
> vmg->vma = next;
> vmg->pgoff = next->vm_pgoff - pglen;
> -
> vmg->anon_vma = next->anon_vma;
> +
> + merge_next = true;
> }
>
> /* If we can merge with the previous VMA, adjust vmg accordingly. */
> @@ -925,6 +936,16 @@ struct vm_area_struct *vma_merge_new_vma(struct vma_merge_struct *vmg)
> vmg->start = prev->vm_start;
> vmg->vma = prev;
> vmg->pgoff = prev->vm_pgoff;
> +
> + /*
> + * If this merge would result in removal of the next VMA but we
> + * are not permitted to do so, reduce the operation to merging
> + * prev and vma.
> + */
> + if (merge_next && !can_merge_remove_vma(next)) {
> + vmg->end = end;
> + vmg->anon_vma = anon_vma;
> + }
> } else if (prev) {
> vma_iter_next_range(vmg->vmi);
> }
> @@ -978,6 +999,8 @@ int vma_expand(struct vma_merge_struct *vmg)
> int ret;
>
> remove_next = true;
> + /* This should already have been checked by this point. */
> + VM_WARN_ON(!can_merge_remove_vma(next));
> vma_start_write(next);
> ret = dup_anon_vma(vma, next, &anon_dup);
> if (ret)
> diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
> index e465dc22e2d0..0c0a6ffcfc98 100644
> --- a/tools/testing/vma/vma.c
> +++ b/tools/testing/vma/vma.c
> @@ -327,6 +327,9 @@ static bool test_vma_merge_new_vma(void)
> struct anon_vma_chain dummy_anon_vma_chain_d = {
> .anon_vma = &dummy_anon_vma,
> };
> + const struct vm_operations_struct vm_ops = {
> + .close = dummy_close,
> + };
> int count;
> struct vm_area_struct *vma, *vma_a, *vma_b, *vma_c, *vma_d;
> bool merged;
> @@ -370,6 +373,7 @@ static bool test_vma_merge_new_vma(void)
> * 0123456789abc
> * AA*B DD CC
> */
> + vma_a->vm_ops = &vm_ops; /* This should have no impact. */
> vma_b->anon_vma = &dummy_anon_vma;
> vma = try_merge_new_vma(&mm, &vmg, 0x2000, 0x3000, 2, flags, &merged);
> ASSERT_EQ(vma, vma_a);
> @@ -406,6 +410,7 @@ static bool test_vma_merge_new_vma(void)
> * AAAAA *DD CC
> */
> vma_d->anon_vma = &dummy_anon_vma;
> + vma_d->vm_ops = &vm_ops; /* This should have no impact. */
> vma = try_merge_new_vma(&mm, &vmg, 0x6000, 0x7000, 6, flags, &merged);
> ASSERT_EQ(vma, vma_d);
> /* Prepend. */
> @@ -423,6 +428,7 @@ static bool test_vma_merge_new_vma(void)
> * 0123456789abc
> * AAAAA*DDD CC
> */
> + vma_d->vm_ops = NULL; /* This would otherwise degrade the merge. */
> vma = try_merge_new_vma(&mm, &vmg, 0x5000, 0x6000, 5, flags, &merged);
> ASSERT_EQ(vma, vma_a);
> /* Merge with A, delete D. */
> @@ -573,120 +579,145 @@ static bool test_vma_merge_with_close(void)
> struct vma_merge_struct vmg = {
> .vmi = &vmi,
> };
> - struct vm_operations_struct vm_ops = {};
> - struct vm_area_struct *vma_next =
> - alloc_and_link_vma(&mm, 0x2000, 0x3000, 2, flags);
> - struct vm_area_struct *vma;
> + const struct vm_operations_struct vm_ops = {
> + .close = dummy_close,
> + };
> + struct vm_area_struct *vma_prev, *vma_next, *vma;
>
> /*
> - * When we merge VMAs we sometimes have to delete others as part of the
> - * operation.
> - *
> - * Considering the two possible adjacent VMAs to which a VMA can be
> - * merged:
> - *
> - * [ prev ][ vma ][ next ]
> - *
> - * In no case will we need to delete prev. If the operation is
> - * mergeable, then prev will be extended with one or both of vma and
> - * next deleted.
> - *
> - * As a result, during initial mergeability checks, only
> - * can_vma_merge_before() (which implies the VMA being merged with is
> - * 'next' as shown above) bothers to check to see whether the next VMA
> - * has a vm_ops->close() callback that will need to be called when
> - * removed.
> - *
> - * If it does, then we cannot merge as the resources that the close()
> - * operation potentially clears down are tied only to the existing VMA
> - * range and we have no way of extending those to the nearly merged one.
> - *
> - * We must consider two scenarios:
> - *
> - * A.
> + * When merging VMAs we are not permitted to remove any VMA that has a
> + * vm_ops->close() hook.
> *
> - * vm_ops->close: - - !NULL
> - * [ prev ][ vma ][ next ]
> - *
> - * Where prev may or may not be present/mergeable.
> - *
> - * This is picked up by a specific check in can_vma_merge_before().
> - *
> - * B.
> - *
> - * vm_ops->close: - !NULL
> - * [ prev ][ vma ]
> - *
> - * Where prev and vma are present and mergeable.
> - *
> - * This is picked up by a specific check in vma_merge_modified().
> - *
> - * IMPORTANT NOTE: We make the assumption that the following case:
> + * This is because executing this hook may clear state that is pertinent
> + * to the VMA range as a whole.
> + */
> +
> + /*
> + * The only case of a new VMA merge that results in a VMA being deleted
> + * is one where both the previous and next VMAs are merged - in this
> + * instance the next VMA is deleted, and the previous VMA is extended.
> *
> - * - !NULL NULL
> - * [ prev ][ vma ][ next ]
> + * If we are unable to do so, we reduce the operation to simply
> + * extending the prev VMA and not merging next.
> *
> - * Cannot occur, because vma->vm_ops being the same implies the same
> - * vma->vm_file, and therefore this would mean that next->vm_ops->close
> - * would be set too, and thus scenario A would pick this up.
> + * 0123456789
> + * PPP**NNNN
> + * ->
> + * 0123456789
> + * PPPPPPNNN
> */
>
> - ASSERT_NE(vma_next, NULL);
> + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
> + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, flags);
> + vma_next->vm_ops = &vm_ops;
> +
> + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
> + ASSERT_EQ(vma_merge_new_vma(&vmg), vma_prev);
> + ASSERT_EQ(vma_prev->vm_start, 0);
> + ASSERT_EQ(vma_prev->vm_end, 0x5000);
> + ASSERT_EQ(vma_prev->vm_pgoff, 0);
> +
> + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2);
>
> /*
> - * SCENARIO A
> + * When modifying an existing VMA there are further cases where we
> + * delete VMAs.
> + *
> + * <>
> + * 0123456789
> + * PPPVV
> *
> - * 0123
> - * *N
> + * In this instance, if vma has a close hook, the merge simply cannot
> + * proceed.
> */
>
> - /* Make the next VMA have a close() callback. */
> - vm_ops.close = dummy_close;
> - vma_next->vm_ops = (const struct vm_operations_struct *)&vm_ops;
> + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
> + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags);
> + vma->vm_ops = &vm_ops;
>
> - /* Our proposed VMA has characteristics that would otherwise be merged. */
> - vmg_set_range(&vmg, 0x1000, 0x2000, 1, flags);
> + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
> + vmg.prev = vma_prev;
> + vmg.vma = vma;
>
> - /* The next VMA having a close() operator should cause the merge to fail.*/
> - ASSERT_EQ(vma_merge_new_vma(&vmg), NULL);
> + ASSERT_EQ(vma_merge_modified(&vmg), NULL);
>
> - /* Now create the VMA so we can merge via modified flags */
> - vmg_set_range(&vmg, 0x1000, 0x2000, 1, flags);
> - vma = alloc_and_link_vma(&mm, 0x1000, 0x2000, 1, flags);
> - vmg.vma = vma;
> + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2);
>
> /*
> - * The VMA being modified in a way that would otherwise merge should
> - * also fail.
> + * This case is mirrored if merging with next.
> + *
> + * <>
> + * 0123456789
> + * VVNNNN
> + *
> + * In this instance, if vma has a close hook, the merge simply cannot
> + * proceed.
> */
> +
> + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags);
> + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, flags);
> + vma->vm_ops = &vm_ops;
> +
> + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
> + vmg.vma = vma;
> +
> ASSERT_EQ(vma_merge_modified(&vmg), NULL);
>
> - /* SCENARIO B
> + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2);
> +
> + /*
> + * Finally, we consider two variants of the case where we modify a VMA
> + * to merge with both the previous and next VMAs.
> *
> - * 0123
> - * P*
> + * The first variant is where vma has a close hook. In this instance, no
> + * merge can proceed.
> *
> - * In order for this scenario to trigger, the VMA currently being
> - * modified must also have a .close().
> + * <>
> + * 0123456789
> + * PPPVVNNNN
> */
>
> - /* Reset VMG state. */
> - vmg_set_range(&vmg, 0x1000, 0x2000, 1, flags);
> + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
> + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags);
> + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, flags);
> + vma->vm_ops = &vm_ops;
> +
> + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
> + vmg.prev = vma_prev;
> + vmg.vma = vma;
> +
> + ASSERT_EQ(vma_merge_modified(&vmg), NULL);
> +
> + ASSERT_EQ(cleanup_mm(&mm, &vmi), 3);
> +
> /*
> - * Make next unmergeable, and don't let the scenario A check pick this
> - * up, we want to reproduce scenario B only.
> + * The second variant is where next has a close hook. In this instance,
> + * we reduce the operation to a merge between prev and vma.
> + *
> + * <>
> + * 0123456789
> + * PPPVVNNNN
> + * ->
> + * 0123456789
> + * PPPPPNNNN
> */
> - vma_next->vm_ops = NULL;
> - vma_next->__vm_flags &= ~VM_MAYWRITE;
> - /* Allocate prev. */
> - vmg.prev = alloc_and_link_vma(&mm, 0, 0x1000, 0, flags);
> - /* Assign a vm_ops->close() function to VMA explicitly. */
> - vma->vm_ops = (const struct vm_operations_struct *)&vm_ops;
> +
> + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
> + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags);
> + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, flags);
> + vma_next->vm_ops = &vm_ops;
> +
> + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
> + vmg.prev = vma_prev;
> vmg.vma = vma;
> - /* Make sure merge does not occur. */
> - ASSERT_EQ(vma_merge_modified(&vmg), NULL);
>
> - cleanup_mm(&mm, &vmi);
> + ASSERT_EQ(vma_merge_modified(&vmg), vma_prev);
> + ASSERT_EQ(vma_prev->vm_start, 0);
> + ASSERT_EQ(vma_prev->vm_end, 0x5000);
> + ASSERT_EQ(vma_prev->vm_pgoff, 0);
> +
> + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2);
> +
> return true;
> }
>
> @@ -699,6 +730,9 @@ static bool test_vma_merge_modified(void)
> struct vma_merge_struct vmg = {
> .vmi = &vmi,
> };
> + const struct vm_operations_struct vm_ops = {
> + .close = dummy_close,
> + };
>
> /*
> * Merge right case - partial span.
> @@ -711,7 +745,9 @@ static bool test_vma_merge_modified(void)
> * VNNNNNN
> */
> vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, flags);
> + vma->vm_ops = &vm_ops; /* This should have no impact. */
> vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags);
> + vma_next->vm_ops = &vm_ops; /* This should have no impact. */
> vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags);
> vmg.vma = vma;
> vmg.prev = vma;
> @@ -743,6 +779,7 @@ static bool test_vma_merge_modified(void)
> */
> vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, flags);
> vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags);
> + vma_next->vm_ops = &vm_ops; /* This should have no impact. */
> vmg_set_range(&vmg, 0x2000, 0x6000, 2, flags);
> vmg.vma = vma;
> vma->anon_vma = &dummy_anon_vma;
> @@ -768,7 +805,9 @@ static bool test_vma_merge_modified(void)
> * PPPPPPV
> */
> vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
> + vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
> vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
> + vma->vm_ops = &vm_ops; /* This should have no impact. */
> vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags);
> vmg.prev = vma_prev;
> vmg.vma = vma;
> @@ -800,6 +839,7 @@ static bool test_vma_merge_modified(void)
> * PPPPPPP
> */
> vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
> + vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
> vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
> vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
> vmg.prev = vma_prev;
> @@ -827,6 +867,7 @@ static bool test_vma_merge_modified(void)
> * PPPPPPPPPP
> */
> vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
> + vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
> vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
> vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags);
> vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
> --
> 2.45.2
>
Powered by blists - more mailing lists