Message-ID: <23b5b549b0eaefb2922625626e58c2a352f3e93c.1763460113.git.lorenzo.stoakes@oracle.com>
Date: Tue, 18 Nov 2025 10:17:45 +0000
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: Jonathan Corbet <corbet@....net>, David Hildenbrand <david@...hat.com>,
"Liam R . Howlett" <Liam.Howlett@...cle.com>,
Vlastimil Babka <vbabka@...e.cz>, Mike Rapoport <rppt@...nel.org>,
Suren Baghdasaryan <surenb@...gle.com>, Michal Hocko <mhocko@...e.com>,
Steven Rostedt <rostedt@...dmis.org>,
Masami Hiramatsu <mhiramat@...nel.org>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Jann Horn <jannh@...gle.com>, Pedro Falcato <pfalcato@...e.de>,
Zi Yan <ziy@...dia.com>, Baolin Wang <baolin.wang@...ux.alibaba.com>,
Nico Pache <npache@...hat.com>, Ryan Roberts <ryan.roberts@....com>,
Dev Jain <dev.jain@....com>, Barry Song <baohua@...nel.org>,
Lance Yang <lance.yang@...ux.dev>, linux-kernel@...r.kernel.org,
linux-fsdevel@...r.kernel.org, linux-doc@...r.kernel.org,
linux-mm@...ck.org, linux-trace-kernel@...r.kernel.org,
linux-kselftest@...r.kernel.org, Andrei Vagin <avagin@...il.com>
Subject: [PATCH v4 3/9] mm: update vma_modify_flags() to handle residual flags, document
The functions in the vma_modify_*() family each perform splits, a merge, or
no change at all in preparation for the requested modification to occur.
When doing so for a VMA flags change, we currently fail to account for any
flags which may remain on the VMA despite the requested change (for
instance, VM_SOFTDIRTY) in the case where a merge succeeds.
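As a sketch of the problematic pattern (compute_new_flags() is a
hypothetical stand-in for however a given caller derives its new flags, and
vm_flags_reset() for however it then applies them):
	vm_flags_t newflags = compute_new_flags(vma);	/* hypothetical */

	vma = vma_modify_flags(vmi, prev, vma, start, end, newflags);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/*
	 * Overwrites vma->vm_flags with newflags, silently dropping e.g.
	 * VM_SOFTDIRTY retained on the VMA merged into.
	 */
	vm_flags_reset(vma, newflags);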
This is made more important by subsequent patches, which introduce the
concept of sticky VMA flags that rely upon this behaviour.
This patch fixes the issue by passing the VMA flags parameter as a pointer,
updating it accordingly on merge, and updating callers to accommodate this.
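With this change the same sketch becomes (again illustrative rather than
taken verbatim from any caller):
	vm_flags_t newflags = compute_new_flags(vma);	/* hypothetical */

	vma = vma_modify_flags(vmi, prev, vma, start, end, &newflags);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/*
	 * On a successful merge, newflags now also contains any residual
	 * flags (e.g. VM_SOFTDIRTY) still present on the returned VMA, so
	 * applying it no longer clears them.
	 */
	vm_flags_reset(vma, newflags);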
Additionally, while we are here, we add kdocs for each of the
vma_modify_*() functions. The fact that the requested modification is not
actually performed by these functions is confusing, so it is useful to make
this abundantly clear.
We also update the VMA userland tests to account for this change.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
---
mm/madvise.c | 2 +-
mm/mlock.c | 2 +-
mm/mprotect.c | 2 +-
mm/mseal.c | 9 +--
mm/vma.c | 56 ++++++++--------
mm/vma.h | 140 +++++++++++++++++++++++++++++-----------
tools/testing/vma/vma.c | 3 +-
7 files changed, 144 insertions(+), 70 deletions(-)
diff --git a/mm/madvise.c b/mm/madvise.c
index fb1c86e630b6..0b3280752bfb 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -167,7 +167,7 @@ static int madvise_update_vma(vm_flags_t new_flags,
range->start, range->end, anon_name);
else
vma = vma_modify_flags(&vmi, madv_behavior->prev, vma,
- range->start, range->end, new_flags);
+ range->start, range->end, &new_flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
diff --git a/mm/mlock.c b/mm/mlock.c
index bb0776f5ef7c..2f699c3497a5 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -478,7 +478,7 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
goto out;
- vma = vma_modify_flags(vmi, *prev, vma, start, end, newflags);
+ vma = vma_modify_flags(vmi, *prev, vma, start, end, &newflags);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto out;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ab4e06cd9a69..db93d3bb1a5e 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -756,7 +756,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
newflags &= ~VM_ACCOUNT;
}
- vma = vma_modify_flags(vmi, *pprev, vma, start, end, newflags);
+ vma = vma_modify_flags(vmi, *pprev, vma, start, end, &newflags);
if (IS_ERR(vma)) {
error = PTR_ERR(vma);
goto fail;
diff --git a/mm/mseal.c b/mm/mseal.c
index e5b205562d2e..ae442683c5c0 100644
--- a/mm/mseal.c
+++ b/mm/mseal.c
@@ -66,12 +66,13 @@ static int mseal_apply(struct mm_struct *mm,
prev = vma;
for_each_vma_range(vmi, vma, end) {
- unsigned long curr_end = MIN(vma->vm_end, end);
+ const unsigned long curr_end = MIN(vma->vm_end, end);
if (!(vma->vm_flags & VM_SEALED)) {
- vma = vma_modify_flags(&vmi, prev, vma,
- curr_start, curr_end,
- vma->vm_flags | VM_SEALED);
+ vm_flags_t vm_flags = vma->vm_flags | VM_SEALED;
+
+ vma = vma_modify_flags(&vmi, prev, vma, curr_start,
+ curr_end, &vm_flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
vm_flags_set(vma, VM_SEALED);
diff --git a/mm/vma.c b/mm/vma.c
index 0c5e391fe2e2..47469c036a72 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1638,25 +1638,35 @@ static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg)
return vma;
}
-struct vm_area_struct *vma_modify_flags(
- struct vma_iterator *vmi, struct vm_area_struct *prev,
- struct vm_area_struct *vma, unsigned long start, unsigned long end,
- vm_flags_t vm_flags)
+struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
+ struct vm_area_struct *prev, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ vm_flags_t *vm_flags_ptr)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
+ const vm_flags_t vm_flags = *vm_flags_ptr;
+ struct vm_area_struct *ret;
vmg.vm_flags = vm_flags;
- return vma_modify(&vmg);
+ ret = vma_modify(&vmg);
+ if (IS_ERR(ret))
+ return ret;
+
+ /*
+ * For a merge to succeed, the flags must match those requested. For
+ * flags which do not obey typical merge rules (i.e. do not need to
+ * match), we must let the caller know about them.
+ */
+ if (vmg.state == VMA_MERGE_SUCCESS)
+ *vm_flags_ptr = ret->vm_flags;
+ return ret;
}
-struct vm_area_struct
-*vma_modify_name(struct vma_iterator *vmi,
- struct vm_area_struct *prev,
- struct vm_area_struct *vma,
- unsigned long start,
- unsigned long end,
- struct anon_vma_name *new_name)
+struct vm_area_struct *vma_modify_name(struct vma_iterator *vmi,
+ struct vm_area_struct *prev, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct anon_vma_name *new_name)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
@@ -1665,12 +1675,10 @@ struct vm_area_struct
return vma_modify(&vmg);
}
-struct vm_area_struct
-*vma_modify_policy(struct vma_iterator *vmi,
- struct vm_area_struct *prev,
- struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- struct mempolicy *new_pol)
+struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi,
+ struct vm_area_struct *prev, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct mempolicy *new_pol)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
@@ -1679,14 +1687,10 @@ struct vm_area_struct
return vma_modify(&vmg);
}
-struct vm_area_struct
-*vma_modify_flags_uffd(struct vma_iterator *vmi,
- struct vm_area_struct *prev,
- struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- vm_flags_t vm_flags,
- struct vm_userfaultfd_ctx new_ctx,
- bool give_up_on_oom)
+struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi,
+ struct vm_area_struct *prev, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, vm_flags_t vm_flags,
+ struct vm_userfaultfd_ctx new_ctx, bool give_up_on_oom)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
diff --git a/mm/vma.h b/mm/vma.h
index e912d42c428a..75f1d9c7204b 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -263,47 +263,115 @@ void remove_vma(struct vm_area_struct *vma);
void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
struct vm_area_struct *prev, struct vm_area_struct *next);
-/* We are about to modify the VMA's flags. */
-__must_check struct vm_area_struct
-*vma_modify_flags(struct vma_iterator *vmi,
+/**
+ * vma_modify_flags() - Perform any necessary split/merge in preparation for
+ * setting VMA flags to *@vm_flags_ptr in the range @start to @end contained
+ * within @vma.
+ * @vmi: Valid VMA iterator positioned at @vma.
+ * @prev: The VMA immediately prior to @vma or NULL if @vma is the first.
+ * @vma: The VMA containing the range @start to @end to be updated.
+ * @start: The start of the range to update. May be offset within @vma.
+ * @end: The exclusive end of the range to update, may be offset within @vma.
+ * @vm_flags_ptr: A pointer to the VMA flags that the @start to @end range is
+ * about to be set to. On merge, this will be updated to include any additional
+ * flags which remain in place.
+ *
+ * IMPORTANT: The actual modification being requested here is NOT applied,
+ * rather the VMA is perhaps split, perhaps merged to accommodate the change,
+ * and the caller is expected to perform the actual modification.
+ *
+ * In order to account for VMA flags which may persist (e.g. soft-dirty), the
+ * @vm_flags_ptr parameter points to the requested flags which are then updated
+ * so the caller, should they overwrite any existing flags, correctly retains
+ * these.
+ *
+ * Returns: A VMA which contains the range @start to @end ready to have its
+ * flags altered to *@vm_flags_ptr.
+ */
+__must_check struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
+ struct vm_area_struct *prev, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ vm_flags_t *vm_flags_ptr);
+
+/**
+ * vma_modify_name() - Perform any necessary split/merge in preparation for
+ * setting anonymous VMA name to @new_name in the range @start to @end contained
+ * within @vma.
+ * @vmi: Valid VMA iterator positioned at @vma.
+ * @prev: The VMA immediately prior to @vma or NULL if @vma is the first.
+ * @vma: The VMA containing the range @start to @end to be updated.
+ * @start: The start of the range to update. May be offset within @vma.
+ * @end: The exclusive end of the range to update, may be offset within @vma.
+ * @new_name: The anonymous VMA name that the @start to @end range is about to
+ * be set to.
+ *
+ * IMPORTANT: The actual modification being requested here is NOT applied,
+ * rather the VMA is perhaps split, perhaps merged to accommodate the change,
+ * and the caller is expected to perform the actual modification.
+ *
+ * Returns: A VMA which contains the range @start to @end ready to have its
+ * anonymous VMA name changed to @new_name.
+ */
+__must_check struct vm_area_struct *vma_modify_name(struct vma_iterator *vmi,
struct vm_area_struct *prev, struct vm_area_struct *vma,
unsigned long start, unsigned long end,
- vm_flags_t vm_flags);
-
-/* We are about to modify the VMA's anon_name. */
-__must_check struct vm_area_struct
-*vma_modify_name(struct vma_iterator *vmi,
- struct vm_area_struct *prev,
- struct vm_area_struct *vma,
- unsigned long start,
- unsigned long end,
- struct anon_vma_name *new_name);
-
-/* We are about to modify the VMA's memory policy. */
-__must_check struct vm_area_struct
-*vma_modify_policy(struct vma_iterator *vmi,
- struct vm_area_struct *prev,
- struct vm_area_struct *vma,
+ struct anon_vma_name *new_name);
+
+/**
+ * vma_modify_policy() - Perform any necessary split/merge in preparation for
+ * setting NUMA policy to @new_pol in the range @start to @end contained
+ * within @vma.
+ * @vmi: Valid VMA iterator positioned at @vma.
+ * @prev: The VMA immediately prior to @vma or NULL if @vma is the first.
+ * @vma: The VMA containing the range @start to @end to be updated.
+ * @start: The start of the range to update. May be offset within @vma.
+ * @end: The exclusive end of the range to update, may be offset within @vma.
+ * @new_pol: The NUMA policy that the @start to @end range is about to be set
+ * to.
+ *
+ * IMPORTANT: The actual modification being requested here is NOT applied,
+ * rather the VMA is perhaps split, perhaps merged to accommodate the change,
+ * and the caller is expected to perform the actual modification.
+ *
+ * Returns: A VMA which contains the range @start to @end ready to have its
+ * NUMA policy changed to @new_pol.
+ */
+__must_check struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi,
+ struct vm_area_struct *prev, struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct mempolicy *new_pol);
-/* We are about to modify the VMA's flags and/or uffd context. */
-__must_check struct vm_area_struct
-*vma_modify_flags_uffd(struct vma_iterator *vmi,
- struct vm_area_struct *prev,
- struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- vm_flags_t vm_flags,
- struct vm_userfaultfd_ctx new_ctx,
- bool give_up_on_oom);
-
-__must_check struct vm_area_struct
-*vma_merge_new_range(struct vma_merge_struct *vmg);
-
-__must_check struct vm_area_struct
-*vma_merge_extend(struct vma_iterator *vmi,
- struct vm_area_struct *vma,
- unsigned long delta);
+/**
+ * vma_modify_flags_uffd() - Perform any necessary split/merge in preparation for
+ * setting VMA flags to @vm_flags and UFFD context to @new_ctx in the range
+ * @start to @end contained within @vma.
+ * @vmi: Valid VMA iterator positioned at @vma.
+ * @prev: The VMA immediately prior to @vma or NULL if @vma is the first.
+ * @vma: The VMA containing the range @start to @end to be updated.
+ * @start: The start of the range to update. May be offset within @vma.
+ * @end: The exclusive end of the range to update, may be offset within @vma.
+ * @vm_flags: The VMA flags that the @start to @end range is about to be set to.
+ * @new_ctx: The userfaultfd context that the @start to @end range is about to
+ * be set to.
+ * @give_up_on_oom: If an out of memory condition occurs on merge, simply give
+ * up on it and treat the merge as best-effort.
+ *
+ * IMPORTANT: The actual modification being requested here is NOT applied,
+ * rather the VMA is perhaps split, perhaps merged to accommodate the change,
+ * and the caller is expected to perform the actual modification.
+ *
+ * Returns: A VMA which contains the range @start to @end ready to have its VMA
+ * flags changed to @vm_flags and its userfaultfd context changed to @new_ctx.
+ */
+__must_check struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi,
+ struct vm_area_struct *prev, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, vm_flags_t vm_flags,
+ struct vm_userfaultfd_ctx new_ctx, bool give_up_on_oom);
+
+__must_check struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg);
+
+__must_check struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
+ struct vm_area_struct *vma, unsigned long delta);
void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb);
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 656e1c75b711..fd37ce3b2628 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -339,6 +339,7 @@ static bool test_simple_modify(void)
struct mm_struct mm = {};
struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, vm_flags);
VMA_ITERATOR(vmi, &mm, 0x1000);
+ vm_flags_t flags = VM_READ | VM_MAYREAD;
ASSERT_FALSE(attach_vma(&mm, init_vma));
@@ -347,7 +348,7 @@ static bool test_simple_modify(void)
* performs the merge/split only.
*/
vma = vma_modify_flags(&vmi, init_vma, init_vma,
- 0x1000, 0x2000, VM_READ | VM_MAYREAD);
+ 0x1000, 0x2000, &flags);
ASSERT_NE(vma, NULL);
/* We modify the provided VMA, and on split allocate new VMAs. */
ASSERT_EQ(vma, init_vma);
--
2.51.2