Message-ID: <a520417243ce7f8122de05a0420f964a922e392a.1750363557.git.lorenzo.stoakes@oracle.com>
Date: Thu, 19 Jun 2025 21:26:28 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: David Hildenbrand <david@...hat.com>, Zi Yan <ziy@...dia.com>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
"Liam R . Howlett" <Liam.Howlett@...cle.com>,
Nico Pache <npache@...hat.com>, Ryan Roberts <ryan.roberts@....com>,
Dev Jain <dev.jain@....com>, Barry Song <baohua@...nel.org>,
Vlastimil Babka <vbabka@...e.cz>, Jann Horn <jannh@...gle.com>,
linux-mm@...ck.org, linux-kernel@...r.kernel.org,
Lance Yang <ioworker0@...il.com>, SeongJae Park <sj@...nel.org>,
Suren Baghdasaryan <surenb@...gle.com>
Subject: [PATCH 3/5] mm/madvise: thread VMA range state through madvise_behavior

Rather than updating start and a confusing local variable 'tmp' in
madvise_walk_vmas(), store the current range being operated upon in the
struct madvise_behavior helper object as a range pair (start, end), and use
this consistently in all operations.

This makes it clearer what is going on and opens the door to further
cleanup, now that the state describing what is currently being operated
upon is stored here.
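
To illustrate the calling convention this introduces, here is a minimal
standalone sketch (not the kernel code itself: the struct layout and names
mirror the diff below, while the walker body is a stand-in):

#include <stdio.h>

struct madvise_behavior_range {
	unsigned long start, end;
};

struct madvise_behavior {
	int behavior;
	/* Range currently being operated upon; updated per VMA when walking. */
	struct madvise_behavior_range range;
};

/*
 * Stand-in for madvise_walk_vmas(): it reads the range from the helper
 * object rather than taking separate start/end parameters.
 */
static int madvise_walk_vmas(struct madvise_behavior *madv_behavior)
{
	printf("walking [0x%lx, 0x%lx)\n",
	       madv_behavior->range.start, madv_behavior->range.end);
	return 0;
}

int main(void)
{
	struct madvise_behavior madv_behavior = { .behavior = 0 };

	/* Callers populate the embedded range, then pass only the helper. */
	madv_behavior.range.start = 0x1000;
	madv_behavior.range.end = 0x3000;

	return madvise_walk_vmas(&madv_behavior);
}

Callers set range.start/range.end once up front; when walking multiple
VMAs, madvise_walk_vmas() clamps range.end to each VMA in turn, so every
helper sees a single source of truth for the current range.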
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
---
mm/madvise.c | 101 ++++++++++++++++++++++++++++-----------------------
1 file changed, 55 insertions(+), 46 deletions(-)
diff --git a/mm/madvise.c b/mm/madvise.c
index 47485653c2a1..6faa38b92111 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -58,17 +58,26 @@ enum madvise_lock_mode {
MADVISE_VMA_READ_LOCK,
};
+struct madvise_behavior_range {
+ unsigned long start, end;
+};
+
struct madvise_behavior {
struct mm_struct *mm;
int behavior;
struct mmu_gather *tlb;
enum madvise_lock_mode lock_mode;
struct anon_vma_name *anon_name;
+
+ /*
+ * The range over which the behaviour is currently being applied. If
+ * traversing multiple VMAs, this is updated for each.
+ */
+ struct madvise_behavior_range range;
};
#ifdef CONFIG_ANON_VMA_NAME
-static int madvise_walk_vmas(unsigned long start, unsigned long end,
- struct madvise_behavior *madv_behavior);
+static int madvise_walk_vmas(struct madvise_behavior *madv_behavior);
struct anon_vma_name *anon_vma_name_alloc(const char *name)
{
@@ -149,8 +158,9 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
madv_behavior.behavior =
anon_name ? __MADV_SET_ANON_VMA_NAME : __MADV_CLEAR_ANON_VMA_NAME;
-
- return madvise_walk_vmas(start, end, &madv_behavior);
+ madv_behavior.range.start = start;
+ madv_behavior.range.end = end;
+ return madvise_walk_vmas(&madv_behavior);
}
static bool is_anon_vma_name(int behavior)
@@ -1012,12 +1022,13 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
return -EINVAL;
}
-static long madvise_populate(unsigned long start, unsigned long end,
- struct madvise_behavior *madv_behavior)
+static long madvise_populate(struct madvise_behavior *madv_behavior)
{
struct mm_struct *mm = madv_behavior->mm;
const bool write = madv_behavior->behavior == MADV_POPULATE_WRITE;
int locked = 1;
+ unsigned long start = madv_behavior->range.start;
+ unsigned long end = madv_behavior->range.end;
long pages;
while (start < end) {
@@ -1308,12 +1319,13 @@ static long madvise_guard_remove(struct vm_area_struct *vma,
*/
static int madvise_vma_behavior(struct vm_area_struct *vma,
struct vm_area_struct **prev,
- unsigned long start, unsigned long end,
struct madvise_behavior *madv_behavior)
{
int behavior = madv_behavior->behavior;
struct anon_vma_name *anon_name = madv_behavior->anon_name;
vm_flags_t new_flags = vma->vm_flags;
+ unsigned long start = madv_behavior->range.start;
+ unsigned long end = madv_behavior->range.end;
int error;
if (unlikely(!can_modify_vma_madv(vma, behavior)))
@@ -1425,10 +1437,11 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
/*
* Error injection support for memory error handling.
*/
-static int madvise_inject_error(unsigned long start, unsigned long end,
- struct madvise_behavior *madv_behavior)
+static int madvise_inject_error(struct madvise_behavior *madv_behavior)
{
unsigned long size;
+ unsigned long start = madv_behavior->range.start;
+ unsigned long end = madv_behavior->range.end;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1482,8 +1495,7 @@ static bool is_memory_failure(struct madvise_behavior *madv_behavior)
#else
-static int madvise_inject_error(unsigned long start, unsigned long end,
- struct madvise_behavior *madv_behavior)
+static int madvise_inject_error(struct madvise_behavior *madv_behavior)
{
return 0;
}
@@ -1565,20 +1577,20 @@ static bool process_madvise_remote_valid(int behavior)
* If a VMA read lock could not be acquired, we return NULL and expect caller to
* fallback to mmap lock behaviour.
*/
-static struct vm_area_struct *try_vma_read_lock(struct mm_struct *mm,
- struct madvise_behavior *madv_behavior,
- unsigned long start, unsigned long end)
+static
+struct vm_area_struct *try_vma_read_lock(struct madvise_behavior *madv_behavior)
{
+ struct mm_struct *mm = madv_behavior->mm;
struct vm_area_struct *vma;
- vma = lock_vma_under_rcu(mm, start);
+ vma = lock_vma_under_rcu(mm, madv_behavior->range.start);
if (!vma)
goto take_mmap_read_lock;
/*
* Must span only a single VMA; uffd and remote processes are
* unsupported.
*/
- if (end > vma->vm_end || current->mm != mm ||
+ if (madv_behavior->range.end > vma->vm_end || current->mm != mm ||
userfaultfd_armed(vma)) {
vma_end_read(vma);
goto take_mmap_read_lock;
@@ -1600,13 +1612,13 @@ static struct vm_area_struct *try_vma_read_lock(struct mm_struct *mm,
* Must be called with the mmap_lock held for reading or writing.
*/
static
-int madvise_walk_vmas(unsigned long start, unsigned long end,
- struct madvise_behavior *madv_behavior)
+int madvise_walk_vmas(struct madvise_behavior *madv_behavior)
{
struct mm_struct *mm = madv_behavior->mm;
+ struct madvise_behavior_range *range = &madv_behavior->range;
+ unsigned long end = range->end;
struct vm_area_struct *vma;
struct vm_area_struct *prev;
- unsigned long tmp;
int unmapped_error = 0;
int error;
@@ -1615,11 +1627,10 @@ int madvise_walk_vmas(unsigned long start, unsigned long end,
* tentatively, avoiding walking VMAs.
*/
if (madv_behavior->lock_mode == MADVISE_VMA_READ_LOCK) {
- vma = try_vma_read_lock(mm, madv_behavior, start, end);
+ vma = try_vma_read_lock(madv_behavior);
if (vma) {
prev = vma;
- error = madvise_vma_behavior(vma, &prev, start, end,
- madv_behavior);
+ error = madvise_vma_behavior(vma, &prev, madv_behavior);
vma_end_read(vma);
return error;
}
@@ -1630,8 +1641,8 @@ int madvise_walk_vmas(unsigned long start, unsigned long end,
* ranges, just ignore them, but return -ENOMEM at the end.
* - different from the way of handling in mlock etc.
*/
- vma = find_vma_prev(mm, start, &prev);
- if (vma && start > vma->vm_start)
+ vma = find_vma_prev(mm, range->start, &prev);
+ if (vma && range->start > vma->vm_start)
prev = vma;
for (;;) {
@@ -1640,32 +1651,29 @@ int madvise_walk_vmas(unsigned long start, unsigned long end,
return -ENOMEM;
/* Here start < (end|vma->vm_end). */
- if (start < vma->vm_start) {
+ if (range->start < vma->vm_start) {
unmapped_error = -ENOMEM;
- start = vma->vm_start;
- if (start >= end)
+ range->start = vma->vm_start;
+ if (range->start >= end)
break;
}
- /* Here vma->vm_start <= start < (end|vma->vm_end) */
- tmp = vma->vm_end;
- if (end < tmp)
- tmp = end;
+ /* Here vma->vm_start <= range->start < (end|vma->vm_end) */
+ range->end = min(vma->vm_end, end);
- /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
- error = madvise_vma_behavior(vma, &prev, start, tmp,
- madv_behavior);
+ /* Here vma->vm_start <= range->start < range->end <= (end|vma->vm_end). */
+ error = madvise_vma_behavior(vma, &prev, madv_behavior);
if (error)
return error;
- start = tmp;
- if (prev && start < prev->vm_end)
- start = prev->vm_end;
- if (start >= end)
+ range->start = range->end;
+ if (prev && range->start < prev->vm_end)
+ range->start = prev->vm_end;
+ if (range->start >= range->end)
break;
if (prev)
vma = find_vma(mm, prev->vm_end);
else /* madvise_remove dropped mmap_lock */
- vma = find_vma(mm, start);
+ vma = find_vma(mm, range->start);
}
return unmapped_error;
@@ -1846,22 +1854,23 @@ static int madvise_do_behavior(unsigned long start, size_t len_in,
struct madvise_behavior *madv_behavior)
{
struct blk_plug plug;
- unsigned long end;
int error;
+ struct madvise_behavior_range *range = &madv_behavior->range;
if (is_memory_failure(madv_behavior)) {
- end = start + len_in;
- return madvise_inject_error(start, end, madv_behavior);
+ range->start = start;
+ range->end = start + len_in;
+ return madvise_inject_error(madv_behavior);
}
- start = get_untagged_addr(madv_behavior->mm, start);
- end = start + PAGE_ALIGN(len_in);
+ range->start = get_untagged_addr(madv_behavior->mm, start);
+ range->end = range->start + PAGE_ALIGN(len_in);
blk_start_plug(&plug);
if (is_madvise_populate(madv_behavior))
- error = madvise_populate(start, end, madv_behavior);
+ error = madvise_populate(madv_behavior);
else
- error = madvise_walk_vmas(start, end, madv_behavior);
+ error = madvise_walk_vmas(madv_behavior);
blk_finish_plug(&plug);
return error;
}
--
2.49.0