[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <21d90844-1cb1-46ab-a2bb-62f2478b7dfb@kernel.org>
Date: Thu, 5 Feb 2026 12:43:03 +0100
From: "David Hildenbrand (arm)" <david@...nel.org>
To: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
Alice Ryhl <aliceryhl@...gle.com>
Cc: Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Carlos Llamas <cmllamas@...gle.com>, Alexander Viro
<viro@...iv.linux.org.uk>, Christian Brauner <brauner@...nel.org>,
Jan Kara <jack@...e.cz>, Paul Moore <paul@...l-moore.com>,
James Morris <jmorris@...ei.org>, "Serge E. Hallyn" <serge@...lyn.com>,
Andrew Morton <akpm@...ux-foundation.org>, Dave Chinner
<david@...morbit.com>, Qi Zheng <zhengqi.arch@...edance.com>,
Roman Gushchin <roman.gushchin@...ux.dev>,
Muchun Song <muchun.song@...ux.dev>,
"Liam R. Howlett" <Liam.Howlett@...cle.com>, Vlastimil Babka
<vbabka@...e.cz>, Mike Rapoport <rppt@...nel.org>,
Suren Baghdasaryan <surenb@...gle.com>, Michal Hocko <mhocko@...e.com>,
Miguel Ojeda <ojeda@...nel.org>, Boqun Feng <boqun.feng@...il.com>,
Gary Guo <gary@...yguo.net>, Björn Roy Baron
<bjorn3_gh@...tonmail.com>, Benno Lossin <lossin@...nel.org>,
Andreas Hindborg <a.hindborg@...nel.org>, Trevor Gross <tmgross@...ch.edu>,
Danilo Krummrich <dakr@...nel.org>, kernel-team@...roid.com,
linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-security-module@...r.kernel.org, linux-mm@...ck.org,
rust-for-linux@...r.kernel.org
Subject: Re: [PATCH 3/5] mm: export zap_page_range_single and list_lru_add/del
On 2/5/26 12:29, Lorenzo Stoakes wrote:
> On Thu, Feb 05, 2026 at 10:51:28AM +0000, Alice Ryhl wrote:
>> These are the functions needed by Binder's shrinker.
>>
>> Binder uses zap_page_range_single in the shrinker path to remove an
>> unused page from the mmap'd region. Note that pages are only removed
>> from the mmap'd region lazily when shrinker asks for it.
>>
>> Binder uses list_lru_add/del to keep track of the shrinker lru list, and
>> it can't use _obj because the list head is not stored inline in the page
>> actually being lru freed, so page_to_nid(virt_to_page(item)) on the list
>> head computes the nid of the wrong page.
>>
>> Signed-off-by: Alice Ryhl <aliceryhl@...gle.com>
>> ---
>> mm/list_lru.c | 2 ++
>> mm/memory.c | 1 +
>> 2 files changed, 3 insertions(+)
>>
>> diff --git a/mm/list_lru.c b/mm/list_lru.c
>> index ec48b5dadf519a5296ac14cda035c067f9e448f8..bf95d73c9815548a19db6345f856cee9baad22e3 100644
>> --- a/mm/list_lru.c
>> +++ b/mm/list_lru.c
>> @@ -179,6 +179,7 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
>> unlock_list_lru(l, false);
>> return false;
>> }
>> +EXPORT_SYMBOL_GPL(list_lru_add);
>>
>> bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
>> {
>> @@ -216,6 +217,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
>> unlock_list_lru(l, false);
>> return false;
>> }
>> +EXPORT_SYMBOL_GPL(list_lru_del);
>
> Same point as before about exporting symbols, but given the _obj variants are
> exported already this one is more valid.
>
>>
>> bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
>> {
>> diff --git a/mm/memory.c b/mm/memory.c
>> index da360a6eb8a48e29293430d0c577fb4b6ec58099..64083ace239a2caf58e1645dd5d91a41d61492c4 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -2168,6 +2168,7 @@ void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
>> zap_page_range_single_batched(&tlb, vma, address, size, details);
>> tlb_finish_mmu(&tlb);
>> }
>> +EXPORT_SYMBOL(zap_page_range_single);
>
> Sorry but I don't want this exported at all.
>
> This is an internal implementation detail which allows fine-grained control of
> behaviour via struct zap_details (which binder doesn't use, of course :)
I don't expect anybody to set zap_details, but yeah, it could be abused.
It could be abused right now from anywhere else in the kernel
where we don't build as a module :)
Apparently we export a similar function in rust where we just removed the last parameter.
I think zap_page_range_single() is only called with non-NULL from mm/memory.c.
So the following makes likely sense even outside of the context of this series:
From d2a2d20994456b9a66008b7fef12e379e76fc9f8 Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (arm)" <david@...nel.org>
Date: Thu, 5 Feb 2026 12:42:09 +0100
Subject: [PATCH] tmp
Signed-off-by: David Hildenbrand (arm) <david@...nel.org>
---
arch/s390/mm/gmap_helpers.c | 2 +-
drivers/android/binder_alloc.c | 2 +-
include/linux/mm.h | 4 ++--
kernel/bpf/arena.c | 3 +--
kernel/events/core.c | 2 +-
mm/memory.c | 15 +++++++++------
net/ipv4/tcp.c | 5 ++---
rust/kernel/mm/virt.rs | 2 +-
8 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
index d41b19925a5a..859f5570c3dc 100644
--- a/arch/s390/mm/gmap_helpers.c
+++ b/arch/s390/mm/gmap_helpers.c
@@ -102,7 +102,7 @@ void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned lo
if (!vma)
return;
if (!is_vm_hugetlb_page(vma))
- zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
+ zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr);
vmaddr = vma->vm_end;
}
}
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 979c96b74cad..b0201bc6893a 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -1186,7 +1186,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
if (vma) {
trace_binder_unmap_user_start(alloc, index);
- zap_page_range_single(vma, page_addr, PAGE_SIZE, NULL);
+ zap_page_range_single(vma, page_addr, PAGE_SIZE);
trace_binder_unmap_user_end(alloc, index);
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f0d5be9dc736..b7cc6ef49917 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2621,11 +2621,11 @@ struct page *vm_normal_page_pud(struct vm_area_struct *vma, unsigned long addr,
void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
- unsigned long size, struct zap_details *details);
+ unsigned long size);
static inline void zap_vma_pages(struct vm_area_struct *vma)
{
zap_page_range_single(vma, vma->vm_start,
- vma->vm_end - vma->vm_start, NULL);
+ vma->vm_end - vma->vm_start);
}
void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
struct vm_area_struct *start_vma, unsigned long start,
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 872dc0e41c65..242c931d3740 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -503,8 +503,7 @@ static void zap_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
struct vma_list *vml;
list_for_each_entry(vml, &arena->vma_list, head)
- zap_page_range_single(vml->vma, uaddr,
- PAGE_SIZE * page_cnt, NULL);
+ zap_page_range_single(vml->vma, uaddr, PAGE_SIZE * page_cnt);
}
static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8cca80094624..1dfb33c39c2f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6926,7 +6926,7 @@ static int map_range(struct perf_buffer *rb, struct vm_area_struct *vma)
#ifdef CONFIG_MMU
/* Clear any partial mappings on error. */
if (err)
- zap_page_range_single(vma, vma->vm_start, nr_pages * PAGE_SIZE, NULL);
+ zap_page_range_single(vma, vma->vm_start, nr_pages * PAGE_SIZE);
#endif
return err;
diff --git a/mm/memory.c b/mm/memory.c
index da360a6eb8a4..4f8dcdcd20f3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2155,17 +2155,16 @@ void zap_page_range_single_batched(struct mmu_gather *tlb,
* @vma: vm_area_struct holding the applicable pages
* @address: starting address of pages to zap
* @size: number of bytes to zap
- * @details: details of shared cache invalidation
*
* The range must fit into one VMA.
*/
void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
- unsigned long size, struct zap_details *details)
+ unsigned long size)
{
struct mmu_gather tlb;
tlb_gather_mmu(&tlb, vma->vm_mm);
- zap_page_range_single_batched(&tlb, vma, address, size, details);
+ zap_page_range_single_batched(&tlb, vma, address, size, NULL);
tlb_finish_mmu(&tlb);
}
@@ -2187,7 +2186,7 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
!(vma->vm_flags & VM_PFNMAP))
return;
- zap_page_range_single(vma, address, size, NULL);
+ zap_page_range_single(vma, address, size);
}
EXPORT_SYMBOL_GPL(zap_vma_ptes);
@@ -2963,7 +2962,7 @@ static int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long add
* maintain page reference counts, and callers may free
* pages due to the error. So zap it early.
*/
- zap_page_range_single(vma, addr, size, NULL);
+ zap_page_range_single(vma, addr, size);
return error;
}
@@ -4187,7 +4186,11 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
unsigned long start_addr, unsigned long end_addr,
struct zap_details *details)
{
- zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
+ struct mmu_gather tlb;
+
+ tlb_gather_mmu(&tlb, vma->vm_mm);
+ zap_page_range_single_batched(&tlb, vma, address, size, details);
+ tlb_finish_mmu(&tlb);
}
static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d5319ebe2452..9e92c71389f3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2052,7 +2052,7 @@ static int tcp_zerocopy_vm_insert_batch_error(struct vm_area_struct *vma,
maybe_zap_len = total_bytes_to_map - /* All bytes to map */
*length + /* Mapped or pending */
(pages_remaining * PAGE_SIZE); /* Failed map. */
- zap_page_range_single(vma, *address, maybe_zap_len, NULL);
+ zap_page_range_single(vma, *address, maybe_zap_len);
err = 0;
}
@@ -2217,8 +2217,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
if (total_bytes_to_map) {
if (!(zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT))
- zap_page_range_single(vma, address, total_bytes_to_map,
- NULL);
+ zap_page_range_single(vma, address, total_bytes_to_map);
zc->length = total_bytes_to_map;
zc->recv_skip_hint = 0;
} else {
diff --git a/rust/kernel/mm/virt.rs b/rust/kernel/mm/virt.rs
index da21d65ccd20..b8e59e4420f3 100644
--- a/rust/kernel/mm/virt.rs
+++ b/rust/kernel/mm/virt.rs
@@ -124,7 +124,7 @@ pub fn zap_page_range_single(&self, address: usize, size: usize) {
// sufficient for this method call. This method has no requirements on the vma flags. The
// address range is checked to be within the vma.
unsafe {
- bindings::zap_page_range_single(self.as_ptr(), address, size, core::ptr::null_mut())
+ bindings::zap_page_range_single(self.as_ptr(), address, size)
};
}
--
2.43.0
--
Cheers,
David
Powered by blists - more mailing lists