Message-Id: <20250812-vdso-mlockall-v1-2-2f49ba7cf819@linutronix.de>
Date: Tue, 12 Aug 2025 08:04:04 +0200
From: Thomas Weißschuh <thomas.weissschuh@...utronix.de>
To: Anna-Maria Behnsen <anna-maria@...utronix.de>,
Frederic Weisbecker <frederic@...nel.org>,
Thomas Gleixner <tglx@...utronix.de>, Andy Lutomirski <luto@...nel.org>,
Vincenzo Frascino <vincenzo.frascino@....com>
Cc: Nam Cao <namcao@...utronix.de>, linux-kernel@...r.kernel.org,
Thomas Weißschuh <thomas.weissschuh@...utronix.de>
Subject: [PATCH 2/3] vdso/datastore: Allow prefaulting by mlockall()
Latency-sensitive applications expect not to experience any page faults
after calling mlockall(). However, mlockall() ignores VM_PFNMAP and VM_IO
mappings, both of which are used by the generic vDSO datastore.
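For illustration, a minimal sketch of the affected usage pattern; this is
plain libc code, not part of the patch. Before this change, the first
clock_gettime() after mlockall() could still take a page fault on the
vDSO data page:

	#include <stdio.h>
	#include <sys/mman.h>
	#include <time.h>

	int main(void)
	{
		struct timespec ts;

		/* Lock and prefault current and future mappings. VMAs marked
		 * VM_PFNMAP or VM_IO, such as the vDSO datastore before this
		 * patch, are skipped by the population pass. */
		if (mlockall(MCL_CURRENT | MCL_FUTURE))
			perror("mlockall");

		/* First vDSO clock read; it touches the vDSO data page,
		 * which may not have been prefaulted above. */
		clock_gettime(CLOCK_MONOTONIC, &ts);
		printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
		return 0;
	}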
While the fault handler itself is very fast, going through the full
page fault exception handling is much slower, on the order of 20us on a
test machine.
Since the memory behind the datastore mappings is always present and
accessible, it is not necessary to use VM_IO for them.
VM_PFNMAP can be removed by mapping the pages through 'struct page' instead
of PFNs. VM_MIXEDMAP is necessary to call vmf_insert_page() in the timens
optimization path.
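In fault-handler terms the conversion below boils down to the following
contrast; this is a condensed sketch of the two conventions, not the
patch itself:

	/* PFN-based handler (VM_PFNMAP): install the PTE directly. */
	return vmf_insert_pfn(vma, vmf->address, pfn);

	/* Page-based handler (VM_MIXEDMAP): hand a referenced struct page
	 * back to the core fault code, which maps it; the reference taken
	 * here pins the page for the new mapping. Since a real struct page
	 * backs the mapping, get_user_pages() and thus mlockall()'s
	 * population pass can operate on it. */
	get_page(page);
	vmf->page = page;
	return 0;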
The data page mapping is now also aligned with the architecture-specific
code pages. Some architecture-specific data pages, like the x86 VCLOCK
pages, continue to use VM_IO as they are not always mappable.
Regular mlock() would also work, but userspace does not know the boundaries
of the vDSO.
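For comparison, the mlock() route would require userspace to discover the
mapping on its own, for example by scanning /proc/self/maps for the
"[vvar]" entry. A sketch of that workaround (the "[vvar]" name is an
implementation detail rather than ABI, which is exactly why this approach
is fragile):

	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>

	/* Best effort: find and mlock() the vDSO data mapping. */
	int lock_vvar(void)
	{
		char line[256];
		unsigned long start, end;
		FILE *f = fopen("/proc/self/maps", "r");

		if (!f)
			return -1;
		while (fgets(line, sizeof(line), f)) {
			if (strstr(line, "[vvar]") &&
			    sscanf(line, "%lx-%lx", &start, &end) == 2) {
				fclose(f);
				return mlock((void *)start, end - start);
			}
		}
		fclose(f);
		return -1;
	}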
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@...utronix.de>
Tested-by: Nam Cao <namcao@...utronix.de>
---
lib/vdso/datastore.c | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c
index ed1aa3e27b13f8b48d18dad9488e0798f49cb338..9a1af01f1c4db95255dd67b59129791cc39d37c0 100644
--- a/lib/vdso/datastore.c
+++ b/lib/vdso/datastore.c
@@ -40,8 +40,8 @@ struct vdso_arch_data *vdso_k_arch_data = &vdso_arch_data_store.data;
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct page *timens_page = find_timens_vvar_page(vma);
- unsigned long addr, pfn;
+ struct page *page, *timens_page = find_timens_vvar_page(vma);
+ unsigned long addr;
vm_fault_t err;
if (unlikely(vmf->flags & FAULT_FLAG_REMOTE))
@@ -53,17 +53,17 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
case VDSO_TIME_PAGE_OFFSET:
if (!IS_ENABLED(CONFIG_HAVE_GENERIC_VDSO))
return VM_FAULT_SIGBUS;
- pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data));
+ page = virt_to_page(vdso_k_time_data);
if (timens_page) {
/*
* Fault in VVAR page too, since it will be accessed
* to get clock data anyway.
*/
addr = vmf->address + VDSO_TIMENS_PAGE_OFFSET * PAGE_SIZE;
- err = vmf_insert_pfn(vma, addr, pfn);
+ err = vmf_insert_page(vma, addr, page);
if (unlikely(err & VM_FAULT_ERROR))
return err;
- pfn = page_to_pfn(timens_page);
+ page = timens_page;
}
break;
case VDSO_TIMENS_PAGE_OFFSET:
@@ -76,24 +76,25 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
*/
if (!IS_ENABLED(CONFIG_TIME_NS) || !timens_page)
return VM_FAULT_SIGBUS;
- pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data));
+ page = virt_to_page(vdso_k_time_data);
break;
case VDSO_RNG_PAGE_OFFSET:
if (!IS_ENABLED(CONFIG_VDSO_GETRANDOM))
return VM_FAULT_SIGBUS;
- pfn = __phys_to_pfn(__pa_symbol(vdso_k_rng_data));
+ page = virt_to_page(vdso_k_rng_data);
break;
case VDSO_ARCH_PAGES_START ... VDSO_ARCH_PAGES_END:
if (!IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA))
return VM_FAULT_SIGBUS;
- pfn = __phys_to_pfn(__pa_symbol(vdso_k_arch_data)) +
- vmf->pgoff - VDSO_ARCH_PAGES_START;
+ page = nth_page(virt_to_page(vdso_k_arch_data), vmf->pgoff - VDSO_ARCH_PAGES_START);
break;
default:
return VM_FAULT_SIGBUS;
}
- return vmf_insert_pfn(vma, vmf->address, pfn);
+ get_page(page);
+ vmf->page = page;
+ return 0;
}
const struct vm_special_mapping vdso_vvar_mapping = {
@@ -104,8 +105,8 @@ const struct vm_special_mapping vdso_vvar_mapping = {
struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr)
{
return _install_special_mapping(mm, addr, VDSO_NR_PAGES * PAGE_SIZE,
- VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP |
- VM_PFNMAP | VM_SEALED_SYSMAP,
+ VM_READ | VM_MAYREAD | VM_DONTDUMP |
+ VM_MIXEDMAP | VM_SEALED_SYSMAP,
&vdso_vvar_mapping);
}
--
2.50.1