[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190425161416.26600-16-dima@arista.com>
Date: Thu, 25 Apr 2019 17:14:04 +0100
From: Dmitry Safonov <dima@...sta.com>
To: linux-kernel@...r.kernel.org
Cc: Dmitry Safonov <dima@...sta.com>, Adrian Reber <adrian@...as.de>,
Andrei Vagin <avagin@...nvz.org>,
Andy Lutomirski <luto@...nel.org>,
Arnd Bergmann <arnd@...db.de>,
Christian Brauner <christian.brauner@...ntu.com>,
Cyrill Gorcunov <gorcunov@...nvz.org>,
Dmitry Safonov <0x7f454c46@...il.com>,
"Eric W. Biederman" <ebiederm@...ssion.com>,
"H. Peter Anvin" <hpa@...or.com>, Ingo Molnar <mingo@...hat.com>,
Jeff Dike <jdike@...toit.com>, Oleg Nesterov <oleg@...hat.com>,
Pavel Emelyanov <xemul@...tuozzo.com>,
Shuah Khan <shuah@...nel.org>,
Thomas Gleixner <tglx@...utronix.de>,
Vincenzo Frascino <vincenzo.frascino@....com>,
containers@...ts.linux-foundation.org, criu@...nvz.org,
linux-api@...r.kernel.org, x86@...nel.org
Subject: [PATCHv3 15/27] x86/vdso: Allocate timens vdso
As discussed in the timens RFC, adding a new conditional branch
`if (inside_time_ns)` in the VDSO for all processes is undesirable.
It would add a penalty for everybody, as the branch predictor may
mispredict the jump. Instruction cache lines are also wasted on cmp/jmp.
These side effects of introducing the time namespace are very much
unwanted, given how much work has been spent on micro-optimising the
vdso code.
The proposal is to allocate a second vdso image at boot time, with the
timens code dynamically patched out (disabled by static_branch).
Allocate another vdso and copy the original code.
Signed-off-by: Dmitry Safonov <dima@...sta.com>
---
arch/x86/entry/vdso/vdso2c.h | 2 +-
arch/x86/entry/vdso/vma.c | 36 +++++++++++++++++++++++++++++++-----
arch/x86/include/asm/vdso.h | 9 +++++----
3 files changed, 37 insertions(+), 10 deletions(-)
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 086a11aee0e8..660f725a02c1 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -156,7 +156,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
}
fprintf(outfile, "\n};\n\n");
- fprintf(outfile, "const struct vdso_image %s = {\n", name);
+ fprintf(outfile, "struct vdso_image %s __ro_after_init = {\n", name);
fprintf(outfile, "\t.text = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
if (alt_sec) {
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 80cbb2167eba..6aae9c0d400d 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -30,26 +30,52 @@
unsigned int __read_mostly vdso64_enabled = 1;
#endif
-void __init init_vdso_image(const struct vdso_image *image)
+void __init init_vdso_image(struct vdso_image *image)
{
BUG_ON(image->size % PAGE_SIZE != 0);
apply_alternatives((struct alt_instr *)(image->text + image->alt),
(struct alt_instr *)(image->text + image->alt +
image->alt_len));
+#ifdef CONFIG_TIME_NS
+ image->text_timens = vmalloc_32(image->size);
+ if (WARN_ON(image->text_timens == NULL))
+ return;
+
+ memcpy(image->text_timens, image->text, image->size);
+#endif
}
struct linux_binprm;
+#ifdef CONFIG_TIME_NS
+static inline struct timens_offsets *current_timens_offsets(void)
+{
+ struct time_namespace *ns = current->nsproxy->time_ns;
+
+ return ns->offsets;
+}
+#else
+static inline struct timens_offsets *current_timens_offsets(void)
+{
+ return NULL;
+}
+#endif
+
static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+ unsigned long offset = vmf->pgoff << PAGE_SHIFT;
if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
return VM_FAULT_SIGBUS;
- vmf->page = virt_to_page(image->text + (vmf->pgoff << PAGE_SHIFT));
+ if (current_timens_offsets() && image->text_timens)
+ vmf->page = vmalloc_to_page(image->text_timens + offset);
+ else
+ vmf->page = virt_to_page(image->text + offset);
+
get_page(vmf->page);
return 0;
}
@@ -138,13 +164,13 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
return vmf_insert_pfn(vma, vmf->address,
vmalloc_to_pfn(tsc_pg));
} else if (sym_offset == image->sym_timens_page) {
- struct time_namespace *ns = current->nsproxy->time_ns;
+ struct timens_offsets *offsets = current_timens_offsets();
unsigned long pfn;
- if (!ns->offsets)
+ if (!offsets)
pfn = page_to_pfn(ZERO_PAGE(0));
else
- pfn = page_to_pfn(virt_to_page(ns->offsets));
+ pfn = page_to_pfn(virt_to_page(offsets));
return vmf_insert_pfn(vma, vmf->address, pfn);
}
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index b0eb59c198eb..a620c7f23425 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -12,6 +12,7 @@
struct vdso_image {
void *text;
+ void *text_timens;
unsigned long size; /* Always a multiple of PAGE_SIZE */
unsigned long alt, alt_len;
@@ -31,18 +32,18 @@ struct vdso_image {
};
#ifdef CONFIG_X86_64
-extern const struct vdso_image vdso_image_64;
+extern struct vdso_image vdso_image_64;
#endif
#ifdef CONFIG_X86_X32
-extern const struct vdso_image vdso_image_x32;
+extern struct vdso_image vdso_image_x32;
#endif
#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
-extern const struct vdso_image vdso_image_32;
+extern struct vdso_image vdso_image_32;
#endif
-extern void __init init_vdso_image(const struct vdso_image *image);
+extern void __init init_vdso_image(struct vdso_image *image);
extern int map_vdso_once(const struct vdso_image *image, unsigned long addr);
--
2.21.0
Powered by blists - more mailing lists