From: Andy Lutomirski The kernel is very erratic as to which pagetables have _PAGE_USER set. The vsyscall page gets lucky: it seems that all of the relevant pagetables are among the apparently arbitrary ones that set _PAGE_USER. Rather than relying on chance, just explicitly set _PAGE_USER. This will let us clean up pagetable setup to stop setting _PAGE_USER. The added code can also be reused by pagetable isolation to manage the _PAGE_USER bit in the usermode tables. Signed-off-by: Andy Lutomirski Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner --- arch/x86/entry/vsyscall/vsyscall_64.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -329,16 +329,47 @@ int in_gate_area_no_mm(unsigned long add return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; } +/* + * The VSYSCALL page is the only user-accessible page in the kernel address + * range. Normally, the kernel page tables can have _PAGE_USER clear, but + * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls + * are enabled. + * + * Some day we may create a "minimal" vsyscall mode in which we emulate + * vsyscalls but leave the page not present. If so, we skip calling + * this. + */ +static void __init set_vsyscall_pgtable_user_bits(void) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_k(VSYSCALL_ADDR); + pgd->pgd |= _PAGE_USER; + p4d = p4d_offset(pgd, VSYSCALL_ADDR); +#if CONFIG_PGTABLE_LEVELS >= 5 + p4d->p4d |= _PAGE_USER; +#endif + pud = pud_offset(p4d, VSYSCALL_ADDR); + pud->pud |= _PAGE_USER; + pmd = pmd_offset(pud, VSYSCALL_ADDR); + pmd->pmd |= _PAGE_USER; +} + void __init map_vsyscall(void) { extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - if (vsyscall_mode != NONE) + if (vsyscall_mode != NONE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, vsyscall_mode == NATIVE ? PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR); + set_vsyscall_pgtable_user_bits(); + } BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR);