lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 12 Jul 2018 14:09:45 -0700
From:   Andy Lutomirski <luto@...capital.net>
To:     Joerg Roedel <joro@...tes.org>
Cc:     Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...nel.org>,
        "H . Peter Anvin" <hpa@...or.com>, x86@...nel.org,
        linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Andy Lutomirski <luto@...nel.org>,
        Dave Hansen <dave.hansen@...el.com>,
        Josh Poimboeuf <jpoimboe@...hat.com>,
        Juergen Gross <jgross@...e.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Borislav Petkov <bp@...en8.de>, Jiri Kosina <jkosina@...e.cz>,
        Boris Ostrovsky <boris.ostrovsky@...cle.com>,
        Brian Gerst <brgerst@...il.com>,
        David Laight <David.Laight@...lab.com>,
        Denys Vlasenko <dvlasenk@...hat.com>,
        Eduardo Valentin <eduval@...zon.com>,
        Greg KH <gregkh@...uxfoundation.org>,
        Will Deacon <will.deacon@....com>, aliguori@...zon.com,
        daniel.gruss@...k.tugraz.at, hughd@...gle.com, keescook@...gle.com,
        Andrea Arcangeli <aarcange@...hat.com>,
        Waiman Long <llong@...hat.com>, Pavel Machek <pavel@....cz>,
        "David H . Gutteridge" <dhgutteridge@...patico.ca>, jroedel@...e.de
Subject: Re: [PATCH 07/39] x86/entry/32: Enter the kernel via trampoline stack



> On Jul 11, 2018, at 4:29 AM, Joerg Roedel <joro@...tes.org> wrote:
> 
> From: Joerg Roedel <jroedel@...e.de>
> 
> Use the entry-stack as a trampoline to enter the kernel. The
> entry-stack is already in the cpu_entry_area and will be
> mapped to userspace when PTI is enabled.
> 
> Signed-off-by: Joerg Roedel <jroedel@...e.de>
> ---
> arch/x86/entry/entry_32.S        | 136 +++++++++++++++++++++++++++++++--------
> arch/x86/include/asm/switch_to.h |   6 +-
> arch/x86/kernel/asm-offsets.c    |   1 +
> arch/x86/kernel/cpu/common.c     |   5 +-
> arch/x86/kernel/process.c        |   2 -
> arch/x86/kernel/process_32.c     |  10 +--
> 6 files changed, 121 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
> index 61303fa..528db7d 100644
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -154,25 +154,36 @@
> 
> #endif /* CONFIG_X86_32_LAZY_GS */
> 
> -.macro SAVE_ALL pt_regs_ax=%eax
> +.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
>    cld
> +    /* Push segment registers and %eax */
>    PUSH_GS
>    pushl    %fs
>    pushl    %es
>    pushl    %ds
>    pushl    \pt_regs_ax
> +
> +    /* Load kernel segments */
> +    movl    $(__USER_DS), %eax

If \pt_regs_ax != %eax, then this will behave oddly. Maybe it’s okay. But I don’t see why this change was needed at all.

> +    movl    %eax, %ds
> +    movl    %eax, %es
> +    movl    $(__KERNEL_PERCPU), %eax
> +    movl    %eax, %fs
> +    SET_KERNEL_GS %eax
> +
> +    /* Push integer registers and complete PT_REGS */
>    pushl    %ebp
>    pushl    %edi
>    pushl    %esi
>    pushl    %edx
>    pushl    %ecx
>    pushl    %ebx
> -    movl    $(__USER_DS), %edx
> -    movl    %edx, %ds
> -    movl    %edx, %es
> -    movl    $(__KERNEL_PERCPU), %edx
> -    movl    %edx, %fs
> -    SET_KERNEL_GS %edx
> +
> +    /* Switch to kernel stack if necessary */
> +.if \switch_stacks > 0
> +    SWITCH_TO_KERNEL_STACK
> +.endif
> +
> .endm
> 
> /*
> @@ -269,6 +280,72 @@
> .Lend_\@:
> #endif /* CONFIG_X86_ESPFIX32 */
> .endm
> +
> +
> +/*
> + * Called with pt_regs fully populated and kernel segments loaded,
> + * so we can access PER_CPU and use the integer registers.
> + *
> + * We need to be very careful here with the %esp switch, because an NMI
> + * can happen anywhere. If the NMI handler finds itself on the
> + * entry-stack, it will overwrite the task-stack and everything we
> + * copied there. So allocate the stack-frame on the task-stack and
> + * switch to it before we do any copying.

Ick, right. Same with machine check, though. You could alternatively fix it by running NMIs on an irq stack if the irq count is zero.  How confident are you that you got #MC right?

> + */
> +.macro SWITCH_TO_KERNEL_STACK
> +
> +    ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV
> +
> +    /* Are we on the entry stack? Bail out if not! */
> +    movl    PER_CPU_VAR(cpu_entry_area), %edi
> +    addl    $CPU_ENTRY_AREA_entry_stack, %edi
> +    cmpl    %esp, %edi
> +    jae    .Lend_\@

That’s an alarming assumption about the address space layout. How about an xor and an and instead of cmpl?  As it stands, if the address layout ever changes, the failure may be rather subtle.

Anyway, wouldn’t it be easier to solve this by just not switching stacks on entries from kernel mode and making the entry stack bigger?  Stick an assertion in the scheduling code that we’re not on an entry stack, perhaps.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ