lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 3 Jul 2019 13:27:09 -0700
From:   Andy Lutomirski <luto@...nel.org>
To:     root <peterz@...radead.org>
Cc:     Thomas Gleixner <tglx@...utronix.de>,
        Borislav Petkov <bp@...en8.de>, Ingo Molnar <mingo@...nel.org>,
        Steven Rostedt <rostedt@...dmis.org>,
        Andrew Lutomirski <luto@...nel.org>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        "H. Peter Anvin" <hpa@...or.com>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Juergen Gross <jgross@...e.com>,
        LKML <linux-kernel@...r.kernel.org>,
        He Zhe <zhe.he@...driver.com>,
        Joel Fernandes <joel@...lfernandes.org>, devel@...ukata.com
Subject: Re: [PATCH 3/3] x86/mm, tracing: Fix CR2 corruption

On Wed, Jul 3, 2019 at 3:28 AM root <peterz@...radead.org> wrote:
>
> Despire the current efforts to read CR2 before tracing happens there
> still exist a number of possible holes:
>
>   idtentry page_fault             do_page_fault           has_error_code=1
>     call error_entry
>       TRACE_IRQS_OFF
>         call trace_hardirqs_off*
>           #PF // modifies CR2
>
>       CALL_enter_from_user_mode
>         __context_tracking_exit()
>           trace_user_exit(0)
>             #PF // modifies CR2
>
>     call do_page_fault
>       address = read_cr2(); /* whoopsie */
>
> And similar for i386.
>
> Fix it by pulling the CR2 read into the entry code, before any of that
> stuff gets a chance to run and ruin things.
>
> Ideally we'll clean up the entry code by moving this tracing and
> context tracking nonsense into C some day, but let's not delay fixing
> this longer.
>
> Reported-by: He Zhe <zhe.he@...driver.com>
> Reported-by: Eiichi Tsukata <devel@...ukata.com>
> Debugged-by: Steven Rostedt <rostedt@...dmis.org>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> ---
>  arch/x86/entry/entry_32.S       |   25 ++++++++++++++++++++++---
>  arch/x86/entry/entry_64.S       |   28 ++++++++++++++--------------
>  arch/x86/include/asm/kvm_para.h |    2 +-
>  arch/x86/include/asm/traps.h    |    2 +-
>  arch/x86/kernel/kvm.c           |    8 ++++----
>  arch/x86/mm/fault.c             |   28 ++++++++++------------------
>  6 files changed, 52 insertions(+), 41 deletions(-)
>
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -1443,9 +1443,28 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vec
>
>  ENTRY(page_fault)
>         ASM_CLAC
> -       pushl   $do_page_fault
> -       ALIGN
> -       jmp common_exception
> +       pushl   $0; /* %gs's slot on the stack */
> +
> +       SAVE_ALL switch_stacks=1 skip_gs=1
> +
> +       ENCODE_FRAME_POINTER
> +       UNWIND_ESPFIX_STACK
> +
> +       /* fixup %gs */
> +       GS_TO_REG %ecx
> +       REG_TO_PTGS %ecx
> +       SET_KERNEL_GS %ecx
> +
> +       GET_CR2_INTO(%ecx)                      # might clobber %eax
> +
> +       /* fixup orig %eax */
> +       movl    PT_ORIG_EAX(%esp), %edx         # get the error code
> +       movl    $-1, PT_ORIG_EAX(%esp)          # no syscall to restart
> +
> +       TRACE_IRQS_OFF
> +       movl    %esp, %eax                      # pt_regs pointer
> +       call    do_page_fault
> +       jmp     ret_from_exception
>  END(page_fault)
>
>  common_exception:
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -901,7 +901,7 @@ apicinterrupt IRQ_WORK_VECTOR                       irq_work
>   * @paranoid == 2 is special: the stub will never switch stacks.  This is for
>   * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
>   */
> -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0
> +.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
>  ENTRY(\sym)
>         UNWIND_HINT_IRET_REGS offset=\has_error_code*8
>
> @@ -937,18 +937,27 @@ ENTRY(\sym)
>
>         .if \paranoid
>         call    paranoid_entry
> +       /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
>         .else
>         call    error_entry
>         .endif
>         UNWIND_HINT_REGS
> -       /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
>
> -       .if \paranoid
> +       .if \read_cr2
> +       GET_CR2_INTO(%rdx);                     /* can clobber %rax */
> +       .endif
> +
>         .if \shift_ist != -1
>         TRACE_IRQS_OFF_DEBUG                    /* reload IDT in case of recursion */
>         .else
>         TRACE_IRQS_OFF
>         .endif
> +
> +       .if \paranoid == 0
> +       testb   $3, CS(%rsp)
> +       jz      .Lfrom_kernel_no_context_tracking_\@
> +       CALL_enter_from_user_mode
> +.Lfrom_kernel_no_context_tracking_\@:
>         .endif
>
>         movq    %rsp, %rdi                      /* pt_regs pointer */
> @@ -1180,10 +1189,10 @@ idtentry xenint3                do_int3                 has_error_co
>  #endif
>
>  idtentry general_protection    do_general_protection   has_error_code=1
> -idtentry page_fault            do_page_fault           has_error_code=1
> +idtentry page_fault            do_page_fault           has_error_code=1        read_cr2=1
>
>  #ifdef CONFIG_KVM_GUEST
> -idtentry async_page_fault      do_async_page_fault     has_error_code=1
> +idtentry async_page_fault      do_async_page_fault     has_error_code=1        read_cr2=1
>  #endif
>
>  #ifdef CONFIG_X86_MCE
> @@ -1338,18 +1347,9 @@ ENTRY(error_entry)
>         movq    %rax, %rsp                      /* switch stack */
>         ENCODE_FRAME_POINTER
>         pushq   %r12
> -
> -       /*
> -        * We need to tell lockdep that IRQs are off.  We can't do this until
> -        * we fix gsbase, and we should do it before enter_from_user_mode
> -        * (which can take locks).
> -        */
> -       TRACE_IRQS_OFF

This hunk looks wrong.  Am I missing some other place that handles the
case where we enter from kernel mode and IRQs were on?

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ