linux-kernel - Re: [PATCH v5 3/6] x86/sev-es: Split up runtime #VC handler for correct state tracking

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <YMohCkW/mChNpkqi@hirez.programming.kicks-ass.net>
Date:   Wed, 16 Jun 2021 18:04:26 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Joerg Roedel <joro@...tes.org>
Cc:     x86@...nel.org, Joerg Roedel <jroedel@...e.de>, hpa@...or.com,
        Andy Lutomirski <luto@...nel.org>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Jiri Slaby <jslaby@...e.cz>,
        Dan Williams <dan.j.williams@...el.com>,
        Tom Lendacky <thomas.lendacky@....com>,
        Juergen Gross <jgross@...e.com>,
        Kees Cook <keescook@...omium.org>,
        David Rientjes <rientjes@...gle.com>,
        Cfir Cohen <cfir@...gle.com>,
        Erdem Aktas <erdemaktas@...gle.com>,
        Masami Hiramatsu <mhiramat@...nel.org>,
        Mike Stunes <mstunes@...are.com>,
        Sean Christopherson <seanjc@...gle.com>,
        Martin Radev <martin.b.radev@...il.com>,
        Arvind Sankar <nivedita@...m.mit.edu>,
        linux-coco@...ts.linux.dev, linux-kernel@...r.kernel.org,
        kvm@...r.kernel.org, virtualization@...ts.linux-foundation.org
Subject: Re: [PATCH v5 3/6] x86/sev-es: Split up runtime #VC handler for
 correct state tracking

On Mon, Jun 14, 2021 at 03:53:24PM +0200, Joerg Roedel wrote:

> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -506,7 +506,7 @@ SYM_CODE_START(\asmsym)
>  
>  	movq	%rsp, %rdi		/* pt_regs pointer */
>  
> -	call	\cfunc
> +	call	kernel_\cfunc
>  
>  	/*
>  	 * No need to switch back to the IST stack. The current stack is either
> @@ -517,7 +517,7 @@ SYM_CODE_START(\asmsym)
>  
>  	/* Switch to the regular task stack */
>  .Lfrom_usermode_switch_stack_\@:
> -	idtentry_body safe_stack_\cfunc, has_error_code=1
> +	idtentry_body user_\cfunc, has_error_code=1
>  
>  _ASM_NOKPROBE(\asmsym)
>  SYM_CODE_END(\asmsym)

Consistency with idtentry_mce_db would seem to suggest using \cfunc and
noist_\cfunc.

amluto, tglx: do we have strong feelings on consistency?


> +static bool noinstr vc_check_and_handle_db(struct pt_regs *regs, unsigned long error_code)
> +{
> +	if (likely(error_code != SVM_EXIT_EXCP_BASE + X86_TRAP_DB))
> +		return false;
>  
> +	vc_handle_trap_db(regs);

It's a bit sad this does user_mode(regs) again.

> +
> +	return true;
> +}

Maybe something like:

static __always_inline bool vc_is_db(unsigned long error_code)
{
	return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
}

> +
> +/*
> + * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
> + * and will panic when an error happens.
> + */
> +DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
> +{
> +	irqentry_state_t irq_state;
>  
> +	/*
> +	 * With the current implementation it is always possible to switch to a
> +	 * safe stack because #VC exceptions only happen at known places, like
> +	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
> +	 * also happen with code instrumentation when the hypervisor intercepts
> +	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
> +	 * exceptions currently also only happen in safe places.
> +	 *
> +	 * But keep this here in case the noinstr annotations are violated due
> +	 * to bug elsewhere.
> +	 */
> +	if (unlikely(on_vc_fallback_stack(regs))) {
> +		instrumentation_begin();
> +		panic("Can't handle #VC exception from unsupported context\n");
> +		instrumentation_end();
> +	}
> +
> +	/*
> +	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
> +	 */
> +	if (vc_check_and_handle_db(regs, error_code))
> +		return;

	if (vc_is_db(error_core)) {
		exc_debug(regs);
		return;
	}

> +
> +	irq_state = irqentry_nmi_enter(regs);
> +
> +	instrumentation_begin();
> +
> +	if (!vc_raw_handle_exception(regs, error_code)) {
>  		/* Show some debug info */
>  		show_regs(regs);
>  
> @@ -1443,23 +1448,38 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
>  		panic("Returned from Terminate-Request to Hypervisor\n");
>  	}
>  
> +	instrumentation_end();
> +	irqentry_nmi_exit(regs, irq_state);
>  }
>  
> +/*
> + * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
> + * and will kill the current task with SIGBUS when an error happens.
> + */
> +DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
>  {
> +	irqentry_state_t irq_state;
> +
> +	/*
> +	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
> +	 */
> +	if (vc_check_and_handle_db(regs, error_code))
> +		return;

	if (vs_is_db(error_code)) {
		noist_exc_debug(regs);
		return;
	}

> +
> +	irq_state = irqentry_enter(regs);
>  	instrumentation_begin();
>  
> +	if (!vc_raw_handle_exception(regs, error_code)) {
> +		/*
> +		 * Do not kill the machine if user-space triggered the
> +		 * exception. Send SIGBUS instead and let user-space deal with
> +		 * it.
> +		 */
> +		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
> +	}
> +
> +	instrumentation_end();
> +	irqentry_exit(regs, irq_state);
>  }

Other than that, this seems *much* nicer. Thanks!