lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZDSEjhGV9D90J6Bx@google.com>
Date:   Mon, 10 Apr 2023 14:50:06 -0700
From:   Sean Christopherson <seanjc@...gle.com>
To:     Xin Li <xin3.li@...el.com>
Cc:     linux-kernel@...r.kernel.org, x86@...nel.org, kvm@...r.kernel.org,
        tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
        dave.hansen@...ux.intel.com, hpa@...or.com, peterz@...radead.org,
        andrew.cooper3@...rix.com, pbonzini@...hat.com,
        ravi.v.shankar@...el.com, jiangshanlai@...il.com,
        shan.kang@...el.com
Subject: Re: [PATCH v8 33/33] KVM: x86/vmx: refactor VMX_DO_EVENT_IRQOFF to
 generate FRED stack frames

"KVM: VMX:" for the shortlog please.

On Mon, Apr 10, 2023, Xin Li wrote:
> -.macro VMX_DO_EVENT_IRQOFF call_insn call_target
> +.macro VMX_DO_EVENT_IRQOFF call_insn call_target fred=0 nmi=0
>  	/*
>  	 * Unconditionally create a stack frame, getting the correct RSP on the
>  	 * stack (for x86-64) would take two instructions anyways, and RBP can
> @@ -41,16 +43,55 @@
>  	mov %_ASM_SP, %_ASM_BP
>  
>  #ifdef CONFIG_X86_64
> +#ifdef CONFIG_X86_FRED
> +	/*
> +	 * It's not necessary to change current stack level for handling IRQ/NMI
> +	 * because the state of the kernel stack is well defined in this place
> +	 * in the code, and it is known not to be deep in a bunch of nested I/O
> +	 * layer handlers that eat up the stack.
> +	 *
> +	 * Before starting to push a FRED stack frame, FRED reserves a redzone
> +	 * (for CALL emulation) and aligns RSP to a 64-byte boundary.
> +	 */
> +	sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
> +	and $FRED_STACK_FRAME_RSP_MASK, %rsp
> +
> +	/*
> +	 * A FRED stack frame has extra 16 bytes of information pushed at the
> +	 * regular stack top comparing to an IDT stack frame.
> +	 */
> +	push $0		/* Reserved by FRED, must be 0 */
> +	push $0		/* FRED event data, 0 for NMI and external interrupts */
> +#else
>  	/*
>  	 * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
>  	 * creating the synthetic interrupt stack frame for the IRQ/NMI.
>  	 */
>  	and  $-16, %rsp
> -	push $__KERNEL_DS
> +#endif
> +
> +	.if \fred
> +	.if \nmi
> +	mov $(2 << 32 | 2 << 48), %rax		/* NMI event type and vector */
> +	.else
> +	mov %rdi, %rax
> +	shl $32, %rax				/* External interrupt vector */
> +	.endif
> +	add $__KERNEL_DS, %rax
> +	bts $57, %rax				/* Set 64-bit mode */
> +	.else
> +	mov $__KERNEL_DS, %rax
> +	.endif
> +	push %rax

This is painfully difficult to read, and the trampolines only add to that pain.
Using macros instead of magic numbers would alleviate a small amount of pain, but
but the #ifdefs and .if \fred/\nmi are the real culprits.

>  static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
> @@ -6916,14 +6916,20 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
>  {
>  	u32 intr_info = vmx_get_intr_info(vcpu);
>  	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
> -	gate_desc *desc = (gate_desc *)host_idt_base + vector;
> +	unsigned long entry_or_vector;
> +
> +#ifdef CONFIG_X86_64
> +	entry_or_vector = vector;
> +#else
> +	entry_or_vector = gate_offset((gate_desc *)host_idt_base + vector);
> +#endif

And then this is equally gross.  Rather than funnel FRED+legacy into a single
function only to split them back out, just route FRED into its own asm subroutine.
The common bits are basically the creation/destruction of the stack frame and the
CALL itself, i.e. the truly interesting bits are what's different.

Pretty much all of the #ifdeffery goes away, the helpers just need #ifdefs to
play nice with CONFIG_X86_FRED=n.  E.g. something like the below as a starting
point (it most definitely doesn't compile, and most definitely isn't 100% correct).

---
 arch/x86/kvm/vmx/vmenter.S | 72 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.c     | 19 ++++++++--
 2 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 631fd7da2bc3..a6929c78e038 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -2,12 +2,14 @@
 #include <linux/linkage.h>
 #include <asm/asm.h>
 #include <asm/bitsperlong.h>
+#include <asm/fred.h>
 #include <asm/kvm_vcpu_regs.h>
 #include <asm/nospec-branch.h>
 #include <asm/percpu.h>
 #include <asm/segment.h>
 #include "kvm-asm-offsets.h"
 #include "run_flags.h"
+#include "../../entry/calling.h"
 
 #define WORD_SIZE (BITS_PER_LONG / 8)
 
@@ -31,6 +33,62 @@
 #define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
 #endif
 
+#ifdef CONFIG_X86_FRED
+.macro VMX_DO_FRED_EVENT_IRQOFF call_target cs_val
+	/*
+	 * Unconditionally create a stack frame, getting the correct RSP on the
+	 * stack (for x86-64) would take two instructions anyways, and RBP can
+	 * be used to restore RSP to make objtool happy (see below).
+	 */
+	push %_ASM_BP
+	mov %_ASM_SP, %_ASM_BP
+
+	/*
+	 * Don't check the FRED stack level, the call stack leading to this
+	 * helper is effectively constant and shallow (relatively speaking).
+	 *
+	 * Emulate the FRED-defined redzone and stack alignment (128 bytes and
+	 * 64 bytes respectively).
+	 */
+	sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
+	and $FRED_STACK_FRAME_RSP_MASK, %rsp
+
+	/*
+	* A FRED stack frame has extra 16 bytes of information pushed at the
+	* regular stack top compared to an IDT stack frame.
+	*/
+	push $0         /* Reserved by FRED, must be 0 */
+	push $0         /* FRED event data, 0 for NMI and external interrupts */
+	shl $32, %rax
+	orq $__KERNEL_DS | $FRED_64_BIT_MODE, %ax
+	push %rax	/* Vector (from the "caller") and DS */
+
+	push %rbp
+	pushf
+	push \cs_val
+
+	push $0 /* FRED error code, 0 for NMI and external interrupts */
+	PUSH_REGS
+
+	/* Load @pt_regs */
+	movq    %rsp, %_ASM_ARG1
+
+	call \call_target
+
+	POP_REGS
+
+	/*
+	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
+	 * the correct value.  objtool doesn't know the callee will IRET and,
+	 * without the explicit restore, thinks the stack is getting walloped.
+	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
+	 */
+	mov %_ASM_BP, %_ASM_SP
+	pop %_ASM_BP
+	RET
+.endm
+#endif
+
 .macro VMX_DO_EVENT_IRQOFF call_insn call_target
 	/*
 	 * Unconditionally create a stack frame, getting the correct RSP on the
@@ -299,6 +357,14 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 
 SYM_FUNC_END(__vmx_vcpu_run)
 
+#ifdef CONFIG_X86_FRED
+SYM_FUNC_START(vmx_do_fred_nmi_irqoff)
+	push $FRED_NMI_ERROR_CODE
+	mov $NMI_VECTOR | $FRED_NMI_SOMETHING, %eax
+	VMX_DO_FRED_EVENT_IRQOFF call fred_entrypoint_kernel $FRED_NMI_CS_VAL
+SYM_FUNC_END(vmx_do_nmi_irqoff)
+#endif
+
 SYM_FUNC_START(vmx_do_nmi_irqoff)
 	VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
 SYM_FUNC_END(vmx_do_nmi_irqoff)
@@ -357,6 +423,12 @@ SYM_FUNC_START(vmread_error_trampoline)
 SYM_FUNC_END(vmread_error_trampoline)
 #endif
 
+#ifdef CONFIG_X86_FRED
+SYM_FUNC_START(vmx_do_fred_interrupt_irqoff)
+	mov %_ASM_ARG1, %rax
+	VMX_DO_FRED_EVENT_IRQOFF call external_interrupt
+#endif
+
 SYM_FUNC_START(vmx_do_interrupt_irqoff)
 	VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
 SYM_FUNC_END(vmx_do_interrupt_irqoff)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 11080a649f60..42f50b0cc125 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6891,6 +6891,14 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 	memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
 }
 
+#ifdef CONFIG_X86_FRED
+void vmx_do_fred_interrupt_irqoff(unsigned int vector);
+void vmx_do_fred_nmi_irqoff(unsigned int vector);
+#else
+#define vmx_do_fred_interrupt_irqoff(x) BUG();
+#define vmx_do_fred_nmi_irqoff(x) BUG();
+#endif
+
 void vmx_do_interrupt_irqoff(unsigned long entry);
 void vmx_do_nmi_irqoff(void);
 
@@ -6933,14 +6941,16 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 {
 	u32 intr_info = vmx_get_intr_info(vcpu);
 	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
-	gate_desc *desc = (gate_desc *)host_idt_base + vector;
 
 	if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
 	    "unexpected VM-Exit interrupt info: 0x%x", intr_info))
 		return;
 
 	kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
-	vmx_do_interrupt_irqoff(gate_offset(desc));
+	if (cpu_feature_enabled(X86_FEATURE_FRED))
+		vmx_do_fred_interrupt_irqoff(vector);
+	else
+		vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
 	kvm_after_interrupt(vcpu);
 
 	vcpu->arch.at_instruction_boundary = true;
@@ -7226,7 +7236,10 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 	if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
 	    is_nmi(vmx_get_intr_info(vcpu))) {
 		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
-		vmx_do_nmi_irqoff();
+		if (cpu_feature_enabled(X86_FEATURE_FRED))
+			vmx_do_fred_nmi_irqoff();
+		else
+			vmx_do_nmi_irqoff();
 		kvm_after_interrupt(vcpu);
 	}
 

base-commit: 33d1a64081c98e390e064db18738428d6fb96f95
-- 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ