[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <SA1PR11MB6734A497DC7A0243A2F401EEA808A@SA1PR11MB6734.namprd11.prod.outlook.com>
Date: Thu, 3 Aug 2023 16:58:31 +0000
From: "Li, Xin3" <xin3.li@...el.com>
To: "Li, Xin3" <xin3.li@...el.com>,
"Christopherson,, Sean" <seanjc@...gle.com>
CC: "linux-doc@...r.kernel.org" <linux-doc@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"linux-edac@...r.kernel.org" <linux-edac@...r.kernel.org>,
"linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
"kvm@...r.kernel.org" <kvm@...r.kernel.org>,
"xen-devel@...ts.xenproject.org" <xen-devel@...ts.xenproject.org>,
Jonathan Corbet <corbet@....net>,
"Thomas Gleixner" <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>,
"Borislav Petkov" <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>,
"x86@...nel.org" <x86@...nel.org>,
"H . Peter Anvin" <hpa@...or.com>,
"Lutomirski, Andy" <luto@...nel.org>,
Oleg Nesterov <oleg@...hat.com>,
"Luck, Tony" <tony.luck@...el.com>,
"K . Y . Srinivasan" <kys@...rosoft.com>,
Haiyang Zhang <haiyangz@...rosoft.com>,
Wei Liu <wei.liu@...nel.org>,
"Cui, Dexuan" <decui@...rosoft.com>,
Paolo Bonzini <pbonzini@...hat.com>,
"Wanpeng Li" <wanpengli@...cent.com>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
"Peter Zijlstra" <peterz@...radead.org>,
"Gross, Jurgen" <jgross@...e.com>,
"Stefano Stabellini" <sstabellini@...nel.org>,
Oleksandr Tyshchenko <oleksandr_tyshchenko@...m.com>,
Josh Poimboeuf <jpoimboe@...nel.org>,
"Paul E . McKenney" <paulmck@...nel.org>,
Catalin Marinas <catalin.marinas@....com>,
Randy Dunlap <rdunlap@...radead.org>,
"Steven Rostedt" <rostedt@...dmis.org>,
Kim Phillips <kim.phillips@....com>,
Hyeonggon Yoo <42.hyeyoo@...il.com>,
"Liam R . Howlett" <Liam.Howlett@...cle.com>,
Sebastian Reichel <sebastian.reichel@...labora.com>,
"Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>,
Suren Baghdasaryan <surenb@...gle.com>,
Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>,
Babu Moger <babu.moger@....com>,
Jim Mattson <jmattson@...gle.com>,
Sandipan Das <sandipan.das@....com>,
Lai Jiangshan <jiangshanlai@...il.com>,
Hans de Goede <hdegoede@...hat.com>,
"Chatre, Reinette" <reinette.chatre@...el.com>,
Daniel Sneddon <daniel.sneddon@...ux.intel.com>,
Breno Leitao <leitao@...ian.org>,
Nikunj A Dadhania <nikunj@....com>,
Brian Gerst <brgerst@...il.com>,
Sami Tolvanen <samitolvanen@...gle.com>,
"Alexander Potapenko" <glider@...gle.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Arnd Bergmann <arnd@...db.de>,
"Eric W . Biederman" <ebiederm@...ssion.com>,
Kees Cook <keescook@...omium.org>,
Masami Hiramatsu <mhiramat@...nel.org>,
Masahiro Yamada <masahiroy@...nel.org>,
Ze Gao <zegao2021@...il.com>,
"Raj, Ashok" <ashok.raj@...el.com>,
"Jason A . Donenfeld" <Jason@...c4.com>,
"Mark Rutland" <mark.rutland@....com>,
Jacob Pan <jacob.jun.pan@...ux.intel.com>,
Jiapeng Chong <jiapeng.chong@...ux.alibaba.com>,
Jane Malalane <jane.malalane@...rix.com>,
"Woodhouse, David" <dwmw@...zon.co.uk>,
"Ostrovsky, Boris" <boris.ostrovsky@...cle.com>,
Arnaldo Carvalho de Melo <acme@...hat.com>,
Yantengsi <siyanteng@...ngson.cn>,
Christophe Leroy <christophe.leroy@...roup.eu>,
Sathvika Vasireddy <sv@...ux.ibm.com>
Subject: RE: [PATCH RESEND v9 33/36] KVM: VMX: Add VMX_DO_FRED_EVENT_IRQOFF
for IRQ/NMI handling
> > Jumping way back to providing a wrapper for FRED, if we do that, then
> > there's no need to include calling.h, and the weird wrinkle about the
> > ERET target kinda goes away too. E.g. provide this in
> > arch/x86/entry/entry_64_fred.S
> >
> > .section .text, "ax"
> >
> > /* Note, this is instrumentation safe, and returns via RET, not ERETS!
> > */ #if IS_ENABLED(CONFIG_KVM_INTEL)
> > SYM_CODE_START(fred_irq_entry_from_kvm)
> > FRED_ENTER
> > call external_interrupt
> > FRED_EXIT
> > RET
> > SYM_CODE_END(fred_irq_entry_from_kvm)
> > EXPORT_SYMBOL_GPL(fred_irq_entry_from_kvm);
> > #endif
> >
> > and then the KVM side for this particular chunk is more simply:
> >
> > lea 1f(%rip), %rax
> > push %rax
> > push $0 /* FRED error code, 0 for NMI and external
> > interrupts */
> >
> > \branch_insn \branch_target
The call instruction here inserts a return address between the FRED
stack frame just created and the GP regs pushed in the FRED wrapper,
thus it doesn't work. I only realize it after looking at the stack layout
in the Intel Simics emulator.
> > 1:
> > VMX_DO_EVENT_FRAME_END
> > RET
>
> > Alternatively, the whole thing could be shoved into
> > arch/x86/entry/entry_64_fred.S, but at a glance I don't think that
> > would be a net positive due to the need to handle IRQs vs. NMIs.
But following your idea of "the whole thing could be shoved into ...",
plus the patch you posted, I get a patch as the following, which works
and gets rid of the 2 warnings of "with modified stack frame".
After calling external_interrupt() and POP_REGS, we can also use ERETS,
thus to unify the stack adjustment.
diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
index de9e2dc70e40..2b638914c6ed 100644
--- a/arch/x86/entry/entry_64_fred.S
+++ b/arch/x86/entry/entry_64_fred.S
@@ -4,12 +4,87 @@
*/
#include <asm/asm.h>
+#include <asm/export.h>
#include <asm/fred.h>
+#include <asm/segment.h>
#include "calling.h"
.code64
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+.macro FRED_ENTRY_FROM_KVM event_func nmi=0
+ ENDBR
+#ifdef CONFIG_X86_FRED
+ push %rbp
+ mov %rsp, %rbp
+
+ /*
+ * Don't check the FRED stack level, the call stack leading to this
+ * helper is effectively constant and shallow (relatively speaking).
+ *
+ * Emulate the FRED-defined redzone and stack alignment.
+ */
+ sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
+ and $FRED_STACK_FRAME_RSP_MASK, %rsp
+
+ /*
+ * Start to push a FRED stack frame, which is always 64 bytes:
+ *
+ * +--------+-----------------+
+ * | Bytes | Usage |
+ * +--------+-----------------+
+ * | 63:56 | Reserved |
+ * | 55:48 | Event Data |
+ * | 47:40 | SS + Event Info |
+ * | 39:32 | RSP |
+ * | 31:24 | RFLAGS |
+ * | 23:16 | CS + Aux Info |
+ * | 15:8 | RIP |
+ * | 7:0 | Error Code |
+ * +--------+-----------------+
+ */
+ push $0 /* Reserved, must be 0 */
+ push $0 /* Event data, 0 for IRQ/NMI */
+
+ shl $32, %rdi /* Event type and vector */
+ .if \nmi
+ bts $FRED_SSX_NMI_BIT, %rdi
+ .endif
+ bts $FRED_SSX_64_BIT_MODE_BIT, %rdi
+ or $__KERNEL_DS, %rdi
+ push %rdi
+ push %rbp
+ pushf
+ mov $__KERNEL_CS, %rax
+ push %rax
+
+ /*
+ * Unlike the IDT event delivery, FRED _always_ pushes an error code
+ * after pushing the return RIP, thus the CALL instruction CANNOT be
+ * used here to push the return RIP, otherwise there is no chance to
+ * push an error code before invoking the IRQ/NMI handler.
+ *
+ * Use LEA to get the return RIP and push it, then push an error code.
+ */
+ lea 1f(%rip), %rax
+ push %rax /* Return RIP */
+ push $0 /* Error code, 0 for IRQ/NMI */
+
+ PUSH_AND_CLEAR_REGS
+ movq %rsp, %rdi /* %rdi -> pt_regs */
+ call \event_func
+ POP_REGS
+ ERETS
+1:
+ pop %rbp
+ RET
+#else /* CONFIG_X86_FRED */
+ ud2
+#endif /* CONFIG_X86_FRED */
+.endm
+#endif /* CONFIG_KVM_INTEL */
+
.macro FRED_ENTER
UNWIND_HINT_END_OF_STACK
ENDBR
@@ -22,6 +97,16 @@
POP_REGS
.endm
+ .section .text, "ax"
+
+/* Note, this is instrumentation safe, and returns via RET */
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+SYM_CODE_START(fred_irq_entry_from_kvm)
+ FRED_ENTRY_FROM_KVM external_interrupt
+SYM_CODE_END(fred_irq_entry_from_kvm)
+EXPORT_SYMBOL_GPL(fred_irq_entry_from_kvm);
+#endif
+
.section .noinstr.text, "ax"
/*
@@ -55,3 +140,10 @@ SYM_CODE_START_NOALIGN(fred_entrypoint_kernel)
FRED_EXIT
ERETS
SYM_CODE_END(fred_entrypoint_kernel)
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+SYM_CODE_START(fred_nmi_entry_from_kvm)
+ FRED_ENTRY_FROM_KVM fred_exc_nmi nmi=1
+SYM_CODE_END(fred_nmi_entry_from_kvm)
+EXPORT_SYMBOL_GPL(fred_nmi_entry_from_kvm);
+#endif
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index 3c91f0eae62e..c7288213de14 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -122,6 +122,10 @@ DECLARE_FRED_HANDLER(fred_exc_double_fault);
extern asmlinkage __visible void fred_entrypoint_user(void);
extern asmlinkage __visible void fred_entrypoint_kernel(void);
+/* For KVM VMX to handle IRQs in IRQ induced VM exits */
+extern asmlinkage __visible void fred_irq_entry_from_kvm(unsigned int event_info);
+extern asmlinkage __visible void fred_nmi_entry_from_kvm(unsigned int event_info);
+
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_X86_FRED */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index be275a0410a8..f6f557c6329e 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -40,6 +40,20 @@
push %_ASM_BP
mov %_ASM_SP, %_ASM_BP
+ /*
+ * Start to push an IDT IRQ/NMI stack frame, which is 40 bytes on
+ * x86_64 and 24 bytes on x86_32:
+ *
+ * +-------+-------------------+
+ * | Bytes | Usage |
+ * +-------+-------------------+
+ * | 39:32 | SS (x86_64 only) |
+ * | 31:24 | RSP (x86_64 only) |
+ * | 23:16 | RFLAGS |
+ * | 15:8 | CS |
+ * | 7:0 | RIP |
+ * +-------+-------------------+
+ */
#ifdef CONFIG_X86_64
/*
* Align RSP to a 16-byte boundary (to emulate CPU behavior) before
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index df461f387e20..83c89239fcff 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6961,14 +6961,16 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
{
u32 intr_info = vmx_get_intr_info(vcpu);
unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
- gate_desc *desc = (gate_desc *)host_idt_base + vector;
if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
"unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
- vmx_do_interrupt_irqoff(gate_offset(desc));
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ fred_irq_entry_from_kvm((EVENT_TYPE_HWINT << 16) | vector);
+ else
+ vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
kvm_after_interrupt(vcpu);
vcpu->arch.at_instruction_boundary = true;
@@ -7254,7 +7256,10 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
is_nmi(vmx_get_intr_info(vcpu))) {
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
- vmx_do_nmi_irqoff();
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ fred_nmi_entry_from_kvm((EVENT_TYPE_NMI << 16) | NMI_VECTOR);
+ else
+ vmx_do_nmi_irqoff();
kvm_after_interrupt(vcpu);
}
Powered by blists - more mailing lists