Message-ID: <20250211025828.3072076-5-binbin.wu@linux.intel.com>
Date: Tue, 11 Feb 2025 10:58:15 +0800
From: Binbin Wu <binbin.wu@...ux.intel.com>
To: pbonzini@...hat.com,
seanjc@...gle.com,
kvm@...r.kernel.org
Cc: rick.p.edgecombe@...el.com,
kai.huang@...el.com,
adrian.hunter@...el.com,
reinette.chatre@...el.com,
xiaoyao.li@...el.com,
tony.lindgren@...el.com,
isaku.yamahata@...el.com,
yan.y.zhao@...el.com,
chao.gao@...el.com,
linux-kernel@...r.kernel.org,
binbin.wu@...ux.intel.com
Subject: [PATCH v2 04/17] KVM: TDX: Implement non-NMI interrupt injection
From: Isaku Yamahata <isaku.yamahata@...el.com>
Implement non-NMI interrupt injection for TDX via posted interrupt.

As CPU state is protected and APICv is enabled for the TDX guest, TDX
supports non-NMI interrupt injection only by posted interrupt. Posted
interrupt descriptors (PIDs) are allocated in shared memory, so KVM can
update them directly. If the target vCPU is in non-root mode, send a
posted interrupt notification to the vCPU and hardware will sync PIR to
vIRR atomically. Otherwise, kick the vCPU to pick up the interrupt from
the PID. To post pending interrupts in the PID, KVM can generate a
self-IPI with the notification vector prior to TD entry.

Since the guest status of the TD vCPU is protected, assume interrupts
are always allowed. Ignore the code paths for the event injection
mechanism and LAPIC emulation for TDX.
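
For reference, a minimal sketch of the delivery flow described above.
The helper names follow the existing posted-interrupt code (e.g.
asm/posted_intr.h and vmx.c); the wrapper function itself is
hypothetical and is not part of this patch:

/*
 * Illustration only: roughly what __vmx_deliver_posted_interrupt()
 * does for a PID that KVM can write directly in shared memory.
 */
static void pid_deliver_sketch(struct kvm_vcpu *vcpu,
			       struct pi_desc *pid, int vector)
{
	/* Record the vector in the PIR; the PID is in shared memory. */
	if (pi_test_and_set_pir(vector, pid))
		return;		/* Vector already pending. */

	if (pi_test_and_set_on(pid))
		return;		/* Notification already outstanding. */

	/*
	 * If the vCPU is in non-root mode, the notification IPI makes
	 * hardware sync PIR to vIRR atomically.  Otherwise, kick the
	 * vCPU; pending interrupts in the PID are then posted via a
	 * self-IPI with the notification vector prior to TD entry.
	 */
	if (!kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_VECTOR))
		kvm_vcpu_kick(vcpu);
}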
Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
Co-developed-by: Binbin Wu <binbin.wu@...ux.intel.com>
Signed-off-by: Binbin Wu <binbin.wu@...ux.intel.com>
Reviewed-by: Paolo Bonzini <pbonzini@...hat.com>
---
TDX interrupts v2:
- Rebased due to moving pi_desc to vcpu_vt.
TDX interrupts v1:
- Renamed from "KVM: TDX: Implement interrupt injection"
to "KVM: TDX: Implement non-NMI interrupt injection"
- Rewrite changelog.
- Add a blank line. (Binbin)
- Split posted interrupt delivery code movement to a separate patch.
- Split kvm_wait_lapic_expire() out to a separate patch. (Chao)
- Use __pi_set_sn() to resolve upstream conflicts.
- Use kvm_x86_call()
---
arch/x86/kvm/vmx/main.c | 94 ++++++++++++++++++++++++++++++----
arch/x86/kvm/vmx/posted_intr.c | 2 +-
arch/x86/kvm/vmx/posted_intr.h | 2 +
arch/x86/kvm/vmx/tdx.c | 23 ++++++++-
arch/x86/kvm/vmx/vmx.c | 8 ---
arch/x86/kvm/vmx/x86_ops.h | 6 +++
6 files changed, 116 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 2b1ea57a3a4e..3d590b580e2f 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -180,6 +180,34 @@ static int vt_handle_exit(struct kvm_vcpu *vcpu,
return vmx_handle_exit(vcpu, fastpath);
}
+static void vt_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi = vcpu_to_pi_desc(vcpu);
+
+ pi_clear_on(pi);
+ memset(pi->pir, 0, sizeof(pi->pir));
+}
+
+static int vt_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+ if (is_td_vcpu(vcpu))
+ return -1;
+
+ return vmx_sync_pir_to_irr(vcpu);
+}
+
+static void vt_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector)
+{
+ if (is_td_vcpu(apic->vcpu)) {
+ tdx_deliver_interrupt(apic, delivery_mode, trig_mode,
+ vector);
+ return;
+ }
+
+ vmx_deliver_interrupt(apic, delivery_mode, trig_mode, vector);
+}
+
static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
{
if (is_td_vcpu(vcpu)) {
@@ -227,6 +255,54 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
}
+static void vt_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+ if (is_td_vcpu(vcpu))
+ return;
+
+ vmx_set_interrupt_shadow(vcpu, mask);
+}
+
+static u32 vt_get_interrupt_shadow(struct kvm_vcpu *vcpu)
+{
+ if (is_td_vcpu(vcpu))
+ return 0;
+
+ return vmx_get_interrupt_shadow(vcpu);
+}
+
+static void vt_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
+{
+ if (is_td_vcpu(vcpu))
+ return;
+
+ vmx_inject_irq(vcpu, reinjected);
+}
+
+static void vt_cancel_injection(struct kvm_vcpu *vcpu)
+{
+ if (is_td_vcpu(vcpu))
+ return;
+
+ vmx_cancel_injection(vcpu);
+}
+
+static int vt_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+ if (is_td_vcpu(vcpu))
+ return true;
+
+ return vmx_interrupt_allowed(vcpu, for_injection);
+}
+
+static void vt_enable_irq_window(struct kvm_vcpu *vcpu)
+{
+ if (is_td_vcpu(vcpu))
+ return;
+
+ vmx_enable_irq_window(vcpu);
+}
+
static void vt_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code)
{
*intr_info = 0;
@@ -347,19 +423,19 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.handle_exit = vt_handle_exit,
.skip_emulated_instruction = vmx_skip_emulated_instruction,
.update_emulated_instruction = vmx_update_emulated_instruction,
- .set_interrupt_shadow = vmx_set_interrupt_shadow,
- .get_interrupt_shadow = vmx_get_interrupt_shadow,
+ .set_interrupt_shadow = vt_set_interrupt_shadow,
+ .get_interrupt_shadow = vt_get_interrupt_shadow,
.patch_hypercall = vmx_patch_hypercall,
- .inject_irq = vmx_inject_irq,
+ .inject_irq = vt_inject_irq,
.inject_nmi = vmx_inject_nmi,
.inject_exception = vmx_inject_exception,
- .cancel_injection = vmx_cancel_injection,
- .interrupt_allowed = vmx_interrupt_allowed,
+ .cancel_injection = vt_cancel_injection,
+ .interrupt_allowed = vt_interrupt_allowed,
.nmi_allowed = vmx_nmi_allowed,
.get_nmi_mask = vmx_get_nmi_mask,
.set_nmi_mask = vmx_set_nmi_mask,
.enable_nmi_window = vmx_enable_nmi_window,
- .enable_irq_window = vmx_enable_irq_window,
+ .enable_irq_window = vt_enable_irq_window,
.update_cr8_intercept = vmx_update_cr8_intercept,
.x2apic_icr_is_split = false,
@@ -367,11 +443,11 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
.load_eoi_exitmap = vmx_load_eoi_exitmap,
- .apicv_pre_state_restore = vmx_apicv_pre_state_restore,
+ .apicv_pre_state_restore = vt_apicv_pre_state_restore,
.required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
.hwapic_isr_update = vmx_hwapic_isr_update,
- .sync_pir_to_irr = vmx_sync_pir_to_irr,
- .deliver_interrupt = vmx_deliver_interrupt,
+ .sync_pir_to_irr = vt_sync_pir_to_irr,
+ .deliver_interrupt = vt_deliver_interrupt,
.dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
.protected_apic_has_interrupt = tdx_protected_apic_has_interrupt,
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 25f8a19e2831..895bbe85b818 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -32,7 +32,7 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
*/
static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
-static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
return &(to_vt(vcpu)->pi_desc);
}
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index ad9116a99bcc..68605ca7ef68 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -5,6 +5,8 @@
#include <linux/bitmap.h>
#include <asm/posted_intr.h>
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu);
+
void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
void pi_wakeup_handler(void);
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 825f13371134..d289040172bc 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -685,6 +685,10 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
if ((kvm_tdx->xfam & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE)
vcpu->arch.xfd_no_write_intercept = true;
+
+ tdx->vt.pi_desc.nv = POSTED_INTR_VECTOR;
+ __pi_set_sn(&tdx->vt.pi_desc);
+
tdx->state = VCPU_TD_STATE_UNINITIALIZED;
return 0;
@@ -694,6 +698,7 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_tdx *tdx = to_tdx(vcpu);
+ vmx_vcpu_pi_load(vcpu, cpu);
if (vcpu->cpu == cpu)
return;
@@ -950,6 +955,9 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
trace_kvm_entry(vcpu, force_immediate_exit);
+ if (pi_test_on(&vt->pi_desc))
+ apic->send_IPI_self(POSTED_INTR_VECTOR);
+
tdx_vcpu_enter_exit(vcpu);
if (vt->host_debugctlmsr & ~TDX_DEBUGCTL_PRESERVED)
@@ -1607,6 +1615,16 @@ int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
return tdx_sept_drop_private_spte(kvm, gfn, level, pfn_to_page(pfn));
}
+void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector)
+{
+ struct kvm_vcpu *vcpu = apic->vcpu;
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+ /* TDX supports only posted interrupt. No lapic emulation. */
+ __vmx_deliver_posted_interrupt(vcpu, &tdx->vt.pi_desc, vector);
+}
+
int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
{
struct vcpu_tdx *tdx = to_tdx(vcpu);
@@ -2578,8 +2596,11 @@ static int tdx_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd)
/* TODO: freeze vCPU model before kvm_update_cpuid_runtime() */
kvm_update_cpuid_runtime(vcpu);
- tdx->state = VCPU_TD_STATE_INITIALIZED;
+ td_vmcs_write16(tdx, POSTED_INTR_NV, POSTED_INTR_VECTOR);
+ td_vmcs_write64(tdx, POSTED_INTR_DESC_ADDR, __pa(&tdx->vt.pi_desc));
+ td_vmcs_setbit32(tdx, PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_POSTED_INTR);
+ tdx->state = VCPU_TD_STATE_INITIALIZED;
return 0;
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 77108ab24c66..cb6043e29ef9 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6902,14 +6902,6 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
}
-void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vt *vt = to_vt(vcpu);
-
- pi_clear_on(&vt->pi_desc);
- memset(vt->pi_desc.pir, 0, sizeof(vt->pi_desc.pir));
-}
-
void vmx_do_interrupt_irqoff(unsigned long entry);
void vmx_do_nmi_irqoff(void);
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 8086e5c58cd6..d521ad276d51 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -138,6 +138,9 @@ void tdx_vcpu_put(struct kvm_vcpu *vcpu);
bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu);
int tdx_handle_exit(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion fastpath);
+
+void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector);
void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code);
@@ -177,6 +180,9 @@ static inline void tdx_vcpu_put(struct kvm_vcpu *vcpu) {}
static inline bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu) { return false; }
static inline int tdx_handle_exit(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion fastpath) { return 0; }
+
+static inline void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector) {}
static inline void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, u64 *info1,
u64 *info2, u32 *intr_info, u32 *error_code) {}
--
2.46.0