[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250821223630.984383-12-xin@zytor.com>
Date: Thu, 21 Aug 2025 15:36:20 -0700
From: "Xin Li (Intel)" <xin@...or.com>
To: linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
linux-doc@...r.kernel.org
Cc: pbonzini@...hat.com, seanjc@...gle.com, corbet@....net, tglx@...utronix.de,
mingo@...hat.com, bp@...en8.de, dave.hansen@...ux.intel.com,
x86@...nel.org, hpa@...or.com, xin@...or.com, luto@...nel.org,
peterz@...radead.org, andrew.cooper3@...rix.com, chao.gao@...el.com,
hch@...radead.org
Subject: [PATCH v6 11/20] KVM: VMX: Virtualize FRED nested exception tracking
From: Xin Li <xin3.li@...el.com>
Set the VMX nested exception bit in VM-entry interruption information
field when injecting a nested exception using FRED event delivery to
ensure:
1) A nested exception is injected on a correct stack level.
2) The nested bit defined in FRED stack frame is set.
The event stack level used by FRED event delivery depends on whether
the event was a nested exception encountered during delivery of an
earlier event, because a nested exception is "regarded" as happening
on ring 0. E.g., when #PF is configured to use stack level 1 in
IA32_FRED_STKLVLS MSR:
- nested #PF will be delivered on the stack pointed by IA32_FRED_RSP1
MSR when encountered in ring 3 and ring 0.
- normal #PF will be delivered on the stack pointed by IA32_FRED_RSP0
MSR when encountered in ring 3.
The VMX nested-exception support ensures a correct event stack level is
chosen when a VM entry injects a nested exception.
Signed-off-by: Xin Li <xin3.li@...el.com>
[ Sean: reworked kvm_requeue_exception() to simply the code changes ]
Signed-off-by: Sean Christopherson <seanjc@...gle.com>
Signed-off-by: Xin Li (Intel) <xin@...or.com>
Tested-by: Shan Kang <shan.kang@...el.com>
Tested-by: Xuelian Guo <xuelian.guo@...el.com>
---
Change in v5:
* Add TB from Xuelian Guo.
Change in v4:
* Move the check is_fred_enable() from kvm_multiple_exception() to
vmx_inject_exception() thus avoid bleeding FRED details into
kvm_multiple_exception() (Chao Gao).
Change in v3:
* Rework kvm_requeue_exception() to simply the code changes (Sean
Christopherson).
Change in v2:
* Set the nested flag when there is an original interrupt (Chao Gao).
---
arch/x86/include/asm/kvm_host.h | 4 +++-
arch/x86/include/asm/vmx.h | 5 ++++-
arch/x86/kvm/svm/svm.c | 2 +-
arch/x86/kvm/vmx/vmx.c | 6 +++++-
arch/x86/kvm/x86.c | 13 ++++++++++++-
arch/x86/kvm/x86.h | 1 +
6 files changed, 26 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dce6471194f7..6299c43dfbee 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -759,6 +759,7 @@ struct kvm_queued_exception {
u32 error_code;
unsigned long payload;
bool has_payload;
+ bool nested;
u64 event_data;
};
@@ -2223,7 +2224,8 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
- bool has_error_code, u32 error_code, u64 event_data);
+ bool has_error_code, u32 error_code, bool nested,
+ u64 event_data);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 539af190ad3e..7b34a9357b28 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -140,6 +140,7 @@
#define VMX_BASIC_INOUT BIT_ULL(54)
#define VMX_BASIC_TRUE_CTLS BIT_ULL(55)
#define VMX_BASIC_NO_HW_ERROR_CODE_CC BIT_ULL(56)
+#define VMX_BASIC_NESTED_EXCEPTION BIT_ULL(58)
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
@@ -442,13 +443,15 @@ enum vmcs_field {
#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */
#define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */
#define INTR_INFO_UNBLOCK_NMI 0x1000 /* 12 */
+#define INTR_INFO_NESTED_EXCEPTION_MASK 0x2000 /* 13 */
#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */
-#define INTR_INFO_RESVD_BITS_MASK 0x7ffff000
+#define INTR_INFO_RESVD_BITS_MASK 0x7fffd000
#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK
#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK
#define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK
#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK
+#define VECTORING_INFO_NESTED_EXCEPTION_MASK INTR_INFO_NESTED_EXCEPTION_MASK
#define INTR_TYPE_EXT_INTR (EVENT_TYPE_EXTINT << 8) /* external interrupt */
#define INTR_TYPE_RESERVED (EVENT_TYPE_RESERVED << 8) /* reserved */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 72f54befd0d0..06961098de42 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4144,7 +4144,7 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
kvm_requeue_exception(vcpu, vector,
exitintinfo & SVM_EXITINTINFO_VALID_ERR,
- error_code, 0);
+ error_code, false, 0);
break;
}
case SVM_EXITINTINFO_TYPE_INTR:
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e1eb55fb3fb8..7a7856f06f98 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1857,8 +1857,11 @@ void vmx_inject_exception(struct kvm_vcpu *vcpu)
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
vmx->vcpu.arch.event_exit_inst_len);
intr_info |= INTR_TYPE_SOFT_EXCEPTION;
- } else
+ } else {
intr_info |= INTR_TYPE_HARD_EXCEPTION;
+ if (ex->nested && is_fred_enabled(vcpu))
+ intr_info |= INTR_INFO_NESTED_EXCEPTION_MASK;
+ }
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
@@ -7311,6 +7314,7 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
kvm_requeue_exception(vcpu, vector,
idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK,
error_code,
+ idt_vectoring_info & VECTORING_INFO_NESTED_EXCEPTION_MASK,
event_data);
break;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f082255852a9..fbbfa600e2c2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -871,6 +871,10 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned int nr,
vcpu->arch.exception.pending = true;
vcpu->arch.exception.injected = false;
+ vcpu->arch.exception.nested = vcpu->arch.exception.nested ||
+ vcpu->arch.nmi_injected ||
+ vcpu->arch.interrupt.injected;
+
vcpu->arch.exception.has_error_code = has_error;
vcpu->arch.exception.vector = nr;
vcpu->arch.exception.error_code = error_code;
@@ -900,8 +904,13 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned int nr,
vcpu->arch.exception.injected = false;
vcpu->arch.exception.pending = false;
+ /* #DF is NOT a nested event, per its definition. */
+ vcpu->arch.exception.nested = false;
+
kvm_queue_exception_e(vcpu, DF_VECTOR, 0);
} else {
+ vcpu->arch.exception.nested = true;
+
/* replace previous exception with a new one in a hope
that instruction re-execution will regenerate lost
exception */
@@ -930,7 +939,8 @@ static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
}
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
- bool has_error_code, u32 error_code, u64 event_data)
+ bool has_error_code, u32 error_code, bool nested,
+ u64 event_data)
{
/*
@@ -955,6 +965,7 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
vcpu->arch.exception.error_code = error_code;
vcpu->arch.exception.has_payload = false;
vcpu->arch.exception.payload = 0;
+ vcpu->arch.exception.nested = nested;
vcpu->arch.exception.event_data = event_data;
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index b6dc23c478ff..685eb710b1f2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -198,6 +198,7 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
{
vcpu->arch.exception.pending = false;
vcpu->arch.exception.injected = false;
+ vcpu->arch.exception.nested = false;
vcpu->arch.exception_vmexit.pending = false;
}
--
2.50.1
Powered by blists - more mailing lists