[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20260107235724.28101-2-aidan@aktech.ai>
Date: Wed, 7 Jan 2026 19:57:24 -0400
From: Aidan Khoury <aidan@...ech.ai>
To: linux-kernel@...r.kernel.org,
kvm@...r.kernel.org
Cc: Sean Christopherson <seanjc@...gle.com>,
Paolo Bonzini <pbonzini@...hat.com>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>,
Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>,
x86@...nel.org,
"H. Peter Anvin" <hpa@...or.com>,
Aidan Khoury <aidan@...ers.engineering>,
Nick Peterson <everdox@...il.com>,
Aidan Khoury <aidan@...ech.ai>
Subject: [PATCH v1 1/1] KVM: x86: Merge pending debug causes when vectoring #DB
Intel VMX records deferred debug exception causes in the VMCS field
GUEST_PENDING_DBG_EXCEPTIONS (B0-B3, enabled breakpoint, BS, RTM). This
state is used when debug exceptions are suppressed (e.g. by MOV SS / STI
interruptibility) and later become deliverable.
See Intel SDM Vol. 3C, 27.3.1.5 "Checks on Guest Non-Register State"
and Intel SDM Vol. 3C, 27.7.3 "Delivery of Pending Debug Exceptions after
VM Entry".
KVM may vector a #DB exception after a VM-exit and/or instruction
emulation. In particular, after a MOV SS that encounters a data
breakpoint (and thus suppresses delivery for one instruction), the
following instruction may cause a VM-exit and be emulated (e.g. CPUID),
or it may be intercepted directly (e.g. ICEBP/INT1). In these flows,
VMX retains the deferred breakpoint cause in GUEST_PENDING_DBG_EXCEPTIONS
while KVM generates a #DB for single-step (BS). Prior to this change, the
pending causes were not merged into the injected #DB payload, resulting in
guest DR6 missing B0-B3 even though bare metal reports the combined reasons
(e.g. BS+B0).
Fix this by merging pending debug causes from GUEST_PENDING_DBG_EXCEPTIONS
into the #DB payload when vectoring the #DB exception so the guest always
observes all accumulated reasons in DR6. The merging is done in the
kvm_deliver_exception_payload() function to cover all injection paths
where the payload may be consumed immediately by kvm_multiple_exception().
Reported-by: Nick Peterson <everdox@...il.com>
Signed-off-by: Aidan Khoury <aidan@...ech.ai>
---
arch/x86/include/asm/kvm-x86-ops.h | 1 +
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/vmx/main.c | 9 +++++++++
arch/x86/kvm/vmx/vmx.c | 16 +++++++++++-----
arch/x86/kvm/vmx/x86_ops.h | 1 +
arch/x86/kvm/x86.c | 12 ++++++++++++
6 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index fdf178443f85..82fddf2fe61b 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -50,6 +50,7 @@ KVM_X86_OP(get_gdt)
KVM_X86_OP(set_gdt)
KVM_X86_OP(sync_dirty_debug_regs)
KVM_X86_OP(set_dr7)
+KVM_X86_OP_OPTIONAL_RET0(get_pending_dbg_exceptions)
KVM_X86_OP(cache_reg)
KVM_X86_OP(get_rflags)
KVM_X86_OP(set_rflags)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b74ae7183f3a..d4d0aa0a3a4a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1768,6 +1768,7 @@ struct kvm_x86_ops {
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
+ unsigned long (*get_pending_dbg_exceptions)(struct kvm_vcpu *vcpu);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 0eb2773b2ae2..1cd30f8e3625 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -465,6 +465,14 @@ static void vt_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
vmx_set_dr7(vcpu, val);
}
+static unsigned long vt_get_pending_dbg_exceptions(struct kvm_vcpu *vcpu)
+{
+ if (WARN_ON_ONCE(is_td_vcpu(vcpu)))
+ return 0;
+
+ return vmx_get_pending_dbg_exceptions(vcpu);
+}
+
static void vt_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
/*
@@ -907,6 +915,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.get_gdt = vt_op(get_gdt),
.set_gdt = vt_op(set_gdt),
.set_dr7 = vt_op(set_dr7),
+ .get_pending_dbg_exceptions = vt_op(get_pending_dbg_exceptions),
.sync_dirty_debug_regs = vt_op(sync_dirty_debug_regs),
.cache_reg = vt_op(cache_reg),
.get_rflags = vt_op(get_rflags),
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 91b6f2f3edc2..1b2e274fe317 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5300,13 +5300,13 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
* have already expired. Note, the CPU sets/clears BS
* as appropriate for all other VM-Exits types.
*/
+ if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
+ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
if (is_icebp(intr_info))
WARN_ON(!skip_emulated_instruction(vcpu));
- else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
- (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
- (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
- vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
return 1;
@@ -5613,6 +5613,12 @@ void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
vmcs_writel(GUEST_DR7, val);
}
+unsigned long vmx_get_pending_dbg_exceptions(struct kvm_vcpu *vcpu)
+{
+ return vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) &
+ (DR6_RTM | DR6_BS | BIT(12) /*Enabled breakpoint*/ | DR_TRAP_BITS);
+}
+
static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
{
kvm_apic_update_ppr(vcpu);
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 9697368d65b3..365682799d05 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -75,6 +75,7 @@ void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val);
void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val);
+unsigned long vmx_get_pending_dbg_exceptions(struct kvm_vcpu *vcpu);
void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu);
void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19d2d6d9e64a..c889dffe4e59 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -765,11 +765,23 @@ static int exception_type(int vector)
void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
struct kvm_queued_exception *ex)
{
+ unsigned long pending_dbg;
+
if (!ex->has_payload)
return;
switch (ex->vector) {
case DB_VECTOR:
+ /*
+ * VMX records deferred debug causes (B0-B3, enabled breakpoint,
+ * BS, RTM) in the vmcs.PENDING_DBG_EXCEPTIONS field. Merge any
+ * pending causes into the exception payload so the guest may
+ * see all accumulated reasons in DR6 when the #DB is vectored.
+ */
+ pending_dbg = kvm_x86_call(get_pending_dbg_exceptions)(vcpu);
+ if (pending_dbg)
+ ex->payload |= pending_dbg;
+
/*
* "Certain debug exceptions may clear bit 0-3. The
* remaining contents of the DR6 register are never
--
2.43.0
Powered by blists - more mailing lists