lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20260107235724.28101-2-aidan@aktech.ai>
Date: Wed,  7 Jan 2026 19:57:24 -0400
From: Aidan Khoury <aidan@...ech.ai>
To: linux-kernel@...r.kernel.org,
	kvm@...r.kernel.org
Cc: Sean Christopherson <seanjc@...gle.com>,
	Paolo Bonzini <pbonzini@...hat.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...hat.com>,
	Borislav Petkov <bp@...en8.de>,
	Dave Hansen <dave.hansen@...ux.intel.com>,
	x86@...nel.org,
	"H. Peter Anvin" <hpa@...or.com>,
	Aidan Khoury <aidan@...ers.engineering>,
	Nick Peterson <everdox@...il.com>,
	Aidan Khoury <aidan@...ech.ai>
Subject: [PATCH v1 1/1] KVM: x86: Merge pending debug causes when vectoring #DB

Intel VMX records deferred debug exception causes in the VMCS field
GUEST_PENDING_DBG_EXCEPTIONS (B0-B3, enabled breakpoint, BS, RTM). This
state is used when debug exceptions are suppressed (e.g. by MOV SS / STI
interruptibility) and later become deliverable.

See Intel SDM Vol. 3C, 27.3.1.5 "Checks on Guest Non-Register State"
and 27.7.3 "Delivery of Pending Debug Exceptions after VM Entry".

KVM may vector a #DB exception after a VM-exit and/or instruction
emulation. In particular, after a MOV SS that encounters a data
breakpoint (and thus suppresses delivery for one instruction), the
following instruction may cause a VM-exit and be emulated (e.g. CPUID),
or it may be intercepted directly (e.g. ICEBP/INT1). In these flows,
VMX retains the deferred breakpoint cause in GUEST_PENDING_DBG_EXCEPTIONS
while KVM generates a #DB for single-step (BS). Prior to this change, the
pending causes were not merged into the #DB payload, resulting in guest DR6
missing B0-B3 even though bare metal reports the combined reasons (e.g.
BS+B0).

Fix this by merging pending debug causes from GUEST_PENDING_DBG_EXCEPTIONS
into the #DB payload when vectoring the #DB exception so the guest always
observes all accumulated reasons in DR6. The merging is done in the
kvm_deliver_exception_payload() function to cover all injection paths
where the payload may be consumed immediately by kvm_multiple_exception().

Reported-by: Nick Peterson <everdox@...il.com>
Signed-off-by: Aidan Khoury <aidan@...ech.ai>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  1 +
 arch/x86/kvm/vmx/main.c            |  9 +++++++++
 arch/x86/kvm/vmx/vmx.c             | 16 +++++++++++-----
 arch/x86/kvm/vmx/x86_ops.h         |  1 +
 arch/x86/kvm/x86.c                 | 12 ++++++++++++
 6 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index fdf178443f85..82fddf2fe61b 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -50,6 +50,7 @@ KVM_X86_OP(get_gdt)
 KVM_X86_OP(set_gdt)
 KVM_X86_OP(sync_dirty_debug_regs)
 KVM_X86_OP(set_dr7)
+KVM_X86_OP_OPTIONAL_RET0(get_pending_dbg_exceptions)
 KVM_X86_OP(cache_reg)
 KVM_X86_OP(get_rflags)
 KVM_X86_OP(set_rflags)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b74ae7183f3a..d4d0aa0a3a4a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1768,6 +1768,7 @@ struct kvm_x86_ops {
 	void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 	void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
 	void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
+	unsigned long (*get_pending_dbg_exceptions)(struct kvm_vcpu *vcpu);
 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 0eb2773b2ae2..1cd30f8e3625 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -465,6 +465,14 @@ static void vt_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
 	vmx_set_dr7(vcpu, val);
 }
 
+static unsigned long vt_get_pending_dbg_exceptions(struct kvm_vcpu *vcpu)
+{
+	if (WARN_ON_ONCE(is_td_vcpu(vcpu)))
+		return 0;
+
+	return vmx_get_pending_dbg_exceptions(vcpu);
+}
+
 static void vt_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
 {
 	/*
@@ -907,6 +915,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.get_gdt = vt_op(get_gdt),
 	.set_gdt = vt_op(set_gdt),
 	.set_dr7 = vt_op(set_dr7),
+	.get_pending_dbg_exceptions = vt_op(get_pending_dbg_exceptions),
 	.sync_dirty_debug_regs = vt_op(sync_dirty_debug_regs),
 	.cache_reg = vt_op(cache_reg),
 	.get_rflags = vt_op(get_rflags),
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 91b6f2f3edc2..1b2e274fe317 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5300,13 +5300,13 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 			 * have already expired.  Note, the CPU sets/clears BS
 			 * as appropriate for all other VM-Exits types.
 			 */
+			if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
+			    (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+			     (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
+				vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+				 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
 			if (is_icebp(intr_info))
 				WARN_ON(!skip_emulated_instruction(vcpu));
-			else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
-				 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-				  (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
-				vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
-					    vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
 
 			kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
 			return 1;
@@ -5613,6 +5613,12 @@ void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
 	vmcs_writel(GUEST_DR7, val);
 }
 
+unsigned long vmx_get_pending_dbg_exceptions(struct kvm_vcpu *vcpu)
+{
+	return vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) &
+		(DR6_RTM | DR6_BS | BIT(12) /*Enabled breakpoint*/ | DR_TRAP_BITS);
+}
+
 static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
 {
 	kvm_apic_update_ppr(vcpu);
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 9697368d65b3..365682799d05 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -75,6 +75,7 @@ void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val);
 void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val);
+unsigned long vmx_get_pending_dbg_exceptions(struct kvm_vcpu *vcpu);
 void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu);
 void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19d2d6d9e64a..c889dffe4e59 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -765,11 +765,23 @@ static int exception_type(int vector)
 void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
 				   struct kvm_queued_exception *ex)
 {
+	unsigned long pending_dbg;
+
 	if (!ex->has_payload)
 		return;
 
 	switch (ex->vector) {
 	case DB_VECTOR:
+		/*
+		 * VMX records deferred debug causes (B0-B3, enabled breakpoint,
+		 * BS, RTM) in the vmcs.PENDING_DBG_EXCEPTIONS field.  Merge any
+		 * pending causes into the exception payload so the guest may
+		 * see all accumulated reasons in DR6 when the #DB is vectored.
+		 */
+		pending_dbg = kvm_x86_call(get_pending_dbg_exceptions)(vcpu);
+		if (pending_dbg)
+			ex->payload |= pending_dbg;
+
 		/*
 		 * "Certain debug exceptions may clear bit 0-3.  The
 		 * remaining contents of the DR6 register are never
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ