linux-kernel - [PATCH 12/13] KVM: x86: Replace late check_nested

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200423022550.15113-13-sean.j.christopherson@intel.com>
Date:   Wed, 22 Apr 2020 19:25:49 -0700
From:   Sean Christopherson <sean.j.christopherson@...el.com>
To:     Paolo Bonzini <pbonzini@...hat.com>
Cc:     Sean Christopherson <sean.j.christopherson@...el.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>, kvm@...r.kernel.org,
        linux-kernel@...r.kernel.org, Oliver Upton <oupton@...gle.com>,
        Peter Shier <pshier@...gle.com>
Subject: [PATCH 12/13] KVM: x86: Replace late check_nested_events() hack with more precise fix

Add a separate hook for checking if interrupt injection is blocked and
use the hook to handle the case where an interrupt arrives between
check_nested_events() and the injection logic.  Drop the retry of
check_nested_events() that hack-a-fixed the same condition.

Blocking injection is also a bit of a hack, e.g. KVM should do exiting
and non-exiting interrupt processing in a single pass, but it's a more
precise hack.  The old comment is also misleading, e.g. KVM_REQ_EVENT is
purely an optimization, setting it on every run loop (which KVM doesn't
do) should not affect functionality, only performance.

Signed-off-by: Sean Christopherson <sean.j.christopherson@...el.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm/svm.c          |  1 +
 arch/x86/kvm/vmx/vmx.c          | 13 +++++++++++++
 arch/x86/kvm/x86.c              | 22 ++++------------------
 4 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 787636acd648..16fdeddb4a65 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1140,6 +1140,7 @@ struct kvm_x86_ops {
 	void (*queue_exception)(struct kvm_vcpu *vcpu);
 	void (*cancel_injection)(struct kvm_vcpu *vcpu);
 	bool (*interrupt_allowed)(struct kvm_vcpu *vcpu);
+	bool (*interrupt_injection_allowed)(struct kvm_vcpu *vcpu);
 	bool (*nmi_allowed)(struct kvm_vcpu *vcpu);
 	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
 	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f21f734861dd..6d3ccbfc9e6a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3993,6 +3993,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.queue_exception = svm_queue_exception,
 	.cancel_injection = svm_cancel_injection,
 	.interrupt_allowed = svm_interrupt_allowed,
+	.interrupt_injection_allowed = svm_interrupt_allowed,
 	.nmi_allowed = svm_nmi_allowed,
 	.get_nmi_mask = svm_get_nmi_mask,
 	.set_nmi_mask = svm_set_nmi_mask,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 2f8cacb3aa9b..68b3748b5383 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4550,6 +4550,18 @@ static bool vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 	return !vmx_interrupt_blocked(vcpu);
 }
 
+static bool vmx_interrupt_injection_allowed(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
+	 * e.g. if the IRQ arrived asynchronously after checking nested events.
+	 */
+	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+		return false;
+
+	return vmx_interrupt_allowed(vcpu);
+}
+
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	int ret;
@@ -7823,6 +7835,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.queue_exception = vmx_queue_exception,
 	.cancel_injection = vmx_cancel_injection,
 	.interrupt_allowed = vmx_interrupt_allowed,
+	.interrupt_injection_allowed = vmx_interrupt_injection_allowed,
 	.nmi_allowed = vmx_nmi_allowed,
 	.get_nmi_mask = vmx_get_nmi_mask,
 	.set_nmi_mask = vmx_set_nmi_mask,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7c49a7dc601f..d9d6028a77e0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7755,24 +7755,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
 		--vcpu->arch.nmi_pending;
 		vcpu->arch.nmi_injected = true;
 		kvm_x86_ops.set_nmi(vcpu);
-	} else if (kvm_cpu_has_injectable_intr(vcpu)) {
-		/*
-		 * Because interrupts can be injected asynchronously, we are
-		 * calling check_nested_events again here to avoid a race condition.
-		 * See https://lkml.org/lkml/2014/7/2/60 for discussion about this
-		 * proposal and current concerns.  Perhaps we should be setting
-		 * KVM_REQ_EVENT only on certain events and not unconditionally?
-		 */
-		if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
-			r = kvm_x86_ops.check_nested_events(vcpu);
-			if (r != 0)
-				return r;
-		}
-		if (kvm_x86_ops.interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
-					    false);
-			kvm_x86_ops.set_irq(vcpu);
-		}
+	} else if (kvm_cpu_has_injectable_intr(vcpu) &&
+		   kvm_x86_ops.interrupt_injection_allowed(vcpu)) {
+		kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
+		kvm_x86_ops.set_irq(vcpu);
 	}
 
 	return 0;
-- 
2.26.0