linux-kernel - [PATCH 07/15] KVM: Use dedicated flag to track if KVM is handling an NMI from guest

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210827005718.585190-8-seanjc@google.com>
Date:   Thu, 26 Aug 2021 17:57:10 -0700
From:   Sean Christopherson <seanjc@...gle.com>
To:     Will Deacon <will@...nel.org>, Mark Rutland <mark.rutland@....com>,
        Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Catalin Marinas <catalin.marinas@....com>,
        Marc Zyngier <maz@...nel.org>, Guo Ren <guoren@...nel.org>,
        Nick Hu <nickhu@...estech.com>,
        Greentime Hu <green.hu@...il.com>,
        Vincent Chen <deanbo422@...il.com>,
        Paul Walmsley <paul.walmsley@...ive.com>,
        Palmer Dabbelt <palmer@...belt.com>,
        Albert Ou <aou@...s.berkeley.edu>,
        Thomas Gleixner <tglx@...utronix.de>,
        Borislav Petkov <bp@...en8.de>, x86@...nel.org,
        Paolo Bonzini <pbonzini@...hat.com>,
        Boris Ostrovsky <boris.ostrovsky@...cle.com>,
        Juergen Gross <jgross@...e.com>
Cc:     Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        James Morse <james.morse@....com>,
        Alexandru Elisei <alexandru.elisei@....com>,
        Suzuki K Poulose <suzuki.poulose@....com>,
        "H. Peter Anvin" <hpa@...or.com>,
        Sean Christopherson <seanjc@...gle.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>,
        Stefano Stabellini <sstabellini@...nel.org>,
        linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-arm-kernel@...ts.infradead.org, kvmarm@...ts.cs.columbia.edu,
        linux-csky@...r.kernel.org, linux-riscv@...ts.infradead.org,
        kvm@...r.kernel.org, xen-devel@...ts.xenproject.org,
        Artem Kashkanov <artem.kashkanov@...el.com>,
        Like Xu <like.xu.linux@...il.com>,
        Zhu Lingshan <lingshan.zhu@...el.com>
Subject: [PATCH 07/15] KVM: Use dedicated flag to track if KVM is handling an
 NMI from guest

Add a dedicated flag to detect the case where KVM's PMC overflow
callback was originally invoked in response to an NMI that arrived while
the guest was running.  Using current_vcpu is less precise as IRQs also
set current_vcpu (though presumably KVM's callback should not be reached
in that case), and more importantly, this will allow dropping
current_vcpu as the perf callbacks can switch to kvm_running_vcpu now
that the perf callbacks are precisely registered, i.e. kvm_running_vcpu
doesn't need to be used to detect if a PMI arrived in the guest.

Fixes: dd60d217062f ("KVM: x86: Fix perf timer mode IP reporting")
Signed-off-by: Sean Christopherson <seanjc@...gle.com>
---
 arch/x86/include/asm/kvm_host.h | 3 +--
 arch/x86/kvm/pmu.c              | 2 +-
 arch/x86/kvm/svm/svm.c          | 2 +-
 arch/x86/kvm/vmx/vmx.c          | 2 +-
 arch/x86/kvm/x86.c              | 4 ++--
 arch/x86/kvm/x86.h              | 4 +++-
 6 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1ea4943a73d7..465b35736d9b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -763,6 +763,7 @@ struct kvm_vcpu_arch {
 	unsigned nmi_pending; /* NMI queued after currently running handler */
 	bool nmi_injected;    /* Trying to inject an NMI this entry */
 	bool smi_pending;    /* SMI queued after currently running handler */
+	bool handling_nmi_from_guest;
 
 	struct kvm_mtrr mtrr_state;
 	u64 pat;
@@ -1874,8 +1875,6 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
 int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
 void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
 
-int kvm_is_in_guest(void);
-
 void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
 				     u32 size);
 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 0772bad9165c..2b8934b452ea 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -87,7 +87,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,
 		 * woken up. So we should wake it, but this is impossible from
 		 * NMI context. Do it from irq work instead.
 		 */
-		if (!kvm_is_in_guest())
+		if (!pmc->vcpu->arch.handling_nmi_from_guest)
 			irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
 		else
 			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1a70e11f0487..3fc6767e5fd8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3843,7 +3843,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 	}
 
 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
-		kvm_before_interrupt(vcpu);
+		kvm_before_interrupt(vcpu, true);
 
 	kvm_load_host_xsave_state(vcpu);
 	stgi();
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f19d72136f77..f08980ef7c44 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6344,7 +6344,7 @@ void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
 static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
 					unsigned long entry)
 {
-	kvm_before_interrupt(vcpu);
+	kvm_before_interrupt(vcpu, entry == (unsigned long)asm_exc_nmi_noist);
 	vmx_do_interrupt_nmi_irqoff(entry);
 	kvm_after_interrupt(vcpu);
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bc4ee6ea7752..d4d91944fde7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8267,7 +8267,7 @@ static void kvm_timer_init(void)
 DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
 EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
 
-int kvm_is_in_guest(void)
+static int kvm_is_in_guest(void)
 {
 	return __this_cpu_read(current_vcpu) != NULL;
 }
@@ -9678,7 +9678,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	 * interrupts on processors that implement an interrupt shadow, the
 	 * stat.exits increment will do nicely.
 	 */
-	kvm_before_interrupt(vcpu);
+	kvm_before_interrupt(vcpu, false);
 	local_irq_enable();
 	++vcpu->stat.exits;
 	local_irq_disable();
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 5cedc0e8a5d5..4c5ba4128b38 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -395,9 +395,10 @@ static inline void kvm_unregister_perf_callbacks(void)
 
 DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
 
-static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
+static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu, bool is_nmi)
 {
 	__this_cpu_write(current_vcpu, vcpu);
+	WRITE_ONCE(vcpu->arch.handling_nmi_from_guest, is_nmi);
 
 	kvm_register_perf_callbacks();
 }
@@ -406,6 +407,7 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
 {
 	kvm_unregister_perf_callbacks();
 
+	WRITE_ONCE(vcpu->arch.handling_nmi_from_guest, false);
 	__this_cpu_write(current_vcpu, NULL);
 }
 
-- 
2.33.0.259.gc128427fd7-goog