Date:   Thu, 14 May 2020 16:30:53 +0800
From:   Like Xu <like.xu@...ux.intel.com>
To:     Peter Zijlstra <peterz@...radead.org>,
        Paolo Bonzini <pbonzini@...hat.com>
Cc:     linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
        Sean Christopherson <sean.j.christopherson@...el.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>,
        Thomas Gleixner <tglx@...utronix.de>, ak@...ux.intel.com,
        wei.w.wang@...el.com, Like Xu <like.xu@...ux.intel.com>
Subject: [PATCH v11 10/11] KVM: x86/pmu: Check guest LBR availability in case host reclaims them

When the guest LBR event exists and the LBR stack is available (defined
as 'event->oncpu != -1'), accesses to the LBR record MSRs are not
intercepted but passed through to the vcpu before vm-entry. This
availability check is always performed before vm-entry, but as late as
possible to avoid reclaiming resources from any higher priority host
event. A negative check result brings the register interception back,
which also prevents real register accesses and potential data leakage.

KVM emits a pr_warn() on the host when the guest LBR is unavailable.
The administrator is expected to remind users that the guest results
may be inaccurate if someone is using LBR to record the hypervisor on
the host side.

Suggested-by: Wei Wang <wei.w.wang@...el.com>
Signed-off-by: Like Xu <like.xu@...ux.intel.com>
---
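For reference, the availability test described above ('event->oncpu != -1')
is the existing event_is_oncpu() helper, whose definition starts in the
pmu.h context below. A minimal sketch of what that helper is assumed to
look like (body assumed; it relies on struct perf_event from
<linux/perf_event.h>):

static inline bool event_is_oncpu(struct perf_event *event)
{
	/* Assumed: an event currently scheduled on a CPU has oncpu != -1. */
	return event && event->oncpu != -1;
}
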
 arch/x86/kvm/pmu.h           |  1 +
 arch/x86/kvm/vmx/pmu_intel.c | 38 ++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.c       |  4 +++-
 3 files changed, 42 insertions(+), 1 deletion(-)

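If the check fails while the guest has DEBUGCTLMSR_LBR set (for example
because a higher priority host user of the LBR, such as host-side branch
profiling, has reclaimed it), the ratelimited warning added in
intel_pmu_availability_check() shows up in the host kernel log roughly
as follows (the vcpu id is only an example):

  kvm: vcpu-0: LBR is temporarily unavailable.
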
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 78f0cfe1622f..bb3f3ef3386e 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -42,6 +42,7 @@ struct kvm_pmu_ops {
 	void (*reset)(struct kvm_vcpu *vcpu);
 	void (*deliver_pmi)(struct kvm_vcpu *vcpu);
 	void (*lbr_cleanup)(struct kvm_vcpu *vcpu);
+	void (*availability_check)(struct kvm_vcpu *vcpu);
 };
 
 static inline bool event_is_oncpu(struct perf_event *event)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index ea4faae56473..db185dca903d 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -646,6 +646,43 @@ static void intel_pmu_lbr_cleanup(struct kvm_vcpu *vcpu)
 		intel_pmu_free_lbr_event(vcpu);
 }
 
+static bool intel_pmu_lbr_is_available(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+	if (!pmu->lbr_event)
+		return false;
+
+	if (event_is_oncpu(pmu->lbr_event)) {
+		intel_pmu_intercept_lbr_msrs(vcpu, false);
+	} else {
+		intel_pmu_intercept_lbr_msrs(vcpu, true);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Higher priority host perf events (e.g. cpu pinned) could reclaim the
+ * pmu resources (e.g. LBR) that were assigned to the guest. This is
+ * usually done via ipi calls (see perf_install_in_context() for details).
+ *
+ * Before entering non-root mode (with irqs disabled here), double-check
+ * that the pmu features exposed to the guest have not been reclaimed by
+ * higher priority host events. Otherwise, disallow the vcpu's access to
+ * the reclaimed features.
+ */
+static void intel_pmu_availability_check(struct kvm_vcpu *vcpu)
+{
+	lockdep_assert_irqs_disabled();
+
+	if (lbr_is_enabled(vcpu) && !intel_pmu_lbr_is_available(vcpu) &&
+		(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
+		pr_warn_ratelimited("kvm: vcpu-%d: LBR is temporarily unavailable.\n",
+			vcpu->vcpu_id);
+}
+
 struct kvm_pmu_ops intel_pmu_ops = {
 	.find_arch_event = intel_find_arch_event,
 	.find_fixed_event = intel_find_fixed_event,
@@ -662,4 +699,5 @@ struct kvm_pmu_ops intel_pmu_ops = {
 	.reset = intel_pmu_reset,
 	.deliver_pmi = intel_pmu_deliver_pmi,
 	.lbr_cleanup = intel_pmu_lbr_cleanup,
+	.availability_check = intel_pmu_availability_check,
 };
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9969d663826a..80d036c5f64a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6696,8 +6696,10 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	pt_guest_enter(vmx);
 
-	if (vcpu_to_pmu(vcpu)->version)
+	if (vcpu_to_pmu(vcpu)->version) {
 		atomic_switch_perf_msrs(vmx);
+		kvm_x86_ops.pmu_ops->availability_check(vcpu);
+	}
 
 	atomic_switch_umwait_control_msr(vmx);
 
-- 
2.21.3
