Message-ID: <20250805190526.1453366-12-seanjc@google.com>
Date: Tue,  5 Aug 2025 12:05:19 -0700
From: Sean Christopherson <seanjc@...gle.com>
To: Sean Christopherson <seanjc@...gle.com>, Paolo Bonzini <pbonzini@...hat.com>
Cc: kvm@...r.kernel.org, linux-kernel@...r.kernel.org, Xin Li <xin@...or.com>, 
	Dapeng Mi <dapeng1.mi@...ux.intel.com>, Sandipan Das <sandipan.das@....com>
Subject: [PATCH 11/18] KVM: x86/pmu: Calculate set of to-be-emulated PMCs at
 time of WRMSRs

Calculate and track PMCs that are counting instructions/branches retired
when the PMC's event selector (or fixed counter control) is modified
instead of evaluating the event selector on-demand.  Immediately recalc a
PMC's configuration on writes to avoid false negatives/positives when
KVM skips an emulated WRMSR, which is guaranteed to occur before the
main run loop processes KVM_REQ_PMU.

Out of an abundance of caution, and because it's relatively cheap, recalc
reprogrammed PMCs in kvm_pmu_handle_event() as well.  Recalculating in
response to KVM_REQ_PMU _should_ be unnecessary, but for now be paranoid
to avoid introducing easily-avoidable bugs in edge cases.  The code can be
removed in the future if necessary, e.g. in the unlikely event that the
overhead of recalculating to-be-emulated PMCs is noticeable.

Note!  Deliberately don't check the PMU event filters, as doing so could
result in KVM consuming stale information.

Tracking which PMCs are counting branches/instructions will allow grabbing
SRCU in the fastpath VM-Exit handlers if and only if a PMC event might be
triggered (to consult the event filters), and will also allow the upcoming
mediated PMU to do the right thing with respect to counting instructions
(the mediated PMU won't be able to update PMCs in the VM-Exit fastpath).

Signed-off-by: Sean Christopherson <seanjc@...gle.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/pmu.c              | 75 ++++++++++++++++++++++++---------
 arch/x86/kvm/pmu.h              |  4 ++
 3 files changed, 61 insertions(+), 21 deletions(-)
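
For context only (not part of the patch; the function name below is made up
and does not exist in the tree): the intended payoff of the precomputed
bitmaps is that a fastpath VM-Exit handler can skip taking SRCU entirely when
no PMC is counting the emulated event, roughly along these lines:

	/*
	 * Illustrative sketch, assuming the KVM-internal helpers touched by
	 * this series (vcpu_to_pmu(), kvm_pmu_instruction_retired()).
	 */
	static void fastpath_emulate_instruction_retired(struct kvm_vcpu *vcpu)
	{
		struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
		int idx;

		/* No PMC is counting instructions retired, nothing to emulate. */
		if (bitmap_empty(pmu->pmc_counting_instructions, X86_PMC_IDX_MAX))
			return;

		/* SRCU protects the PMU event filter consulted during emulation. */
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		kvm_pmu_instruction_retired(vcpu);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
	}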

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f19a76d3ca0e..d7680612ba1e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -579,6 +579,9 @@ struct kvm_pmu {
 	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
 	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
 
+	DECLARE_BITMAP(pmc_counting_instructions, X86_PMC_IDX_MAX);
+	DECLARE_BITMAP(pmc_counting_branches, X86_PMC_IDX_MAX);
+
 	u64 ds_area;
 	u64 pebs_enable;
 	u64 pebs_enable_rsvd;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index e1911b366c43..b0f0275a2c2e 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -542,6 +542,47 @@ static int reprogram_counter(struct kvm_pmc *pmc)
 				     eventsel & ARCH_PERFMON_EVENTSEL_INT);
 }
 
+static bool pmc_is_event_match(struct kvm_pmc *pmc, u64 eventsel)
+{
+	/*
+	 * Ignore checks for edge detect (all events currently emulated by KVM
+	 * are always rising edges), pin control (unsupported by modern CPUs),
+	 * and counter mask and its invert flag (KVM doesn't emulate multiple
+	 * events in a single clock cycle).
+	 *
+	 * Note, the uppermost nibble of AMD's mask overlaps Intel's IN_TX (bit
+	 * 32) and IN_TXCP (bit 33), as well as two reserved bits (bits 35:34).
+	 * Checking the "in HLE/RTM transaction" flags is correct as the vCPU
+	 * can't be in a transaction if KVM is emulating an instruction.
+	 *
+	 * Checking the reserved bits might be wrong if they are defined in the
+	 * future, but so could ignoring them, so do the simple thing for now.
+	 */
+	return !((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB);
+}
+
+void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc)
+{
+	bitmap_clear(pmu->pmc_counting_instructions, pmc->idx, 1);
+	bitmap_clear(pmu->pmc_counting_branches, pmc->idx, 1);
+
+	/*
+	 * Do NOT consult the PMU event filters, as the filters must be checked
+	 * at the time of emulation to ensure KVM uses fresh information, e.g.
+	 * omitting a PMC from a bitmap could result in a missed event if the
+	 * filter is changed to allow counting the event.
+	 */
+	if (!pmc_speculative_in_use(pmc))
+		return;
+
+	if (pmc_is_event_match(pmc, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED))
+		bitmap_set(pmu->pmc_counting_instructions, pmc->idx, 1);
+
+	if (pmc_is_event_match(pmc, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED))
+		bitmap_set(pmu->pmc_counting_branches, pmc->idx, 1);
+}
+EXPORT_SYMBOL_GPL(kvm_pmu_recalc_pmc_emulation);
+
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 {
 	DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
@@ -577,6 +618,9 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 	 */
 	if (unlikely(pmu->need_cleanup))
 		kvm_pmu_cleanup(vcpu);
+
+	kvm_for_each_pmc(pmu, pmc, bit, bitmap)
+		kvm_pmu_recalc_pmc_emulation(pmu, pmc);
 }
 
 int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
@@ -910,7 +954,8 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc)
 							 select_user;
 }
 
-static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
+static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu,
+				  const unsigned long *event_pmcs)
 {
 	DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -919,29 +964,17 @@ static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
 
 	BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX);
 
+	if (bitmap_empty(event_pmcs, X86_PMC_IDX_MAX))
+		return;
+
 	if (!kvm_pmu_has_perf_global_ctrl(pmu))
-		bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
-	else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx,
+		bitmap_copy(bitmap, event_pmcs, X86_PMC_IDX_MAX);
+	else if (!bitmap_and(bitmap, event_pmcs,
 			     (unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX))
 		return;
 
 	kvm_for_each_pmc(pmu, pmc, i, bitmap) {
-		/*
-		 * Ignore checks for edge detect (all events currently emulated
-		 * but KVM are always rising edges), pin control (unsupported
-		 * by modern CPUs), and counter mask and its invert flag (KVM
-		 * doesn't emulate multiple events in a single clock cycle).
-		 *
-		 * Note, the uppermost nibble of AMD's mask overlaps Intel's
-		 * IN_TX (bit 32) and IN_TXCP (bit 33), as well as two reserved
-		 * bits (bits 35:34).  Checking the "in HLE/RTM transaction"
-		 * flags is correct as the vCPU can't be in a transaction if
-		 * KVM is emulating an instruction.  Checking the reserved bits
-		 * might be wrong if they are defined in the future, but so
-		 * could ignoring them, so do the simple thing for now.
-		 */
-		if (((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB) ||
-		    !pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc))
+		if (!pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc))
 			continue;
 
 		kvm_pmu_incr_counter(pmc);
@@ -950,13 +983,13 @@ static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
 
 void kvm_pmu_instruction_retired(struct kvm_vcpu *vcpu)
 {
-	kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
+	kvm_pmu_trigger_event(vcpu, vcpu_to_pmu(vcpu)->pmc_counting_instructions);
 }
 EXPORT_SYMBOL_GPL(kvm_pmu_instruction_retired);
 
 void kvm_pmu_branch_retired(struct kvm_vcpu *vcpu)
 {
-	kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
+	kvm_pmu_trigger_event(vcpu, vcpu_to_pmu(vcpu)->pmc_counting_branches);
 }
 EXPORT_SYMBOL_GPL(kvm_pmu_branch_retired);
 
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 740af816af37..cb93a936a177 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -176,8 +176,12 @@ extern struct x86_pmu_capability kvm_pmu_cap;
 
 void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops);
 
+void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc);
+
 static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
 {
+	kvm_pmu_recalc_pmc_emulation(pmc_to_pmu(pmc), pmc);
+
 	set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
 	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
 }
-- 
2.50.1.565.gc32cd1483b-goog

