lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220411093537.11558-9-likexu@tencent.com>
Date:   Mon, 11 Apr 2022 17:35:34 +0800
From:   Like Xu <like.xu.linux@...il.com>
To:     Paolo Bonzini <pbonzini@...hat.com>,
        Jim Mattson <jmattson@...gle.com>
Cc:     Sean Christopherson <seanjc@...gle.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Joerg Roedel <joro@...tes.org>, linux-kernel@...r.kernel.org,
        kvm@...r.kernel.org, Like Xu <likexu@...cent.com>,
        Peter Zijlstra <peterz@...radead.org>
Subject: [PATCH v3 08/11] KVM: x86/pmu: Use PERF_TYPE_RAW to merge reprogram_{gp,fixed}counter()

From: Like Xu <likexu@...cent.com>

The code sketch for reprogram_{gp, fixed}_counter() is similar, while the
fixed counter using the PERF_TYPE_HARDWAR type and the gp being
able to use either PERF_TYPE_HARDWAR or PERF_TYPE_RAW type
depending on the pmc->eventsel value.

After 'commit 761875634a5e ("KVM: x86/pmu: Setup pmc->eventsel
for fixed PMCs")', the pmc->eventsel of the fixed counter will also have
been setup with the same semantic value and will not be changed during
the guest runtime.

The original story of using the PERF_TYPE_HARDWARE type is to emulate
guest architecture PMU on a host without architecture PMU (the Pentium 4),
for which the guest vPMC needs to be reprogrammed using the kernel
generic perf_hw_id. But essentially, "the HARDWARE is just a convenience
wrapper over RAW IIRC", quoated from Peterz. So it could be pretty safe
to use the PERF_TYPE_RAW type only in practice to program both gp and
fixed counters naturally in the reprogram_counter().

To make the gp and fixed counters more semantically symmetrical,
the selection of EVENTSEL_{USER, OS, INT} bits is temporarily translated
via fixed_ctr_ctrl before the pmc_reprogram_counter() call.

Cc: Peter Zijlstra <peterz@...radead.org>
Suggested-by: Jim Mattson <jmattson@...gle.com>
Signed-off-by: Like Xu <likexu@...cent.com>
---
 arch/x86/kvm/pmu.c           | 125 ++++++++++++-----------------------
 arch/x86/kvm/vmx/pmu_intel.c |   3 +-
 2 files changed, 46 insertions(+), 82 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 598b19223965..58960844f49f 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -240,84 +240,58 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
 	return allow_event;
 }
 
-static void reprogram_gp_counter(struct kvm_pmc *pmc)
-{
-	u64 config;
-	u32 type = PERF_TYPE_RAW;
-	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-	u64 eventsel = pmc->eventsel;
-
-	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
-		printk_once("kvm pmu: pin control bit is ignored\n");
-
-	pmc_pause_counter(pmc);
-
-	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
-		return;
-
-	if (!check_pmu_event_filter(pmc))
-		return;
-
-	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
-			  ARCH_PERFMON_EVENTSEL_INV |
-			  ARCH_PERFMON_EVENTSEL_CMASK |
-			  HSW_IN_TX |
-			  HSW_IN_TX_CHECKPOINTED))) {
-		config = static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc);
-		if (config != PERF_COUNT_HW_MAX)
-			type = PERF_TYPE_HARDWARE;
-	}
-
-	if (type == PERF_TYPE_RAW)
-		config = eventsel & pmu->raw_event_mask;
-
-	if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
-		return;
-
-	pmc_release_perf_event(pmc);
-
-	pmc->current_config = eventsel;
-	pmc_reprogram_counter(pmc, type, config,
-			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
-			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
-			      eventsel & ARCH_PERFMON_EVENTSEL_INT);
-}
-
-static void reprogram_fixed_counter(struct kvm_pmc *pmc)
+static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
 {
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-	int idx = pmc->idx - INTEL_PMC_IDX_FIXED;
-	u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);
-	unsigned en_field = ctrl & 0x3;
-	bool pmi = ctrl & 0x8;
 
-	pmc_pause_counter(pmc);
+	if (pmc_is_fixed(pmc))
+		return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+			pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
 
-	if (!en_field || !pmc_is_enabled(pmc))
-		return;
-
-	if (!check_pmu_event_filter(pmc))
-		return;
-
-	if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
-		return;
-
-	pmc_release_perf_event(pmc);
-
-	pmc->current_config = (u64)ctrl;
-	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
-			      static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc),
-			      !(en_field & 0x2), /* exclude user */
-			      !(en_field & 0x1), /* exclude kernel */
-			      pmi);
+	return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
 }
 
 void reprogram_counter(struct kvm_pmc *pmc)
 {
-	if (pmc_is_gp(pmc))
-		reprogram_gp_counter(pmc);
-	else
-		reprogram_fixed_counter(pmc);
+	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+	u64 eventsel = pmc->eventsel;
+	u64 new_config = eventsel;
+	u8 fixed_ctr_ctrl;
+
+	pmc_pause_counter(pmc);
+
+	if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
+		return;
+
+	if (!check_pmu_event_filter(pmc))
+		return;
+
+	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
+		printk_once("kvm pmu: pin control bit is ignored\n");
+
+	if (pmc_is_fixed(pmc)) {
+		fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+						  pmc->idx - INTEL_PMC_IDX_FIXED);
+		if (fixed_ctr_ctrl & 0x1)
+			eventsel |= ARCH_PERFMON_EVENTSEL_OS;
+		if (fixed_ctr_ctrl & 0x2)
+			eventsel |= ARCH_PERFMON_EVENTSEL_USR;
+		if (fixed_ctr_ctrl & 0x8)
+			eventsel |= ARCH_PERFMON_EVENTSEL_INT;
+		new_config = (u64)fixed_ctr_ctrl;
+	}
+
+	if (pmc->current_config == new_config && pmc_resume_counter(pmc))
+		return;
+
+	pmc_release_perf_event(pmc);
+
+	pmc->current_config = new_config;
+	pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
+			      (eventsel & pmu->raw_event_mask),
+			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+			      eventsel & ARCH_PERFMON_EVENTSEL_INT);
 }
 EXPORT_SYMBOL_GPL(reprogram_counter);
 
@@ -474,17 +448,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
 	kvm_pmu_refresh(vcpu);
 }
 
-static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
-{
-	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-
-	if (pmc_is_fixed(pmc))
-		return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
-			pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
-
-	return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
-}
-
 /* Release perf_events for vPMCs that have been unused for a full time slice.  */
 void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 1f910b349978..11eb186929bc 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -498,7 +498,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	pmu->raw_event_mask = X86_RAW_EVENT_MASK;
 
 	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
-	if (!entry || !vcpu->kvm->arch.enable_pmu)
+	if (!entry || !vcpu->kvm->arch.enable_pmu ||
+	    !boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
 		return;
 	eax.full = entry->eax;
 	edx.full = entry->edx;
-- 
2.35.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ