Message-ID: <20220221073140.10618-4-ravi.bangoria@amd.com>
Date:   Mon, 21 Feb 2022 13:01:40 +0530
From:   Ravi Bangoria <ravi.bangoria@....com>
To:     <pbonzini@...hat.com>
CC:     <ravi.bangoria@....com>, <seanjc@...gle.com>,
        <jmattson@...gle.com>, <dave.hansen@...ux.intel.com>,
        <peterz@...radead.org>, <alexander.shishkin@...ux.intel.com>,
        <eranian@...gle.com>, <daviddunn@...gle.com>, <ak@...ux.intel.com>,
        <kan.liang@...ux.intel.com>, <like.xu.linux@...il.com>,
        <x86@...nel.org>, <kvm@...r.kernel.org>,
        <linux-kernel@...r.kernel.org>, <kim.phillips@....com>,
        <santosh.shukla@....com>
Subject: [PATCH 3/3] KVM: x86/pmu: Segregate Intel and AMD specific logic

The HSW_IN_TX* bits are used in generic code even though they are not
supported on AMD. Worse, these bits overlap with AMD's EventSelect[11:8],
so using HSW_IN_TX* bits unconditionally in generic code results in
unintended PMU behavior on AMD. For example, if EventSelect[11:8] is
0x2, pmc_reprogram_counter() wrongly assumes that HSW_IN_TX_CHECKPOINTED
is set and thus forces the sampling period to 0.

Fixes: ca724305a2b0 ("KVM: x86/vPMU: Implement AMD vPMU code for KVM")
Signed-off-by: Ravi Bangoria <ravi.bangoria@....com>
---
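Note for reviewers (illustration only, not to be applied): a minimal
userspace sketch of the bit overlap described above. The bit positions
match the definitions the kernel already uses (HSW_IN_TX = bit 32,
HSW_IN_TX_CHECKPOINTED = bit 33, AMD EventSelect[11:8] = raw MSR bits
35:32); everything else in the program is made up for the example.

#include <stdint.h>
#include <stdio.h>

#define HSW_IN_TX			(1ULL << 32)	/* Intel-only */
#define HSW_IN_TX_CHECKPOINTED		(1ULL << 33)	/* Intel-only */
#define AMD64_EVENTSEL_EVENT_HI		(0xFULL << 32)	/* AMD EventSelect[11:8] */

int main(void)
{
	/* AMD event with EventSelect[11:8] = 0x2, i.e. raw MSR bit 33 set. */
	uint64_t eventsel = (0x2ULL << 32) | 0x76;

	/* Generic code that tests the Intel-only bit misfires on AMD ... */
	if (eventsel & HSW_IN_TX_CHECKPOINTED)
		printf("wrongly treated as IN_TX_CHECKPOINTED -> sample_period forced to 0\n");

	/* ... because both names land on the same MSR bit range. */
	printf("overlap mask: 0x%llx\n",
	       (unsigned long long)(AMD64_EVENTSEL_EVENT_HI &
				    (HSW_IN_TX | HSW_IN_TX_CHECKPOINTED)));
	return 0;
}
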
 arch/x86/kvm/pmu.c           | 66 +++++++++++++++++++++++-------------
 arch/x86/kvm/pmu.h           |  4 +--
 arch/x86/kvm/svm/pmu.c       |  6 +++-
 arch/x86/kvm/vmx/pmu_intel.c |  4 +--
 4 files changed, 51 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 4a70380f2287..b91dbede87b3 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -97,7 +97,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
 static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 				  u64 config, bool exclude_user,
 				  bool exclude_kernel, bool intr,
-				  bool in_tx, bool in_tx_cp)
+				  bool in_tx, bool in_tx_cp, bool is_intel)
 {
 	struct perf_event *event;
 	struct perf_event_attr attr = {
@@ -116,16 +116,18 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 
 	attr.sample_period = get_sample_period(pmc, pmc->counter);
 
-	if (in_tx)
-		attr.config |= INTEL_HSW_IN_TX;
-	if (in_tx_cp) {
-		/*
-		 * INTEL_HSW_IN_TX_CHECKPOINTED is not supported with nonzero
-		 * period. Just clear the sample period so at least
-		 * allocating the counter doesn't fail.
-		 */
-		attr.sample_period = 0;
-		attr.config |= INTEL_HSW_IN_TX_CHECKPOINTED;
+	if (is_intel) {
+		if (in_tx)
+			attr.config |= INTEL_HSW_IN_TX;
+		if (in_tx_cp) {
+			/*
+			 * INTEL_HSW_IN_TX_CHECKPOINTED is not supported with nonzero
+			 * period. Just clear the sample period so at least
+			 * allocating the counter doesn't fail.
+			 */
+			attr.sample_period = 0;
+			attr.config |= INTEL_HSW_IN_TX_CHECKPOINTED;
+		}
 	}
 
 	event = perf_event_create_kernel_counter(&attr, -1, current,
@@ -179,13 +181,14 @@ static int cmp_u64(const void *a, const void *b)
 	return *(__u64 *)a - *(__u64 *)b;
 }
 
-void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
+void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel, bool is_intel)
 {
 	u64 config;
 	u32 type = PERF_TYPE_RAW;
 	struct kvm *kvm = pmc->vcpu->kvm;
 	struct kvm_pmu_event_filter *filter;
 	bool allow_event = true;
+	u64 eventsel_mask;
 
 	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
 		printk_once("kvm pmu: pin control bit is ignored\n");
@@ -210,18 +213,31 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 	if (!allow_event)
 		return;
 
-	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
-			  ARCH_PERFMON_EVENTSEL_INV |
-			  ARCH_PERFMON_EVENTSEL_CMASK |
-			  INTEL_HSW_IN_TX |
-			  INTEL_HSW_IN_TX_CHECKPOINTED))) {
+	eventsel_mask = ARCH_PERFMON_EVENTSEL_EDGE |
+			ARCH_PERFMON_EVENTSEL_INV |
+			ARCH_PERFMON_EVENTSEL_CMASK;
+	if (is_intel) {
+		eventsel_mask |= INTEL_HSW_IN_TX | INTEL_HSW_IN_TX_CHECKPOINTED;
+	} else {
+		/*
+		 * None of the AMD generalized events has EventSelect[11:8]
+		 * set so far.
+		 */
+		eventsel_mask |= (0xFULL << 32);
+	}
+
+	if (!(eventsel & eventsel_mask)) {
 		config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
 		if (config != PERF_COUNT_HW_MAX)
 			type = PERF_TYPE_HARDWARE;
 	}
 
-	if (type == PERF_TYPE_RAW)
-		config = eventsel & AMD64_RAW_EVENT_MASK;
+	if (type == PERF_TYPE_RAW) {
+		if (is_intel)
+			config = eventsel & X86_RAW_EVENT_MASK;
+		else
+			config = eventsel & AMD64_RAW_EVENT_MASK;
+	}
 
 	if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
 		return;
@@ -234,11 +250,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
 			      eventsel & ARCH_PERFMON_EVENTSEL_INT,
 			      (eventsel & INTEL_HSW_IN_TX),
-			      (eventsel & INTEL_HSW_IN_TX_CHECKPOINTED));
+			      (eventsel & INTEL_HSW_IN_TX_CHECKPOINTED),
+			      is_intel);
 }
 EXPORT_SYMBOL_GPL(reprogram_gp_counter);
 
-void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
+void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx, bool is_intel)
 {
 	unsigned en_field = ctrl & 0x3;
 	bool pmi = ctrl & 0x8;
@@ -270,24 +287,25 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
 			      kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc),
 			      !(en_field & 0x2), /* exclude user */
 			      !(en_field & 0x1), /* exclude kernel */
-			      pmi, false, false);
+			      pmi, false, false, is_intel);
 }
 EXPORT_SYMBOL_GPL(reprogram_fixed_counter);
 
 void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
 {
 	struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx);
+	bool is_intel = !strncmp(kvm_x86_ops.name, "kvm_intel", 9);
 
 	if (!pmc)
 		return;
 
 	if (pmc_is_gp(pmc))
-		reprogram_gp_counter(pmc, pmc->eventsel);
+		reprogram_gp_counter(pmc, pmc->eventsel, is_intel);
 	else {
 		int idx = pmc_idx - INTEL_PMC_IDX_FIXED;
 		u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);
 
-		reprogram_fixed_counter(pmc, ctrl, idx);
+		reprogram_fixed_counter(pmc, ctrl, idx, is_intel);
 	}
 }
 EXPORT_SYMBOL_GPL(reprogram_counter);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7a7b8d5b775e..610a4cbf85a4 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -140,8 +140,8 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
 	return sample_period;
 }
 
-void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
-void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
+void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel, bool is_intel);
+void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx, bool is_intel);
 void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
 
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 5aa45f13b16d..9ad63e940883 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -140,6 +140,10 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
 
 static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
 {
+	/*
+	 * None of the AMD generalized events has EventSelect[11:8] set.
+	 * Hence 8 bit event_select works for now.
+	 */
 	u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
 	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
 	int i;
@@ -265,7 +269,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (data == pmc->eventsel)
 			return 0;
 		if (!(data & pmu->reserved_bits)) {
-			reprogram_gp_counter(pmc, data);
+			reprogram_gp_counter(pmc, data, false);
 			return 0;
 		}
 	}
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 7c64792a9506..ba1fbd37f608 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -50,7 +50,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 			continue;
 
 		__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
-		reprogram_fixed_counter(pmc, new_ctrl, i);
+		reprogram_fixed_counter(pmc, new_ctrl, i, true);
 	}
 
 	pmu->fixed_ctr_ctrl = data;
@@ -444,7 +444,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			if (data == pmc->eventsel)
 				return 0;
 			if (!(data & pmu->reserved_bits)) {
-				reprogram_gp_counter(pmc, data);
+				reprogram_gp_counter(pmc, data, true);
 				return 0;
 			}
 		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
-- 
2.27.0
