[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210104131542.495413-6-like.xu@linux.intel.com>
Date: Mon, 4 Jan 2021 21:15:30 +0800
From: Like Xu <like.xu@...ux.intel.com>
To: Peter Zijlstra <peterz@...radead.org>,
Paolo Bonzini <pbonzini@...hat.com>, eranian@...gle.com,
kvm@...r.kernel.org
Cc: Ingo Molnar <mingo@...hat.com>,
Sean Christopherson <seanjc@...gle.com>,
Thomas Gleixner <tglx@...utronix.de>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
Wanpeng Li <wanpengli@...cent.com>,
Jim Mattson <jmattson@...gle.com>,
Joerg Roedel <joro@...tes.org>,
Andi Kleen <andi@...stfloor.org>,
Kan Liang <kan.liang@...ux.intel.com>, wei.w.wang@...el.com,
luwei.kang@...el.com, linux-kernel@...r.kernel.org
Subject: [PATCH v3 05/17] KVM: x86/pmu: Reprogram guest PEBS event to emulate guest PEBS counter
When a guest counter is configured as a PEBS counter through
IA32_PEBS_ENABLE, a guest PEBS event will be reprogrammed by
configuring a non-zero precision level in the perf_event_attr.
The guest PEBS overflow PMI bit would be set in the guest
GLOBAL_STATUS MSR when PEBS facility generates a PEBS
overflow PMI based on guest IA32_DS_AREA MSR.
The attr.precise_ip would be adjusted to a special precision
level when the new PEBS-PDIR feature is supported later which
would affect the host counters scheduling.
The guest PEBS event would not be reused for non-PEBS
guest event even with the same guest counter index.
Originally-by: Andi Kleen <ak@...ux.intel.com>
Co-developed-by: Kan Liang <kan.liang@...ux.intel.com>
Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
Signed-off-by: Like Xu <like.xu@...ux.intel.com>
---
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/pmu.c | 29 +++++++++++++++++++++++++++--
2 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 94c8bfee4a82..09dacda33fb8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -449,6 +449,8 @@ struct kvm_pmu {
DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+ u64 pebs_enable;
+
/*
* The gate to release perf_events not marked in
* pmc_in_use only once in a vcpu time slice.
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 67741d2a0308..2e81c50323e2 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -76,7 +76,12 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
- __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
+ if (perf_event->attr.precise_ip) {
+ /* Indicate PEBS overflow PMI to guest. */
+ __set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
+ (unsigned long *)&pmu->global_status);
+ } else
+ __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
/*
@@ -99,6 +104,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
bool exclude_kernel, bool intr,
bool in_tx, bool in_tx_cp)
{
+ struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu);
struct perf_event *event;
struct perf_event_attr attr = {
.type = type,
@@ -110,6 +116,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
.exclude_kernel = exclude_kernel,
.config = config,
};
+ bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);
attr.sample_period = get_sample_period(pmc, pmc->counter);
@@ -124,9 +131,23 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
attr.sample_period = 0;
attr.config |= HSW_IN_TX_CHECKPOINTED;
}
+ if (pebs) {
+ /*
+ * The non-zero precision level of guest event makes the ordinary
+ * guest event becomes a guest PEBS event and triggers the host
+ * PEBS PMI handler to determine whether the PEBS overflow PMI
+ * comes from the host counters or the guest.
+ *
+ * For most PEBS hardware events, the difference in the software
+ * precision levels of guest and host PEBS events will not affect
+ * the accuracy of the PEBS profiling result, because the "event IP"
+ * in the PEBS record is calibrated on the guest side.
+ */
+ attr.precise_ip = 1;
+ }
event = perf_event_create_kernel_counter(&attr, -1, current,
- intr ? kvm_perf_overflow_intr :
+ (intr || pebs) ? kvm_perf_overflow_intr :
kvm_perf_overflow, pmc);
if (IS_ERR(event)) {
pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
@@ -161,6 +182,10 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
get_sample_period(pmc, pmc->counter)))
return false;
+ if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) &&
+ pmc->perf_event->attr.precise_ip)
+ return false;
+
/* reuse perf_event to serve as pmc_reprogram_counter() does*/
perf_event_enable(pmc->perf_event);
--
2.29.2
Powered by blists - more mailing lists