[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5f51fda5-bc07-42ac-a723-d09d90136961@linux.intel.com>
Date: Fri, 12 Jan 2024 17:14:26 +0800
From: "Mi, Dapeng" <dapeng1.mi@...ux.intel.com>
To: Sean Christopherson <seanjc@...gle.com>,
Paolo Bonzini <pbonzini@...hat.com>
Cc: kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
Kan Liang <kan.liang@...ux.intel.com>, Jim Mattson <jmattson@...gle.com>,
Jinrong Liang <cloudliang@...cent.com>, Aaron Lewis <aaronlewis@...gle.com>,
Like Xu <likexu@...cent.com>
Subject: Re: [PATCH v10 16/29] KVM: selftests: Test Intel PMU architectural
events on gp counters
On 1/10/2024 7:02 AM, Sean Christopherson wrote:
> From: Jinrong Liang <cloudliang@...cent.com>
>
> Add test cases to verify that Intel's Architectural PMU events work as
> expected when they are available according to guest CPUID. Iterate over a
> range of sane PMU versions, with and without full-width writes enabled,
> and over interesting combinations of lengths/masks for the bit vector that
> enumerates unavailable events.
>
> Test up to vPMU version 5, i.e. the current architectural max. KVM only
> officially supports up to version 2, but the behavior of the counters is
> backwards compatible, i.e. KVM shouldn't do something completely different
> for a higher, architecturally-defined vPMU version. Verify KVM behavior
> against the effective vPMU version, e.g. advertising vPMU 5 when KVM only
> supports vPMU 2 shouldn't magically unlock vPMU 5 features.
>
> According to Intel SDM, the number of architectural events is reported
> through CPUID.0AH:EAX[31:24] and the architectural event x is supported
> if EBX[x]=0 && EAX[31:24]>x.
>
> Handcode the entirety of the measured section so that the test can
> precisely assert on the number of instructions and branches retired.
>
> Co-developed-by: Like Xu <likexu@...cent.com>
> Signed-off-by: Like Xu <likexu@...cent.com>
> Signed-off-by: Jinrong Liang <cloudliang@...cent.com>
> Co-developed-by: Sean Christopherson <seanjc@...gle.com>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> tools/testing/selftests/kvm/Makefile | 1 +
> .../selftests/kvm/x86_64/pmu_counters_test.c | 321 ++++++++++++++++++
> 2 files changed, 322 insertions(+)
> create mode 100644 tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
>
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index 479bd85e1c56..ab96fc80bfbd 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -81,6 +81,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
> TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
> TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
> TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
> +TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
> TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
> TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
> TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> new file mode 100644
> index 000000000000..5b8687bb4639
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> @@ -0,0 +1,321 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2023, Tencent, Inc.
> + */
> +
> +#define _GNU_SOURCE /* for program_invocation_short_name */
> +#include <x86intrin.h>
> +
> +#include "pmu.h"
> +#include "processor.h"
> +
> +/* Number of LOOP instructions for the guest measurement payload. */
> +#define NUM_BRANCHES 10
> +/*
> + * Number of "extra" instructions that will be counted, i.e. the number of
> + * instructions that are needed to set up the loop and then disable the
> + * counter. 2 MOV, 2 XOR, 1 WRMSR.
> + */
> +#define NUM_EXTRA_INSNS 5
> +#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS)
> +
> +static uint8_t kvm_pmu_version;
> +static bool kvm_has_perf_caps;
> +
> +static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
> + void *guest_code,
> + uint8_t pmu_version,
> + uint64_t perf_capabilities)
> +{
> + struct kvm_vm *vm;
> +
> + vm = vm_create_with_one_vcpu(vcpu, guest_code);
> + vm_init_descriptor_tables(vm);
> + vcpu_init_descriptor_tables(*vcpu);
> +
> + sync_global_to_guest(vm, kvm_pmu_version);
> +
> + /*
> + * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
> + * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
> + */
> + if (kvm_has_perf_caps)
> + vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
> +
> + vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
> + return vm;
> +}
> +
> +static void run_vcpu(struct kvm_vcpu *vcpu)
> +{
> + struct ucall uc;
> +
> + do {
> + vcpu_run(vcpu);
> + switch (get_ucall(vcpu, &uc)) {
> + case UCALL_SYNC:
> + break;
> + case UCALL_ABORT:
> + REPORT_GUEST_ASSERT(uc);
> + break;
> + case UCALL_PRINTF:
> + pr_info("%s", uc.buffer);
> + break;
> + case UCALL_DONE:
> + break;
> + default:
> + TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
> + }
> + } while (uc.cmd != UCALL_DONE);
> +}
> +
> +static uint8_t guest_get_pmu_version(void)
> +{
> + /*
> + * Return the effective PMU version, i.e. the minimum between what KVM
> + * supports and what is enumerated to the guest. The host deliberately
> + * advertises a PMU version to the guest beyond what is actually
> + * supported by KVM to verify KVM doesn't freak out and do something
> + * bizarre with an architecturally valid, but unsupported, version.
> + */
> + return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
> +}
> +
> +/*
> + * If an architectural event is supported and guaranteed to generate at least
> + * one "hit", assert that its count is non-zero. If an event isn't supported or
> + * the test can't guarantee the associated action will occur, then all bets are
> + * off regarding the count, i.e. no checks can be done.
> + *
> + * Sanity check that in all cases, the event doesn't count when it's disabled,
> + * and that KVM correctly emulates the write of an arbitrary value.
> + */
> +static void guest_assert_event_count(uint8_t idx,
> + struct kvm_x86_pmu_feature event,
> + uint32_t pmc, uint32_t pmc_msr)
> +{
> + uint64_t count;
> +
> + count = _rdpmc(pmc);
> + if (!this_pmu_has(event))
> + goto sanity_checks;
> +
> + switch (idx) {
> + case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
> + GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
> + break;
> + case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
> + GUEST_ASSERT_EQ(count, NUM_BRANCHES);
> + break;
> + case INTEL_ARCH_CPU_CYCLES_INDEX:
> + case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
Since guest_test_arch_event() below already supports the topdown slots event,
we could also add a check for INTEL_ARCH_TOPDOWN_SLOTS_INDEX here.
> + GUEST_ASSERT_NE(count, 0);
> + break;
> + default:
> + break;
> + }
> +
> +sanity_checks:
> + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
> + GUEST_ASSERT_EQ(_rdpmc(pmc), count);
> +
> + wrmsr(pmc_msr, 0xdead);
> + GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
> +}
> +
> +static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
> + uint32_t pmc, uint32_t pmc_msr,
> + uint32_t ctrl_msr, uint64_t ctrl_msr_value)
> +{
> + wrmsr(pmc_msr, 0);
> +
> + /*
> + * Enable and disable the PMC in a monolithic asm blob to ensure that
> + * the compiler can't insert _any_ code into the measured sequence.
> + * Note, ECX doesn't need to be clobbered as the input value, @pmc_msr,
> + * is restored before the end of the sequence.
> + */
> + __asm__ __volatile__("wrmsr\n\t"
> + "mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t"
> + "loop .\n\t"
> + "mov %%edi, %%ecx\n\t"
> + "xor %%eax, %%eax\n\t"
> + "xor %%edx, %%edx\n\t"
> + "wrmsr\n\t"
> + :: "a"((uint32_t)ctrl_msr_value),
> + "d"(ctrl_msr_value >> 32),
> + "c"(ctrl_msr), "D"(ctrl_msr)
> + );
> +
> + guest_assert_event_count(idx, event, pmc, pmc_msr);
> +}
> +
> +static void guest_test_arch_event(uint8_t idx)
> +{
> + const struct {
> + struct kvm_x86_pmu_feature gp_event;
> + } intel_event_to_feature[] = {
> + [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES },
> + [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED },
> + [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES },
> + [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES },
> + [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES },
> + [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED },
> + [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED },
> + [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS },
> + };
> +
> + uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
> + uint32_t pmu_version = guest_get_pmu_version();
> + /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
> + bool guest_has_perf_global_ctrl = pmu_version >= 2;
> + struct kvm_x86_pmu_feature gp_event;
> + uint32_t base_pmc_msr;
> + unsigned int i;
> +
> + /* The host side shouldn't invoke this without a guest PMU. */
> + GUEST_ASSERT(pmu_version);
> +
> + if (this_cpu_has(X86_FEATURE_PDCM) &&
> + rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
> + base_pmc_msr = MSR_IA32_PMC0;
> + else
> + base_pmc_msr = MSR_IA32_PERFCTR0;
> +
> + gp_event = intel_event_to_feature[idx].gp_event;
> + GUEST_ASSERT_EQ(idx, gp_event.f.bit);
> +
> + GUEST_ASSERT(nr_gp_counters);
> +
> + for (i = 0; i < nr_gp_counters; i++) {
> + uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
> + ARCH_PERFMON_EVENTSEL_ENABLE |
> + intel_pmu_arch_events[idx];
> +
> + wrmsr(MSR_P6_EVNTSEL0 + i, 0);
> + if (guest_has_perf_global_ctrl)
> + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
> +
> + __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
> + MSR_P6_EVNTSEL0 + i, eventsel);
> + }
> +}
> +
> +static void guest_test_arch_events(void)
> +{
> + uint8_t i;
> +
> + for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
> + guest_test_arch_event(i);
> +
> + GUEST_DONE();
> +}
> +
> +static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
> + uint8_t length, uint8_t unavailable_mask)
> +{
> + struct kvm_vcpu *vcpu;
> + struct kvm_vm *vm;
> +
> + /* Testing arch events requires a vPMU (there are no negative tests). */
> + if (!pmu_version)
> + return;
> +
> + vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
> + pmu_version, perf_capabilities);
> +
> + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
> + length);
> + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
> + unavailable_mask);
> +
> + run_vcpu(vcpu);
> +
> + kvm_vm_free(vm);
> +}
> +
> +static void test_intel_counters(void)
> +{
> + uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
> + uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
> + unsigned int i;
> + uint8_t v, j;
> + uint32_t k;
> +
> + const uint64_t perf_caps[] = {
> + 0,
> + PMU_CAP_FW_WRITES,
> + };
> +
> + /*
> + * Test up to PMU v5, which is the current maximum version defined by
> + * Intel, i.e. is the last version that is guaranteed to be backwards
> + * compatible with KVM's existing behavior.
> + */
> + uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
> +
> + /*
> + * Detect the existence of events that aren't supported by selftests.
> + * This will (obviously) fail any time the kernel adds support for a
> + * new event, but it's worth paying that price to keep the test fresh.
> + */
> + TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
> + "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
> + nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
> +
> + /*
> + * Force iterating over known arch events regardless of whether or not
> + * KVM/hardware supports a given event.
> + */
> + nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
> +
> + for (v = 0; v <= max_pmu_version; v++) {
> + for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
> + if (!kvm_has_perf_caps && perf_caps[i])
> + continue;
> +
> + pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
> + v, perf_caps[i]);
> + /*
> + * To keep the total runtime reasonable, test every
> + * possible non-zero, non-reserved bitmap combination
> + * only with the native PMU version and the full bit
> + * vector length.
> + */
> + if (v == pmu_version) {
> + for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
> + test_arch_events(v, perf_caps[i], nr_arch_events, k);
> + }
> + /*
> + * Test single bits for all PMU versions and lengths up to
> + * the number of events +1 (to verify KVM doesn't do
> + * weird things if the guest length is greater than the
> + * host length). Explicitly test a mask of '0' and all
> + * ones i.e. all events being available and unavailable.
> + */
> + for (j = 0; j <= nr_arch_events + 1; j++) {
> + test_arch_events(v, perf_caps[i], j, 0);
> + test_arch_events(v, perf_caps[i], j, 0xff);
> +
> + for (k = 0; k < nr_arch_events; k++)
> + test_arch_events(v, perf_caps[i], j, BIT(k));
> + }
> + }
> + }
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
> +
> + TEST_REQUIRE(host_cpu_is_intel);
> + TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
> + TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
> +
> + kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
> + kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
> +
> + test_intel_counters();
> +
> + return 0;
> +}
Powered by blists - more mailing lists