[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e25c84b0-75de-4963-a05a-0d0ac5916168@linux.intel.com>
Date: Sat, 20 Sep 2025 08:47:43 +0800
From: "Mi, Dapeng" <dapeng1.mi@...ux.intel.com>
To: Sean Christopherson <seanjc@...gle.com>,
Paolo Bonzini <pbonzini@...hat.com>
Cc: kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
Yi Lai <yi1.lai@...el.com>
Subject: Re: [PATCH v4 5/5] KVM: selftests: Handle Intel Atom errata that
leads to PMU event overcount
On 9/20/2025 5:46 AM, Sean Christopherson wrote:
> From: dongsheng <dongsheng.x.zhang@...el.com>
>
> Add a PMU errata framework and use it to relax precise event counts on
> Atom platforms that overcount "Instruction Retired" and "Branch Instruction
> Retired" events, as the overcount issues on VM-Exit/VM-Entry are impossible
> to prevent from userspace, e.g. the test can't prevent host IRQs.
>
> Setup errata during early initialization and automatically sync the mask
> to VMs so that tests can check for errata without having to manually
> manage host=>guest variables.
>
> For Intel Atom CPUs, the PMU events "Instruction Retired" or
> "Branch Instruction Retired" may be overcounted for some certain
> instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD
> and complex SGX/SMX/CSTATE instructions/flows.
>
> The detailed information can be found in the errata (section SRF7):
> https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/
>
> For the Atom platforms before Sierra Forest (including Sierra Forest),
> Both 2 events "Instruction Retired" and "Branch Instruction Retired" would
> be overcounted on these certain instructions, but for Clearwater Forest
> only "Instruction Retired" event is overcounted on these instructions.
>
> Signed-off-by: dongsheng <dongsheng.x.zhang@...el.com>
> Co-developed-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
> Signed-off-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
> Tested-by: Yi Lai <yi1.lai@...el.com>
> Co-developed-by: Sean Christopherson <seanjc@...gle.com>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> tools/testing/selftests/kvm/include/x86/pmu.h | 16 +++++++
> tools/testing/selftests/kvm/lib/x86/pmu.c | 44 +++++++++++++++++++
> .../testing/selftests/kvm/lib/x86/processor.c | 4 ++
> .../selftests/kvm/x86/pmu_counters_test.c | 12 ++++-
> .../selftests/kvm/x86/pmu_event_filter_test.c | 4 +-
> 5 files changed, 77 insertions(+), 3 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h
> index 2aabda2da002..72575eadb63a 100644
> --- a/tools/testing/selftests/kvm/include/x86/pmu.h
> +++ b/tools/testing/selftests/kvm/include/x86/pmu.h
> @@ -5,8 +5,11 @@
> #ifndef SELFTEST_KVM_PMU_H
> #define SELFTEST_KVM_PMU_H
>
> +#include <stdbool.h>
> #include <stdint.h>
>
> +#include <linux/bits.h>
> +
> #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
>
> /*
> @@ -104,4 +107,17 @@ enum amd_pmu_zen_events {
> extern const uint64_t intel_pmu_arch_events[];
> extern const uint64_t amd_pmu_zen_events[];
>
> +enum pmu_errata {
> + INSTRUCTIONS_RETIRED_OVERCOUNT,
> + BRANCHES_RETIRED_OVERCOUNT,
> +};
> +extern uint64_t pmu_errata_mask;
> +
> +void kvm_init_pmu_errata(void);
> +
> +static inline bool this_pmu_has_errata(enum pmu_errata errata)
> +{
> + return pmu_errata_mask & BIT_ULL(errata);
> +}
> +
> #endif /* SELFTEST_KVM_PMU_H */
> diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c
> index 5ab44bf54773..34cb57d1d671 100644
> --- a/tools/testing/selftests/kvm/lib/x86/pmu.c
> +++ b/tools/testing/selftests/kvm/lib/x86/pmu.c
> @@ -8,6 +8,7 @@
> #include <linux/kernel.h>
>
> #include "kvm_util.h"
> +#include "processor.h"
> #include "pmu.h"
>
> const uint64_t intel_pmu_arch_events[] = {
> @@ -34,3 +35,46 @@ const uint64_t amd_pmu_zen_events[] = {
> AMD_ZEN_BRANCHES_MISPREDICTED,
> };
> kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
> +
> +/*
> + * For Intel Atom CPUs, the PMU events "Instruction Retired" or
> + * "Branch Instruction Retired" may be overcounted for some certain
> + * instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD
> + * and complex SGX/SMX/CSTATE instructions/flows.
> + *
> + * The detailed information can be found in the errata (section SRF7):
> + * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/
> + *
> + * For the Atom platforms before Sierra Forest (including Sierra Forest),
> + * Both 2 events "Instruction Retired" and "Branch Instruction Retired" would
> + * be overcounted on these certain instructions, but for Clearwater Forest
> + * only "Instruction Retired" event is overcounted on these instructions.
> + */
> +static uint64_t get_pmu_errata(void)
> +{
> + if (!this_cpu_is_intel())
> + return 0;
> +
> + if (this_cpu_family() != 0x6)
> + return 0;
> +
> + switch (this_cpu_model()) {
> + case 0xDD: /* Clearwater Forest */
> + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT);
> + case 0xAF: /* Sierra Forest */
> + case 0x4D: /* Avaton, Rangely */
> + case 0x5F: /* Denverton */
> + case 0x86: /* Jacobsville */
> + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) |
> + BIT_ULL(BRANCHES_RETIRED_OVERCOUNT);
> + default:
> + return 0;
> + }
> +}
> +
> +uint64_t pmu_errata_mask;
> +
> +void kvm_init_pmu_errata(void)
> +{
> + pmu_errata_mask = get_pmu_errata();
> +}
> diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
> index 3b63c99f7b96..4402d2e1ea69 100644
> --- a/tools/testing/selftests/kvm/lib/x86/processor.c
> +++ b/tools/testing/selftests/kvm/lib/x86/processor.c
> @@ -6,6 +6,7 @@
> #include "linux/bitmap.h"
> #include "test_util.h"
> #include "kvm_util.h"
> +#include "pmu.h"
> #include "processor.h"
> #include "sev.h"
>
> @@ -638,6 +639,7 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm)
> sync_global_to_guest(vm, host_cpu_is_intel);
> sync_global_to_guest(vm, host_cpu_is_amd);
> sync_global_to_guest(vm, is_forced_emulation_enabled);
> + sync_global_to_guest(vm, pmu_errata_mask);
>
> if (is_sev_vm(vm)) {
> struct kvm_sev_init init = { 0 };
> @@ -1269,6 +1271,8 @@ void kvm_selftest_arch_init(void)
> host_cpu_is_intel = this_cpu_is_intel();
> host_cpu_is_amd = this_cpu_is_amd();
> is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
> +
> + kvm_init_pmu_errata();
> }
>
> bool sys_clocksource_is_based_on_tsc(void)
> diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
> index 24599d98f898..eb6c12a2cdd4 100644
> --- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c
> +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
> @@ -163,10 +163,18 @@ static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr
>
> switch (idx) {
> case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
> - GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
> + /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
> + if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
> + GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
> + else
> + GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
> break;
> case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
> - GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
> + /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
> + if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
> + GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
> + else
> + GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
> break;
> case INTEL_ARCH_LLC_REFERENCES_INDEX:
> case INTEL_ARCH_LLC_MISSES_INDEX:
> diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
> index c15513cd74d1..1c5b7611db24 100644
> --- a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
> +++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
> @@ -214,8 +214,10 @@ static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
> do { \
> uint64_t br = pmc_results.branches_retired; \
> uint64_t ir = pmc_results.instructions_retired; \
> + bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ? \
> + br >= NUM_BRANCHES : br == NUM_BRANCHES; \
> \
> - if (br && br != NUM_BRANCHES) \
> + if (br && !br_matched) \
> pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \
> __func__, br, NUM_BRANCHES); \
> TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \
Reviewed-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
Powered by blists - more mailing lists