Message-ID: <a700ab4c-0e8d-499d-be71-f24c4a6439cf@amd.com>
Date: Mon, 26 May 2025 11:45:24 +0530
From: Sandipan Das <sandipan.das@....com>
To: Mingwei Zhang <mizhang@...gle.com>, Peter Zijlstra
<peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Namhyung Kim <namhyung@...nel.org>, Sean Christopherson <seanjc@...gle.com>,
Paolo Bonzini <pbonzini@...hat.com>
Cc: Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>, Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>, "Liang, Kan" <kan.liang@...ux.intel.com>,
"H. Peter Anvin" <hpa@...or.com>,
linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org,
kvm@...r.kernel.org, linux-kselftest@...r.kernel.org,
Yongwei Ma <yongwei.ma@...el.com>,
Xiong Zhang <xiong.y.zhang@...ux.intel.com>,
Dapeng Mi <dapeng1.mi@...ux.intel.com>, Jim Mattson <jmattson@...gle.com>,
Zide Chen <zide.chen@...el.com>, Eranian Stephane <eranian@...gle.com>,
Shukla Manali <Manali.Shukla@....com>,
Nikunj Dadhania <nikunj.dadhania@....com>
Subject: Re: [PATCH v4 20/38] KVM: x86/pmu: Check if mediated vPMU can
intercept rdpmc

On 3/24/2025 11:01 PM, Mingwei Zhang wrote:
> From: Dapeng Mi <dapeng1.mi@...ux.intel.com>
>
> Check whether rdpmc needs to be intercepted for the mediated vPMU. Simply
> speaking, if the guest owns all PMU counters in the mediated vPMU, rdpmc
> interception should be disabled to mitigate the performance impact; otherwise
> rdpmc has to be intercepted to prevent the guest from reading host counter
> data via the rdpmc instruction.
>
> Co-developed-by: Mingwei Zhang <mizhang@...gle.com>
> Signed-off-by: Mingwei Zhang <mizhang@...gle.com>
> Co-developed-by: Sandipan Das <sandipan.das@....com>
> Signed-off-by: Sandipan Das <sandipan.das@....com>
> Signed-off-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
> ---
> arch/x86/include/asm/msr-index.h | 1 +
> arch/x86/kvm/pmu.c | 34 ++++++++++++++++++++++++++++++++
> arch/x86/kvm/pmu.h | 19 ++++++++++++++++++
> arch/x86/kvm/svm/pmu.c | 14 ++++++++++++-
> arch/x86/kvm/vmx/pmu_intel.c | 18 ++++++++---------
> 5 files changed, 76 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index ca70846ffd55..337f4b0a2998 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -312,6 +312,7 @@
> #define PERF_CAP_PEBS_FORMAT 0xf00
> #define PERF_CAP_FW_WRITES BIT_ULL(13)
> #define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
> +#define PERF_CAP_PERF_METRICS BIT_ULL(15)
> #define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
> PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 92c742ead663..6ad71752be4b 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -604,6 +604,40 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
> return 0;
> }
>
> +inline bool kvm_rdpmc_in_guest(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> + if (!kvm_mediated_pmu_enabled(vcpu))
> + return false;
> +
> + /*
> +	 * VMware allows access to these Pseudo-PMCs even when read via RDPMC
> + * in Ring3 when CR4.PCE=0.
> + */
> + if (enable_vmware_backdoor)
> + return false;
> +
> +	/*
> +	 * FIXME: In theory, perf metrics is always paired with fixed
> +	 * counter 3, so comparing the guest and host fixed counter
> +	 * counts would be sufficient and an explicit perf metrics check
> +	 * wouldn't be needed. However, kvm_pmu_cap.num_counters_fixed is
> +	 * currently capped at KVM_MAX_NR_FIXED_COUNTERS (3) because fixed
> +	 * counter 3 isn't supported yet, so perf metrics still has to be
> +	 * checked explicitly here. Once fixed counter 3 is supported, this
> +	 * perf metrics check can be removed.
> +	 */
> + return pmu->nr_arch_gp_counters == kvm_pmu_cap.num_counters_gp &&
> + pmu->nr_arch_fixed_counters == kvm_pmu_cap.num_counters_fixed &&
> + vcpu_has_perf_metrics(vcpu) == kvm_host_has_perf_metrics() &&
> + pmu->counter_bitmask[KVM_PMC_GP] ==
> + (BIT_ULL(kvm_pmu_cap.bit_width_gp) - 1) &&
> + pmu->counter_bitmask[KVM_PMC_FIXED] ==
> + (BIT_ULL(kvm_pmu_cap.bit_width_fixed) - 1);
> +}
> +EXPORT_SYMBOL_GPL(kvm_rdpmc_in_guest);
> +
> void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
> {
> if (lapic_in_kernel(vcpu)) {
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index e1d0096f249b..509c995b7871 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -271,6 +271,24 @@ static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc)
> return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
> }
>
> +static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
> +{
> + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM))
> + return 0;
> +
> + return vcpu->arch.perf_capabilities;
> +}
> +
> +static inline bool vcpu_has_perf_metrics(struct kvm_vcpu *vcpu)
> +{
> + return !!(vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PERF_METRICS);
> +}
> +
> +static inline bool kvm_host_has_perf_metrics(void)
> +{
> + return !!(kvm_host.perf_capabilities & PERF_CAP_PERF_METRICS);
> +}
> +
> void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
> void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
> int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
> @@ -287,6 +305,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel);
> bool vcpu_pmu_can_enable(struct kvm_vcpu *vcpu);
>
> bool is_vmware_backdoor_pmc(u32 pmc_idx);
> +bool kvm_rdpmc_in_guest(struct kvm_vcpu *vcpu);
>
> extern struct kvm_pmu_ops intel_pmu_ops;
> extern struct kvm_pmu_ops amd_pmu_ops;
> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
> index c8b9fd9b5350..153972e944eb 100644
> --- a/arch/x86/kvm/svm/pmu.c
> +++ b/arch/x86/kvm/svm/pmu.c
> @@ -173,7 +173,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> return 1;
> }
>
> -static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
> +static void __amd_pmu_refresh(struct kvm_vcpu *vcpu)
> {
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> union cpuid_0x80000022_ebx ebx;
> @@ -212,6 +212,18 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
> bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
> }
>
> +static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_svm *svm = to_svm(vcpu);
> +
> + __amd_pmu_refresh(vcpu);
> +
> + if (kvm_rdpmc_in_guest(vcpu))
> + svm_clr_intercept(svm, INTERCEPT_RDPMC);
> + else
> + svm_set_intercept(svm, INTERCEPT_RDPMC);
> +}
> +

After putting a kprobe on kvm_pmu_rdpmc(), I noticed that RDPMC instructions were
still getting intercepted on the secondary vCPUs. This happens because when a
secondary vCPU comes up, kvm_vcpu_reset() is called after the guest CPUID has
already been updated. So while RDPMC interception is initially disabled in the
kvm_pmu_refresh() path, it gets re-enabled in the kvm_vcpu_reset() path because
svm_vcpu_reset() calls init_vmcb(). We should consider the following change to
avoid that:

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6f9142063cc4..1c9c183092f3 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1354,7 +1354,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_set_intercept(svm, INTERCEPT_SMI);
svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
- svm_set_intercept(svm, INTERCEPT_RDPMC);
svm_set_intercept(svm, INTERCEPT_CPUID);
svm_set_intercept(svm, INTERCEPT_INVD);
svm_set_intercept(svm, INTERCEPT_INVLPG);
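
Alternatively, if dropping the unconditional intercept from init_vmcb() is not
desirable, the decision there could be made to match kvm_rdpmc_in_guest(). This
is only an untested sketch (and assumes the PMU helpers are visible from svm.c):

	/* in init_vmcb(), instead of removing the line entirely */
	if (kvm_rdpmc_in_guest(vcpu))
		svm_clr_intercept(svm, INTERCEPT_RDPMC);
	else
		svm_set_intercept(svm, INTERCEPT_RDPMC);

That keeps RDPMC intercepted by default on vCPUs whose PMU hasn't been refreshed
yet, at the cost of init_vmcb() depending on the PMU code.
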
> static void amd_pmu_init(struct kvm_vcpu *vcpu)
> {
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index fc017e9a6a0c..2a5f79206b02 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -108,14 +108,6 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
> return &counters[array_index_nospec(idx, num_counters)];
> }
>
> -static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
> -{
> - if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM))
> - return 0;
> -
> - return vcpu->arch.perf_capabilities;
> -}
> -
> static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
> {
> return (vcpu_get_perf_capabilities(vcpu) & PERF_CAP_FW_WRITES) != 0;
> @@ -456,7 +448,7 @@ static void intel_pmu_enable_fixed_counter_bits(struct kvm_pmu *pmu, u64 bits)
> pmu->fixed_ctr_ctrl_rsvd &= ~intel_fixed_bits_by_idx(i, bits);
> }
>
> -static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
> +static void __intel_pmu_refresh(struct kvm_vcpu *vcpu)
> {
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
> @@ -564,6 +556,14 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
> }
> }
>
> +static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
> +{
> + __intel_pmu_refresh(vcpu);
> +
> + exec_controls_changebit(to_vmx(vcpu), CPU_BASED_RDPMC_EXITING,
> + !kvm_rdpmc_in_guest(vcpu));
> +}
> +
> static void intel_pmu_init(struct kvm_vcpu *vcpu)
> {
> int i;