Message-ID: <0e1526dd-12e6-ad35-e69c-4eb58ed032e0@redhat.com>
Date: Mon, 2 Aug 2021 17:46:20 +0200
From: Paolo Bonzini <pbonzini@...hat.com>
To: Like Xu <like.xu.linux@...il.com>
Cc: Sean Christopherson <seanjc@...gle.com>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
Wanpeng Li <wanpengli@...cent.com>,
Jim Mattson <jmattson@...gle.com>,
Joerg Roedel <joro@...tes.org>,
Thomas Gleixner <tglx@...utronix.de>, kvm@...r.kernel.org,
x86@...nel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] KVM: x86/pmu: Introduce pmc->is_paused to reduce the call
time of perf interfaces
On 28/07/21 14:07, Like Xu wrote:
> From: Like Xu <likexu@...cent.com>
>
> Based on our observations, after any vm-exit associated with the vPMU, two
> or more perf interfaces have to be called for guest counter emulation, such
> as perf_event_{pause, read_value, period}(), and each one will {lock,
> unlock} the same perf_event_ctx. The call frequency becomes even higher
> when the guest uses counters in a multiplexed manner.
>
> Taking the lock once and completing all of KVM's requests inside the perf
> context would require a set of impractical new interfaces. Instead, we can
> further optimize the vPMU implementation by avoiding repeated calls to
> these interfaces in the KVM context for at least one pattern:
>
> After perf_event_pause() is called once, the event is disabled and its
> internal count is reset to 0, so there is no need to pause it again or
> read its value. Once the event is paused, its period will not be updated
> until the next time it is resumed or reprogrammed, so there is also no
> need to call perf_event_period() twice for a non-running counter,
> considering that the perf_event for a running counter is never paused.
>
> Based on this implementation, for the following common usage of
> sampling 4 events with perf on a 4-vCPU/8GB (4u8g) guest:
>
> echo 0 > /proc/sys/kernel/watchdog
> echo 25 > /proc/sys/kernel/perf_cpu_time_max_percent
> echo 10000 > /proc/sys/kernel/perf_event_max_sample_rate
> echo 0 > /proc/sys/kernel/perf_cpu_time_max_percent
> for i in `seq 1 1 10`
> do
> taskset -c 0 perf record \
> -e cpu-cycles -e instructions -e branch-instructions -e cache-misses \
> /root/br_instr a
> done
>
> the average latency of the guest NMI handler is reduced from
> 37646.7 ns to 32929.3 ns (~1.14x speedup) on an Intel ICX server.
> In addition to collecting more samples, no loss of sampling accuracy
> was observed compared to before the optimization.
>
> Signed-off-by: Like Xu <likexu@...cent.com>
> ---
>   arch/x86/include/asm/kvm_host.h | 1 +
>   arch/x86/kvm/pmu.c              | 5 ++++-
>   arch/x86/kvm/pmu.h              | 2 +-
>   arch/x86/kvm/vmx/pmu_intel.c    | 4 ++--
>   4 files changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 99f37781a6fc..a079880d4cd5 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -482,6 +482,7 @@ struct kvm_pmc {
> * ctrl value for fixed counters.
> */
> u64 current_config;
> + bool is_paused;
> };
>
> struct kvm_pmu {
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 827886c12c16..0772bad9165c 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -137,18 +137,20 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
> pmc->perf_event = event;
> pmc_to_pmu(pmc)->event_count++;
> clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
> + pmc->is_paused = false;
> }
>
> static void pmc_pause_counter(struct kvm_pmc *pmc)
> {
> u64 counter = pmc->counter;
>
> - if (!pmc->perf_event)
> + if (!pmc->perf_event || pmc->is_paused)
> return;
>
> /* update counter, reset event value to avoid redundant accumulation */
> counter += perf_event_pause(pmc->perf_event, true);
> pmc->counter = counter & pmc_bitmask(pmc);
> + pmc->is_paused = true;
> }
>
> static bool pmc_resume_counter(struct kvm_pmc *pmc)
> @@ -163,6 +165,7 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
>
> /* reuse perf_event to serve as pmc_reprogram_counter() does*/
> perf_event_enable(pmc->perf_event);
> + pmc->is_paused = false;
>
> clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
> return true;
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index 67e753edfa22..0e4f2b1fa9fb 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -55,7 +55,7 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
> u64 counter, enabled, running;
>
> counter = pmc->counter;
> - if (pmc->perf_event)
> + if (pmc->perf_event && !pmc->is_paused)
> counter += perf_event_read_value(pmc->perf_event,
> &enabled, &running);
> /* FIXME: Scaling needed? */
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 9efc1a6b8693..10cc4f65c4ef 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -437,13 +437,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> !(msr & MSR_PMC_FULL_WIDTH_BIT))
> data = (s64)(s32)data;
> pmc->counter += data - pmc_read_counter(pmc);
> - if (pmc->perf_event)
> + if (pmc->perf_event && !pmc->is_paused)
> perf_event_period(pmc->perf_event,
> get_sample_period(pmc, data));
> return 0;
> } else if ((pmc = get_fixed_pmc(pmu, msr))) {
> pmc->counter += data - pmc_read_counter(pmc);
> - if (pmc->perf_event)
> + if (pmc->perf_event && !pmc->is_paused)
> perf_event_period(pmc->perf_event,
> get_sample_period(pmc, data));
> return 0;
>
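For anyone skimming the thread, the whole patch boils down to guarding the
perf calls with the new per-counter flag. A stand-alone sketch of that
pattern follows; it is a simplified illustration rather than the real
KVM/perf code: "struct pmc", "has_event" and perf_pause_stub() are made-up
stand-ins for struct kvm_pmc, pmc->perf_event and perf_event_pause().

#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>

struct pmc {
        uint64_t counter;
        bool has_event;         /* stand-in for pmc->perf_event != NULL */
        bool is_paused;         /* the flag added by the patch */
};

/* Pretend the perf event accumulated 100 counts since it was resumed. */
static uint64_t perf_pause_stub(void)
{
        return 100;
}

static void pmc_pause_counter(struct pmc *pmc)
{
        if (!pmc->has_event || pmc->is_paused)
                return;         /* already paused: skip the costly perf call */

        pmc->counter += perf_pause_stub();
        pmc->is_paused = true;
}

int main(void)
{
        struct pmc pmc = { .counter = 0, .has_event = true };

        pmc_pause_counter(&pmc);   /* first call reaches "perf", adds 100 */
        pmc_pause_counter(&pmc);   /* second call is a cheap no-op */
        printf("counter = %" PRIu64 "\n", pmc.counter);
        return 0;
}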
Queued, thanks.
Paolo