[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <478c630b-1dc7-e042-2884-21086c417d99@intel.com>
Date: Mon, 19 Apr 2021 16:17:54 +0800
From: "Xu, Like" <like.xu@...el.com>
To: Liuxiangdong <liuxiangdong5@...wei.com>
Cc: kvm@...r.kernel.org, x86@...nel.org, linux-kernel@...r.kernel.org,
Andi Kleen <ak@...ux.intel.com>,
Like Xu <like.xu@...ux.intel.com>,
Xiexiangyou <xiexiangyou@...wei.com>,
"Fangyi (Eric)" <eric.fangyi@...wei.com>
Subject: Re: [PATCH v5 06/16] KVM: x86/pmu: Reprogram PEBS event to emulate
guest PEBS counter
On 2021/4/19 16:11, Liuxiangdong wrote:
>
>
> On 2021/4/15 11:20, Like Xu wrote:
>> When a guest counter is configured as a PEBS counter through
>> IA32_PEBS_ENABLE, a guest PEBS event will be reprogrammed by
>> configuring a non-zero precision level in the perf_event_attr.
>>
>> The guest PEBS overflow PMI bit would be set in the guest
>> GLOBAL_STATUS MSR when PEBS facility generates a PEBS
>> overflow PMI based on guest IA32_DS_AREA MSR.
>>
>> Even with the same counter index and the same event code and
>> mask, guest PEBS events will not be reused for non-PEBS events.
>>
>> Originally-by: Andi Kleen <ak@...ux.intel.com>
>> Co-developed-by: Kan Liang <kan.liang@...ux.intel.com>
>> Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
>> Signed-off-by: Like Xu <like.xu@...ux.intel.com>
>> ---
>> arch/x86/kvm/pmu.c | 34 ++++++++++++++++++++++++++++++++--
>> 1 file changed, 32 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
>> index 827886c12c16..0f86c1142f17 100644
>> --- a/arch/x86/kvm/pmu.c
>> +++ b/arch/x86/kvm/pmu.c
>> @@ -74,11 +74,21 @@ static void kvm_perf_overflow_intr(struct perf_event
>> *perf_event,
>> {
>> struct kvm_pmc *pmc = perf_event->overflow_handler_context;
>> struct kvm_pmu *pmu = pmc_to_pmu(pmc);
>> + bool skip_pmi = false;
>> if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
>> - __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
>> + if (perf_event->attr.precise_ip) {
>> + /* Indicate PEBS overflow PMI to guest. */
>> + skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
>> + (unsigned long *)&pmu->global_status);
>> + } else {
>> + __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
>> + }
>> kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
>> + if (skip_pmi)
>> + return;
>> +
>> /*
>> * Inject PMI. If vcpu was in a guest mode during NMI PMI
>> * can be ejected on a guest mode re-entry. Otherwise we can't
>> @@ -99,6 +109,7 @@ static void pmc_reprogram_counter(struct kvm_pmc
>> *pmc, u32 type,
>> bool exclude_kernel, bool intr,
>> bool in_tx, bool in_tx_cp)
>> {
>> + struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu);
>> struct perf_event *event;
>> struct perf_event_attr attr = {
>> .type = type,
>> @@ -110,6 +121,7 @@ static void pmc_reprogram_counter(struct kvm_pmc
>> *pmc, u32 type,
>> .exclude_kernel = exclude_kernel,
>> .config = config,
>> };
>> + bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);
>
> pebs_enable is defined in patch 07, but it is already used here (in patch 06).
> Maybe the patches can be reordered in the next version if necessary.
Thanks! I'll fix it.
>
>> attr.sample_period = get_sample_period(pmc, pmc->counter);
>> @@ -124,9 +136,23 @@ static void pmc_reprogram_counter(struct kvm_pmc
>> *pmc, u32 type,
>> attr.sample_period = 0;
>> attr.config |= HSW_IN_TX_CHECKPOINTED;
>> }
>> + if (pebs) {
>> + /*
>> + * The non-zero precision level of a guest event makes the ordinary
>> + * guest event become a guest PEBS event and triggers the host
>> + * PEBS PMI handler to determine whether the PEBS overflow PMI
>> + * comes from the host counters or the guest.
>> + *
>> + * For most PEBS hardware events, the difference in the software
>> + * precision levels of guest and host PEBS events will not affect
>> + * the accuracy of the PEBS profiling result, because the "event IP"
>> + * in the PEBS record is calibrated on the guest side.
>> + */
>> + attr.precise_ip = 1;
>> + }
>> event = perf_event_create_kernel_counter(&attr, -1, current,
>> - intr ? kvm_perf_overflow_intr :
>> + (intr || pebs) ? kvm_perf_overflow_intr :
>> kvm_perf_overflow, pmc);
>> if (IS_ERR(event)) {
>> pr_debug_ratelimited("kvm_pmu: event creation failed %ld for
>> pmc->idx = %d\n",
>> @@ -161,6 +187,10 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
>> get_sample_period(pmc, pmc->counter)))
>> return false;
>> + if (!test_bit(pmc->idx, (unsigned long
>> *)&pmc_to_pmu(pmc)->pebs_enable) &&
>> + pmc->perf_event->attr.precise_ip)
>> + return false;
>> +
>> /* reuse perf_event to serve as pmc_reprogram_counter() does*/
>> perf_event_enable(pmc->perf_event);
>
Powered by blists - more mailing lists