Message-ID: <d5da58b3-6010-46c5-bca0-818b2cee16d7@linux.intel.com>
Date: Wed, 6 Aug 2025 15:28:18 +0800
From: "Mi, Dapeng" <dapeng1.mi@...ux.intel.com>
To: Sean Christopherson <seanjc@...gle.com>,
 Paolo Bonzini <pbonzini@...hat.com>
Cc: kvm@...r.kernel.org, linux-kernel@...r.kernel.org, Xin Li
 <xin@...or.com>, Sandipan Das <sandipan.das@....com>
Subject: Re: [PATCH 11/18] KVM: x86/pmu: Calculate set of to-be-emulated PMCs
 at time of WRMSRs


On 8/6/2025 3:05 AM, Sean Christopherson wrote:
> Calculate and track PMCs that are counting instructions/branches retired
> when the PMC's event selector (or fixed counter control) is modified
> instead of evaluating the event selector on demand.  Immediately recalc a
> PMC's configuration on writes to avoid false negatives/positives when
> KVM skips an emulated WRMSR, which is guaranteed to occur before the
> main run loop processes KVM_REQ_PMU.
>
> Out of an abundance of caution, and because it's relatively cheap, recalc
> reprogrammed PMCs in kvm_pmu_handle_event() as well.  Recalculating in
> response to KVM_REQ_PMU _should_ be unnecessary, but for now be paranoid
> to avoid introducing easily-avoidable bugs in edge cases.  The code can be
> removed in the future if necessary, e.g. in the unlikely event that the
> overhead of recalculating to-be-emulated PMCs is noticeable.
>
> Note!  Deliberately don't check the PMU event filters, as doing so could
> result in KVM consuming stale information.
>
> Tracking which PMCs are counting branches/instructions will allow grabbing
> SRCU in the fastpath VM-Exit handlers if and only if a PMC event might be
> triggered (to consult the event filters), and will also allow the upcoming
> mediated PMU to do the right thing with respect to counting instructions
> (the mediated PMU won't be able to update PMCs in the VM-Exit fastpath).
>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 ++
>  arch/x86/kvm/pmu.c              | 75 ++++++++++++++++++++++++---------
>  arch/x86/kvm/pmu.h              |  4 ++
>  3 files changed, 61 insertions(+), 21 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index f19a76d3ca0e..d7680612ba1e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -579,6 +579,9 @@ struct kvm_pmu {
>  	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
>  	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
>  
> +	DECLARE_BITMAP(pmc_counting_instructions, X86_PMC_IDX_MAX);
> +	DECLARE_BITMAP(pmc_counting_branches, X86_PMC_IDX_MAX);
> +
>  	u64 ds_area;
>  	u64 pebs_enable;
>  	u64 pebs_enable_rsvd;
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index e1911b366c43..b0f0275a2c2e 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -542,6 +542,47 @@ static int reprogram_counter(struct kvm_pmc *pmc)
>  				     eventsel & ARCH_PERFMON_EVENTSEL_INT);
>  }
>  
> +static bool pmc_is_event_match(struct kvm_pmc *pmc, u64 eventsel)
> +{
> +	/*
> +	 * Ignore checks for edge detect (all events currently emulated by KVM
> +	 * are always rising edges), pin control (unsupported by modern CPUs),
> +	 * and counter mask and its invert flag (KVM doesn't emulate multiple
> +	 * events in a single clock cycle).
> +	 *
> +	 * Note, the uppermost nibble of AMD's mask overlaps Intel's IN_TX (bit
> +	 * 32) and IN_TXCP (bit 33), as well as two reserved bits (bits 35:34).
> +	 * Checking the "in HLE/RTM transaction" flags is correct as the vCPU
> +	 * can't be in a transaction if KVM is emulating an instruction.
> +	 *
> +	 * Checking the reserved bits might be wrong if they are defined in the
> +	 * future, but so could ignoring them, so do the simple thing for now.
> +	 */
> +	return !((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB);
> +}
> +
> +void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc)
> +{
> +	bitmap_clear(pmu->pmc_counting_instructions, pmc->idx, 1);
> +	bitmap_clear(pmu->pmc_counting_branches, pmc->idx, 1);
> +
> +	/*
> +	 * Do NOT consult the PMU event filters, as the filters must be checked
> +	 * at the time of emulation to ensure KVM uses fresh information, e.g.
> +	 * omitting a PMC from a bitmap could result in a missed event if the
> +	 * filter is changed to allow counting the event.
> +	 */
> +	if (!pmc_speculative_in_use(pmc))
> +		return;
> +
> +	if (pmc_is_event_match(pmc, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED))
> +		bitmap_set(pmu->pmc_counting_instructions, pmc->idx, 1);
> +
> +	if (pmc_is_event_match(pmc, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED))
> +		bitmap_set(pmu->pmc_counting_branches, pmc->idx, 1);
> +}
> +EXPORT_SYMBOL_GPL(kvm_pmu_recalc_pmc_emulation);
> +
>  void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
>  {
>  	DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
> @@ -577,6 +618,9 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
>  	 */
>  	if (unlikely(pmu->need_cleanup))
>  		kvm_pmu_cleanup(vcpu);
> +
> +	kvm_for_each_pmc(pmu, pmc, bit, bitmap)
> +		kvm_pmu_recalc_pmc_emulation(pmu, pmc);
>  }
>  
>  int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
> @@ -910,7 +954,8 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc)
>  							 select_user;
>  }
>  
> -static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
> +static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu,
> +				  const unsigned long *event_pmcs)
>  {
>  	DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
>  	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> @@ -919,29 +964,17 @@ static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
>  
>  	BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX);
>  
> +	if (bitmap_empty(event_pmcs, X86_PMC_IDX_MAX))
> +		return;
> +
>  	if (!kvm_pmu_has_perf_global_ctrl(pmu))
> -		bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
> -	else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx,
> +		bitmap_copy(bitmap, event_pmcs, X86_PMC_IDX_MAX);
> +	else if (!bitmap_and(bitmap, event_pmcs,
>  			     (unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX))
>  		return;
>  
>  	kvm_for_each_pmc(pmu, pmc, i, bitmap) {
> -		/*
> -		 * Ignore checks for edge detect (all events currently emulated
> -		 * but KVM are always rising edges), pin control (unsupported
> -		 * by modern CPUs), and counter mask and its invert flag (KVM
> -		 * doesn't emulate multiple events in a single clock cycle).
> -		 *
> -		 * Note, the uppermost nibble of AMD's mask overlaps Intel's
> -		 * IN_TX (bit 32) and IN_TXCP (bit 33), as well as two reserved
> -		 * bits (bits 35:34).  Checking the "in HLE/RTM transaction"
> -		 * flags is correct as the vCPU can't be in a transaction if
> -		 * KVM is emulating an instruction.  Checking the reserved bits
> -		 * might be wrong if they are defined in the future, but so
> -		 * could ignoring them, so do the simple thing for now.
> -		 */
> -		if (((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB) ||
> -		    !pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc))
> +		if (!pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc))
>  			continue;
>  
>  		kvm_pmu_incr_counter(pmc);
> @@ -950,13 +983,13 @@ static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
>  
>  void kvm_pmu_instruction_retired(struct kvm_vcpu *vcpu)
>  {
> -	kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
> +	kvm_pmu_trigger_event(vcpu, vcpu_to_pmu(vcpu)->pmc_counting_instructions);
>  }
>  EXPORT_SYMBOL_GPL(kvm_pmu_instruction_retired);
>  
>  void kvm_pmu_branch_retired(struct kvm_vcpu *vcpu)
>  {
> -	kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
> +	kvm_pmu_trigger_event(vcpu, vcpu_to_pmu(vcpu)->pmc_counting_branches);
>  }
>  EXPORT_SYMBOL_GPL(kvm_pmu_branch_retired);
>  
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index 740af816af37..cb93a936a177 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -176,8 +176,12 @@ extern struct x86_pmu_capability kvm_pmu_cap;
>  
>  void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops);
>  
> +void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc);
> +
>  static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
>  {
> +	kvm_pmu_recalc_pmc_emulation(pmc_to_pmu(pmc), pmc);
> +
>  	set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
>  	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
>  }

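Not part of the patch, just to illustrate the changelog's last paragraph: a minimal, hypothetical sketch of a fastpath caller that takes SRCU (needed to consult the PMU event filters) only when some PMC is actually counting retired instructions. The wrapper name and its placement are assumptions, not code from this series; only the pmc_counting_instructions bitmap and kvm_pmu_instruction_retired() come from the patch above.

static void fastpath_emulated_instruction_retired(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	int idx;

	/* No PMC is counting retired instructions, so there is nothing to
	 * emulate and no reason to consult the event filters. */
	if (bitmap_empty(pmu->pmc_counting_instructions, X86_PMC_IDX_MAX))
		return;

	/* The event filters are protected by kvm->srcu; take it only when a
	 * PMC might actually be incremented. */
	idx = srcu_read_lock(&vcpu->kvm->srcu);
	kvm_pmu_instruction_retired(vcpu);
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
}
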
Reviewed-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>


