Message-ID: <d6d79bd0-f6f1-4cd9-aa63-5ddc32985dfd@linux.intel.com>
Date: Mon, 9 Jun 2025 16:19:58 +0800
From: "Mi, Dapeng" <dapeng1.mi@...ux.intel.com>
To: Sean Christopherson <seanjc@...gle.com>,
 Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
 Arnaldo Carvalho de Melo <acme@...nel.org>,
 Namhyung Kim <namhyung@...nel.org>, Paolo Bonzini <pbonzini@...hat.com>
Cc: linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org,
 kvm@...r.kernel.org
Subject: Re: [PATCH] perf/x86: KVM: Have perf define a dedicated struct for
 getting guest PEBS data


On 6/3/2025 7:51 AM, Sean Christopherson wrote:
> Have perf define a struct for getting guest PEBS data from KVM instead of
> poking into the kvm_pmu structure.  Passing in an entire "struct kvm_pmu"
> _as an opaque pointer_ to get at four fields is silly, especially since
> one of the fields exists purely to convey information to perf, i.e. isn't
> used by KVM.
>
> Perf should also own its APIs, i.e. define what fields/data it needs, not
> rely on KVM to throw fields into data structures that effectively hold
> KVM-internal state.
>
> Opportunistically rephrase the comment about cross-mapped counters to
> explain *why* PEBS needs to be disabled.
>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
>  arch/x86/events/core.c            |  5 +++--
>  arch/x86/events/intel/core.c      | 20 ++++++++++----------
>  arch/x86/events/perf_event.h      |  3 ++-
>  arch/x86/include/asm/kvm_host.h   |  9 ---------
>  arch/x86/include/asm/perf_event.h | 13 +++++++++++--
>  arch/x86/kvm/vmx/pmu_intel.c      | 18 +++++++++++++++---
>  arch/x86/kvm/vmx/vmx.c            | 11 +++++++----
>  arch/x86/kvm/vmx/vmx.h            |  2 +-
>  8 files changed, 49 insertions(+), 32 deletions(-)
>
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index 139ad80d1df3..6080c3e6e191 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -703,9 +703,10 @@ void x86_pmu_disable_all(void)
>  	}
>  }
>  
> -struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data)
> +struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr,
> +						  struct x86_guest_pebs *guest_pebs)
>  {
> -	return static_call(x86_pmu_guest_get_msrs)(nr, data);
> +	return static_call(x86_pmu_guest_get_msrs)(nr, guest_pebs);
>  }
>  EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
>  
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index c5f385413392..364bba216cf4 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -14,7 +14,6 @@
>  #include <linux/slab.h>
>  #include <linux/export.h>
>  #include <linux/nmi.h>
> -#include <linux/kvm_host.h>
>  
>  #include <asm/cpufeature.h>
>  #include <asm/debugreg.h>
> @@ -4332,11 +4331,11 @@ static int intel_pmu_hw_config(struct perf_event *event)
>   * when it uses {RD,WR}MSR, which should be handled by the KVM context,
>   * specifically in the intel_pmu_{get,set}_msr().
>   */
> -static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
> +static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr,
> +							  struct x86_guest_pebs *guest_pebs)
>  {
>  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>  	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
> -	struct kvm_pmu *kvm_pmu = (struct kvm_pmu *)data;
>  	u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
>  	u64 pebs_mask = cpuc->pebs_enabled & x86_pmu.pebs_capable;
>  	int global_ctrl, pebs_enable;
> @@ -4374,20 +4373,20 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
>  		return arr;
>  	}
>  
> -	if (!kvm_pmu || !x86_pmu.pebs_ept)
> +	if (!guest_pebs || !x86_pmu.pebs_ept)
>  		return arr;
>  
>  	arr[(*nr)++] = (struct perf_guest_switch_msr){
>  		.msr = MSR_IA32_DS_AREA,
>  		.host = (unsigned long)cpuc->ds,
> -		.guest = kvm_pmu->ds_area,
> +		.guest = guest_pebs->ds_area,
>  	};
>  
>  	if (x86_pmu.intel_cap.pebs_baseline) {
>  		arr[(*nr)++] = (struct perf_guest_switch_msr){
>  			.msr = MSR_PEBS_DATA_CFG,
>  			.host = cpuc->active_pebs_data_cfg,
> -			.guest = kvm_pmu->pebs_data_cfg,
> +			.guest = guest_pebs->data_cfg,
>  		};
>  	}
>  
> @@ -4395,7 +4394,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
>  	arr[pebs_enable] = (struct perf_guest_switch_msr){
>  		.msr = MSR_IA32_PEBS_ENABLE,
>  		.host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
> -		.guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable,
> +		.guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & guest_pebs->enable,
>  	};
>  
>  	if (arr[pebs_enable].host) {
> @@ -4403,8 +4402,8 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
>  		arr[pebs_enable].guest = 0;
>  	} else {
>  		/* Disable guest PEBS thoroughly for cross-mapped PEBS counters. */
> -		arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask;
> -		arr[global_ctrl].guest &= ~kvm_pmu->host_cross_mapped_mask;
> +		arr[pebs_enable].guest &= ~guest_pebs->cross_mapped_mask;
> +		arr[global_ctrl].guest &= ~guest_pebs->cross_mapped_mask;
>  		/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
>  		arr[global_ctrl].guest |= arr[pebs_enable].guest;
>  	}
> @@ -4412,7 +4411,8 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
>  	return arr;
>  }
>  
> -static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
> +static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr,
> +							 struct x86_guest_pebs *guest_pebs)
>  {
>  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>  	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
> index 46d120597bab..29ae9e442f2e 100644
> --- a/arch/x86/events/perf_event.h
> +++ b/arch/x86/events/perf_event.h
> @@ -963,7 +963,8 @@ struct x86_pmu {
>  	/*
>  	 * Intel host/guest support (KVM)
>  	 */
> -	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr, void *data);
> +	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr,
> +							struct x86_guest_pebs *guest_pebs);
>  
>  	/*
>  	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 7bc174a1f1cb..2fe0d2520f14 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -583,15 +583,6 @@ struct kvm_pmu {
>  	u64 pebs_data_cfg;
>  	u64 pebs_data_cfg_rsvd;
>  
> -	/*
> -	 * If a guest counter is cross-mapped to host counter with different
> -	 * index, its PEBS capability will be temporarily disabled.
> -	 *
> -	 * The user should make sure that this mask is updated
> -	 * after disabling interrupts and before perf_guest_get_msrs();
> -	 */
> -	u64 host_cross_mapped_mask;
> -
>  	/*
>  	 * The gate to release perf_events not marked in
>  	 * pmc_in_use only once in a vcpu time slice.
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index 812dac3f79f0..0edfc3e34813 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -646,11 +646,20 @@ static inline void perf_events_lapic_init(void)	{ }
>  static inline void perf_check_microcode(void) { }
>  #endif
>  
> +struct x86_guest_pebs {
> +	u64	enable;
> +	u64	ds_area;
> +	u64	data_cfg;
> +	u64	cross_mapped_mask;
> +};
> +
>  #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
> -extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data);
> +extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr,
> +							 struct x86_guest_pebs *guest_pebs);
>  extern void x86_perf_get_lbr(struct x86_pmu_lbr *lbr);
>  #else
> -struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data);
> +struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr,
> +						  struct x86_guest_pebs *guest_pebs);
>  static inline void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
>  {
>  	memset(lbr, 0, sizeof(*lbr));
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 77012b2eca0e..e6ff02b97677 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -705,11 +705,22 @@ static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
>  		intel_pmu_release_guest_lbr_event(vcpu);
>  }
>  
> -void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
> +u64 intel_pmu_get_cross_mapped_mask(struct kvm_pmu *pmu)
>  {
> -	struct kvm_pmc *pmc = NULL;
> +	u64 host_cross_mapped_mask;
> +	struct kvm_pmc *pmc;
>  	int bit, hw_idx;
>  
> +	if (!(pmu->pebs_enable & pmu->global_ctrl))
> +		return 0;
> +
> +	/*
> +	 * If a guest counter is cross-mapped to a host counter with a different
> +	 * index, flag it for perf, as PEBS needs to be disabled for that
> +	 * counter to avoid enabling PEBS on the wrong perf event.
> +	 */
> +	host_cross_mapped_mask = 0;
> +
>  	kvm_for_each_pmc(pmu, pmc, bit, (unsigned long *)&pmu->global_ctrl) {
>  		if (!pmc_speculative_in_use(pmc) ||
>  		    !pmc_is_globally_enabled(pmc) || !pmc->perf_event)
> @@ -721,8 +732,9 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
>  		 */
>  		hw_idx = pmc->perf_event->hw.idx;
>  		if (hw_idx != pmc->idx && hw_idx > -1)
> -			pmu->host_cross_mapped_mask |= BIT_ULL(hw_idx);
> +			host_cross_mapped_mask |= BIT_ULL(hw_idx);
>  	}
> +	return host_cross_mapped_mask;
>  }
>  
>  struct kvm_pmu_ops intel_pmu_ops __initdata = {
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 5c5766467a61..2a496fd64edc 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7247,12 +7247,15 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
>  	struct perf_guest_switch_msr *msrs;
>  	struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu);
>  
> -	pmu->host_cross_mapped_mask = 0;
> -	if (pmu->pebs_enable & pmu->global_ctrl)
> -		intel_pmu_cross_mapped_check(pmu);
> +	struct x86_guest_pebs guest_pebs = {
> +		.enable = pmu->pebs_enable,
> +		.ds_area = pmu->ds_area,
> +		.data_cfg = pmu->pebs_data_cfg,
> +		.cross_mapped_mask = intel_pmu_get_cross_mapped_mask(pmu),
> +	};
>  
>  	/* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */
> -	msrs = perf_guest_get_msrs(&nr_msrs, (void *)pmu);
> +	msrs = perf_guest_get_msrs(&nr_msrs, &guest_pebs);
>  	if (!msrs)
>  		return;
>  
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 951e44dc9d0e..bfcce24919d5 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -677,7 +677,7 @@ static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
>  	return !!vcpu_to_lbr_records(vcpu)->nr;
>  }
>  
> -void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
> +u64 intel_pmu_get_cross_mapped_mask(struct kvm_pmu *pmu);
>  int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
>  void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);
>  
>
> base-commit: 0ff41df1cb268fc69e703a08a57ee14ae967d0ca

LGTM.
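
To summarize the interface change for anyone skimming (this just restates
the diff above, nothing new):

  /* Before: perf receives an opaque pointer and casts it back to
   * a KVM-internal type.
   */
  msrs = perf_guest_get_msrs(&nr_msrs, (void *)pmu);

  /* After: KVM fills a perf-owned struct with exactly the four fields
   * perf consumes, and passes it with its real type.
   */
  struct x86_guest_pebs guest_pebs = {
          .enable            = pmu->pebs_enable,
          .ds_area           = pmu->ds_area,
          .data_cfg          = pmu->pebs_data_cfg,
          .cross_mapped_mask = intel_pmu_get_cross_mapped_mask(pmu),
  };
  msrs = perf_guest_get_msrs(&nr_msrs, &guest_pebs);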

Reviewed-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>


