lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <517ee0b7ba1a68a63e9e1068ec2570c62471d695.camel@redhat.com>
Date: Thu, 01 May 2025 16:34:34 -0400
From: mlevitsk@...hat.com
To: "Mi, Dapeng" <dapeng1.mi@...ux.intel.com>, kvm@...r.kernel.org
Cc: Thomas Gleixner <tglx@...utronix.de>, Borislav Petkov <bp@...en8.de>, 
 Paolo Bonzini <pbonzini@...hat.com>, x86@...nel.org, Sean Christopherson
 <seanjc@...gle.com>, Dave Hansen <dave.hansen@...ux.intel.com>, Ingo Molnar
 <mingo@...hat.com>,  linux-kernel@...r.kernel.org, "H. Peter Anvin"
 <hpa@...or.com>
Subject: Re: [PATCH 1/3] x86: KVM: VMX: Wrap GUEST_IA32_DEBUGCTL read/write
 with access functions

On Wed, 2025-04-23 at 17:51 +0800, Mi, Dapeng wrote:
> The shortlog "x86: KVM: VMX: Wrap GUEST_IA32_DEBUGCTL read/write with
> access functions" doesn't follow Sean's suggestion
> (https://github.com/kvm-x86/linux/blob/next/Documentation/process/maintainer-kvm-x86.rst#shortlog).
> Please modify. Thanks.
> 
> 
> On 4/16/2025 8:25 AM, Maxim Levitsky wrote:
> > Instead of reading and writing GUEST_IA32_DEBUGCTL vmcs field directly,
> > wrap the logic with get/set functions.
> > 
> > Also move the checks that the guest's supplied value is valid to the new
> > 'set' function.
> > 
> > In particular, the above change fixes a minor security issue in which L1
> > hypervisor could set the GUEST_IA32_DEBUGCTL, and eventually the host's
> > MSR_IA32_DEBUGCTL to any value by performing a VM entry to L2 with
> > VM_ENTRY_LOAD_DEBUG_CONTROLS set.
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@...hat.com>
> > ---
> >  arch/x86/kvm/vmx/nested.c    | 15 +++++++---
> >  arch/x86/kvm/vmx/pmu_intel.c |  9 +++---
> >  arch/x86/kvm/vmx/vmx.c       | 58 +++++++++++++++++++++++-------------
> >  arch/x86/kvm/vmx/vmx.h       |  3 ++
> >  4 files changed, 57 insertions(+), 28 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> > index e073e3008b16..b7686569ee09 100644
> > --- a/arch/x86/kvm/vmx/nested.c
> > +++ b/arch/x86/kvm/vmx/nested.c
> > @@ -2641,6 +2641,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> >  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> >  	struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx);
> >  	bool load_guest_pdptrs_vmcs12 = false;
> > +	u64 new_debugctl;
> >  
> >  	if (vmx->nested.dirty_vmcs12 || nested_vmx_is_evmptr12_valid(vmx)) {
> >  		prepare_vmcs02_rare(vmx, vmcs12);
> > @@ -2653,11 +2654,17 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> >  	if (vmx->nested.nested_run_pending &&
> >  	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
> >  		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
> > -		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
> > +		new_debugctl = vmcs12->guest_ia32_debugctl;
> >  	} else {
> >  		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
> > -		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl);
> > +		new_debugctl = vmx->nested.pre_vmenter_debugctl;
> >  	}
> > +
> > +	if (CC(!vmx_set_guest_debugctl(vcpu, new_debugctl, false))) {
> > +		*entry_failure_code = ENTRY_FAIL_DEFAULT;
> > +		return -EINVAL;
> > +	}
> > +
> >  	if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
> >  	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
> >  		vmcs_write64(GUEST_BNDCFGS, vmx->nested.pre_vmenter_bndcfgs);
> > @@ -3520,7 +3527,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
> >  
> >  	if (!vmx->nested.nested_run_pending ||
> >  	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
> > -		vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
> > +		vmx->nested.pre_vmenter_debugctl = vmx_get_guest_debugctl(vcpu);
> >  	if (kvm_mpx_supported() &&
> >  	    (!vmx->nested.nested_run_pending ||
> >  	     !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
> > @@ -4788,7 +4795,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
> >  	__vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
> >  
> >  	kvm_set_dr(vcpu, 7, 0x400);
> > -	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> > +	vmx_set_guest_debugctl(vcpu, 0, false);
> >  
> >  	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
> >  				vmcs12->vm_exit_msr_load_count))
> > diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> > index 8a94b52c5731..f6f448adfb80 100644
> > --- a/arch/x86/kvm/vmx/pmu_intel.c
> > +++ b/arch/x86/kvm/vmx/pmu_intel.c
> > @@ -19,6 +19,7 @@
> >  #include "lapic.h"
> >  #include "nested.h"
> >  #include "pmu.h"
> > +#include "vmx.h"
> >  #include "tdx.h"
> >  
> >  /*
> > @@ -652,11 +653,11 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
> >   */
> >  static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
> >  {
> > -	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> > +	u64 data = vmx_get_guest_debugctl(vcpu);
> >  
> >  	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
> >  		data &= ~DEBUGCTLMSR_LBR;
> > -		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> > +		vmx_set_guest_debugctl(vcpu, data, true);
> 
> Two questions. 
> 
> 1. why to call vmx_set_guest_debugctl() to do the extra check? currently
> IA32_DEBUGCTL MSR is always intercepted and it's already checked at
> vmx_set_msr() and seems unnecessary to check here again.

Hi,


I wanted this to be consistent. KVM has plenty of functions that can be both
guest triggered and internally triggered. For example kvm_set_cr4()

Besides the vmx_set_guest_debugctl also notes the value the guest wrote
to be able to return it back to the guest if we choose to overide some
bits of the MSR, so it made sense to have one common function to set the msr.

Do you think that can affect performance? 


> 
> 2. why the argument "host_initiated" is true? It looks the data is not from
> host.

This is my mistake.

> 
> 
> >  	}
> >  }
> >  
> > @@ -729,7 +730,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
> >  
> >  	if (!lbr_desc->event) {
> >  		vmx_disable_lbr_msrs_passthrough(vcpu);
> > -		if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
> > +		if (vmx_get_guest_debugctl(vcpu) & DEBUGCTLMSR_LBR)
> >  			goto warn;
> >  		if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
> >  			goto warn;
> > @@ -751,7 +752,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
> >  
> >  static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
> >  {
> > -	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
> > +	if (!(vmx_get_guest_debugctl(vcpu) & DEBUGCTLMSR_LBR))
> >  		intel_pmu_release_guest_lbr_event(vcpu);
> >  }
> >  
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index ef2d7208dd20..4237422dc4ed 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -2154,7 +2154,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
> >  		break;
> >  	case MSR_IA32_DEBUGCTLMSR:
> > -		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> > +		msr_info->data = vmx_get_guest_debugctl(vcpu);
> >  		break;
> >  	default:
> >  	find_uret_msr:
> > @@ -2194,6 +2194,41 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated
> >  	return debugctl;
> >  }
> >  
> > +u64 vmx_get_guest_debugctl(struct kvm_vcpu *vcpu)
> > +{
> > +	return vmcs_read64(GUEST_IA32_DEBUGCTL);
> > +}
> > +
> > +static void __vmx_set_guest_debugctl(struct kvm_vcpu *vcpu, u64 data)
> > +{
> > +	vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> > +}
> 
> IMO,  it seems unnecessary to add these 2  wrappers since the original code
> is already intuitive enough and simple. But if you want, please add
> "inline" before these 2 wrappers.

The __vmx_set_guest_debugctl in the next patch will store the written value in
a field, this is why I did it this way.

The vmx_get_guest_debugctl will read this value instead, also in the next patch.

I thought it would be cleaner to first introduce the trivial wrappers and then
extend them.

> 
> 
> > +
> > +bool vmx_set_guest_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated)
> 
> Since most of code in this function checks guest debugctl, better to rename
> it to "vmx_check_and_set_guest_debugctl".

I don't mind doing so.

> 
> 
> > +{
> > +	u64 invalid = data & ~vmx_get_supported_debugctl(vcpu, host_initiated);
> > +
> > +	if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
> > +		kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data);
> > +		data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
> > +		invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
> 
> Add space around above 3 "|".

I copied this code "as is" from the wrmsr code. I can add this though.

Best regards,
	Maxim Levitsky

> 
> 
> > +	}
> > +
> > +	if (invalid)
> > +		return false;
> > +
> > +	if (is_guest_mode(vcpu) && (get_vmcs12(vcpu)->vm_exit_controls &
> > +					VM_EXIT_SAVE_DEBUG_CONTROLS))
> > +		get_vmcs12(vcpu)->guest_ia32_debugctl = data;
> > +
> > +	if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
> > +	    (data & DEBUGCTLMSR_LBR))
> > +		intel_pmu_create_guest_lbr_event(vcpu);
> > +
> > +	__vmx_set_guest_debugctl(vcpu, data);
> > +	return true;
> > +}
> > +
> >  /*
> >   * Writes msr value into the appropriate "register".
> >   * Returns 0 on success, non-0 otherwise.
> > @@ -2263,26 +2298,9 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  		vmcs_writel(GUEST_SYSENTER_ESP, data);
> >  		break;
> >  	case MSR_IA32_DEBUGCTLMSR: {
> > -		u64 invalid;
> > -
> > -		invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
> > -		if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
> > -			kvm_pr_unimpl_wrmsr(vcpu, msr_index, data);
> > -			data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
> > -			invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
> > -		}
> > -
> > -		if (invalid)
> > +		if (!vmx_set_guest_debugctl(vcpu, data, msr_info->host_initiated))
> >  			return 1;
> >  
> > -		if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
> > -						VM_EXIT_SAVE_DEBUG_CONTROLS)
> > -			get_vmcs12(vcpu)->guest_ia32_debugctl = data;
> > -
> > -		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> > -		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
> > -		    (data & DEBUGCTLMSR_LBR))
> > -			intel_pmu_create_guest_lbr_event(vcpu);
> >  		return 0;
> >  	}
> >  	case MSR_IA32_BNDCFGS:
> > @@ -4795,7 +4813,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
> >  	vmcs_write32(GUEST_SYSENTER_CS, 0);
> >  	vmcs_writel(GUEST_SYSENTER_ESP, 0);
> >  	vmcs_writel(GUEST_SYSENTER_EIP, 0);
> > -	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> > +	__vmx_set_guest_debugctl(&vmx->vcpu, 0);
> >  
> >  	if (cpu_has_vmx_tpr_shadow()) {
> >  		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
> > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> > index 6d1e40ecc024..8ac46fb47abd 100644
> > --- a/arch/x86/kvm/vmx/vmx.h
> > +++ b/arch/x86/kvm/vmx/vmx.h
> > @@ -404,6 +404,9 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
> >  
> >  gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
> >  
> > +bool vmx_set_guest_debugctl(struct kvm_vcpu *vcpu, u64 value, bool host_initiated);
> > +u64 vmx_get_guest_debugctl(struct kvm_vcpu *vcpu);
> > +
> >  static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
> >  					     int type, bool value)
> >  {


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ