linux-kernel - Re: [PATCH 08/15] KVM: VMX: Configure list of user return MSRs at module init

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <db161b4dd7286870db5adb9324e4941f0dc3f098.camel@redhat.com>
Date:   Mon, 10 May 2021 11:23:29 +0300
From:   Maxim Levitsky <mlevitsk@...hat.com>
To:     Sean Christopherson <seanjc@...gle.com>,
        Paolo Bonzini <pbonzini@...hat.com>
Cc:     Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>, kvm@...r.kernel.org,
        linux-kernel@...r.kernel.org, Xiaoyao Li <xiaoyao.li@...el.com>,
        Reiji Watanabe <reijiw@...gle.com>
Subject: Re: [PATCH 08/15] KVM: VMX: Configure list of user return MSRs at
 module init

On Tue, 2021-05-04 at 10:17 -0700, Sean Christopherson wrote:
> Configure the list of user return MSRs that are actually supported at
> module init instead of reprobing the list of possible MSRs every time a
> vCPU is created.  Curating the list on a per-vCPU basis is pointless; KVM
> is completely hosed if the set of supported MSRs changes after module init,
> or if the set of MSRs differs per physical CPU.
> 
> The per-vCPU lists also increase complexity (see __vmx_find_uret_msr()) and
> create corner cases that _should_ be impossible, but theoretically exist
> in KVM, e.g. advertising RDTSCP to userspace without actually being able to
> virtualize RDTSCP if probing MSR_TSC_AUX fails.
> 
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 61 ++++++++++++++++++++++++++++--------------
>  arch/x86/kvm/vmx/vmx.h | 10 ++++++-
>  2 files changed, 50 insertions(+), 21 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 42e4bbaa299a..68454b0de2b1 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -461,7 +461,7 @@ static unsigned long host_idt_base;
>   * support this emulation, IA32_STAR must always be included in
>   * vmx_uret_msrs_list[], even in i386 builds.
>   */
> -static const u32 vmx_uret_msrs_list[] = {
> +static u32 vmx_uret_msrs_list[] = {
>  #ifdef CONFIG_X86_64
>  	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
>  #endif
> @@ -469,6 +469,12 @@ static const u32 vmx_uret_msrs_list[] = {
>  	MSR_IA32_TSX_CTRL,
>  };
>  
> +/*
> + * Number of user return MSRs that are actually supported in hardware.
> + * vmx_uret_msrs_list is modified when KVM is loaded to drop unsupported MSRs.
> + */
> +static int vmx_nr_uret_msrs;
> +
>  #if IS_ENABLED(CONFIG_HYPERV)
>  static bool __read_mostly enlightened_vmcs = true;
>  module_param(enlightened_vmcs, bool, 0444);
> @@ -700,9 +706,16 @@ static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
>  {
>  	int i;
>  
> -	for (i = 0; i < vmx->nr_uret_msrs; ++i)
> +	/*
> +	 * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list,
> +	 * but is ordered differently.  The MSR is matched against the list of
> +	 * supported uret MSRs using "slot", but the index that is returned is
> +	 * the index into guest_uret_msrs.
> +	 */
> +	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
>  		if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
>  			return i;
> +	}
>  	return -1;
>  }
>  
> @@ -6929,18 +6942,10 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
>  			goto free_vpid;
>  	}
>  
> -	BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
> +	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
> +		vmx->guest_uret_msrs[i].data = 0;
>  
> -	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
> -		u32 index = vmx_uret_msrs_list[i];
> -		int j = vmx->nr_uret_msrs;
> -
> -		if (kvm_probe_user_return_msr(index))
> -			continue;
> -
> -		vmx->guest_uret_msrs[j].slot = i;
I don't see anything initializing .slot after this patch.
This code is removed by a later patch, which masks the bug,
but for the sake of bisection, I think that this patch
should still be fixed.

Other than this minor bug:

Reviewed-by: Maxim Levitsky <mlevitsk@...hat.com>

Best regards,
	Maxim Levitsky



> -		vmx->guest_uret_msrs[j].data = 0;
> -		switch (index) {
> +		switch (vmx_uret_msrs_list[i]) {
>  		case MSR_IA32_TSX_CTRL:
>  			/*
>  			 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
> @@ -6954,15 +6959,14 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
>  			 * host so that TSX remains always disabled.
>  			 */
>  			if (boot_cpu_has(X86_FEATURE_RTM))
> -				vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
> +				vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
>  			else
> -				vmx->guest_uret_msrs[j].mask = 0;
> +				vmx->guest_uret_msrs[i].mask = 0;
>  			break;
>  		default:
> -			vmx->guest_uret_msrs[j].mask = -1ull;
> +			vmx->guest_uret_msrs[i].mask = -1ull;
>  			break;
>  		}
> -		++vmx->nr_uret_msrs;
>  	}
>  
>  	err = alloc_loaded_vmcs(&vmx->vmcs01);
> @@ -7821,17 +7825,34 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
>  	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
>  };
>  
> +static __init void vmx_setup_user_return_msrs(void)
> +{
> +	u32 msr;
> +	int i;
> +
> +	BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
> +
> +	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
> +		msr = vmx_uret_msrs_list[i];
> +
> +		if (kvm_probe_user_return_msr(msr))
> +			continue;
> +
> +		kvm_define_user_return_msr(vmx_nr_uret_msrs, msr);
> +		vmx_uret_msrs_list[vmx_nr_uret_msrs++] = msr;
> +	}
> +}
> +
>  static __init int hardware_setup(void)
>  {
>  	unsigned long host_bndcfgs;
>  	struct desc_ptr dt;
> -	int r, i, ept_lpage_level;
> +	int r, ept_lpage_level;
>  
>  	store_idt(&dt);
>  	host_idt_base = dt.address;
>  
> -	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
> -		kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]);
> +	vmx_setup_user_return_msrs();
>  
>  	if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
>  		return -EIO;
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 008cb87ff088..d71ed8b425c5 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -245,8 +245,16 @@ struct vcpu_vmx {
>  	u32                   idt_vectoring_info;
>  	ulong                 rflags;
>  
> +	/*
> +	 * User return MSRs are always emulated when enabled in the guest, but
> +	 * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
> +	 * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
> +	 * be loaded into hardware if those conditions aren't met.
> +	 * nr_active_uret_msrs tracks the number of MSRs that need to be loaded
> +	 * into hardware when running the guest.  guest_uret_msrs[] is resorted
> +	 * whenever the number of "active" uret MSRs is modified.
> +	 */
>  	struct vmx_uret_msr   guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
> -	int                   nr_uret_msrs;
>  	int                   nr_active_uret_msrs;
>  	bool                  guest_uret_msrs_loaded;
>  #ifdef CONFIG_X86_64