linux-kernel - Re: [PATCH 09/15] KVM: VMX: Use flag to indicate "active" uret MSRs instead of sorting list

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e3ff6ef52a5022b62b98e23960b9bbe85d60182e.camel@redhat.com>
Date:   Mon, 10 May 2021 11:25:28 +0300
From:   Maxim Levitsky <mlevitsk@...hat.com>
To:     Sean Christopherson <seanjc@...gle.com>,
        Paolo Bonzini <pbonzini@...hat.com>
Cc:     Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>, kvm@...r.kernel.org,
        linux-kernel@...r.kernel.org, Xiaoyao Li <xiaoyao.li@...el.com>,
        Reiji Watanabe <reijiw@...gle.com>
Subject: Re: [PATCH 09/15] KVM: VMX: Use flag to indicate "active" uret MSRs
 instead of sorting list

On Tue, 2021-05-04 at 10:17 -0700, Sean Christopherson wrote:
> Explicitly flag a uret MSR as needing to be loaded into hardware instead of
> resorting the list of "active" MSRs and tracking how many MSRs in total
> need to be loaded.  The only benefit to sorting the list is that the loop
> to load MSRs during vmx_prepare_switch_to_guest() doesn't need to iterate
> over all supported uret MRS, only those that are active.  But that is a
> pointless optimization, as the most common case, running a 64-bit guest,
> will load the vast majority of MSRs.  Not to mention that a single WRMSR is
> far more expensive than iterating over the list.
> 
> Providing a stable list order obviates the need to track a given MSR's
> "slot" in the per-CPU list of user return MSRs; all lists simply use the
> same ordering.  Future patches will take advantage of the stable order to
> further simplify the related code.

> 
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 80 ++++++++++++++++++++++--------------------
>  arch/x86/kvm/vmx/vmx.h |  2 +-
>  2 files changed, 42 insertions(+), 40 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 68454b0de2b1..6caabcd5037e 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -458,8 +458,9 @@ static unsigned long host_idt_base;
>   * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
>   * will emulate SYSCALL in legacy mode if the vendor string in guest
>   * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
> - * support this emulation, IA32_STAR must always be included in
> - * vmx_uret_msrs_list[], even in i386 builds.
> + * support this emulation, MSR_STAR is included in the list for i386,
> + * but is never loaded into hardware.  MSR_CSTAR is also never loaded
> + * into hardware and is here purely for emulation purposes.
>   */
>  static u32 vmx_uret_msrs_list[] = {
>  #ifdef CONFIG_X86_64
> @@ -702,18 +703,12 @@ static bool is_valid_passthrough_msr(u32 msr)
>  	return r;
>  }
>  
> -static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
> +static inline int __vmx_find_uret_msr(u32 msr)
>  {
>  	int i;
>  
> -	/*
> -	 * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list,
> -	 * but is ordered differently.  The MSR is matched against the list of
> -	 * supported uret MSRs using "slot", but the index that is returned is
> -	 * the index into guest_uret_msrs.
> -	 */
>  	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
> -		if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
> +		if (vmx_uret_msrs_list[i] == msr)
>  			return i;
>  	}
>  	return -1;
> @@ -723,7 +718,7 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
>  {
>  	int i;
>  
> -	i = __vmx_find_uret_msr(vmx, msr);
> +	i = __vmx_find_uret_msr(msr);
>  	if (i >= 0)
>  		return &vmx->guest_uret_msrs[i];
>  	return NULL;
> @@ -732,13 +727,14 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
>  static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
>  				  struct vmx_uret_msr *msr, u64 data)
>  {
> +	unsigned int slot = msr - vmx->guest_uret_msrs;
>  	int ret = 0;
>  
>  	u64 old_msr_data = msr->data;
>  	msr->data = data;
> -	if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) {
> +	if (msr->load_into_hardware) {
>  		preempt_disable();
> -		ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask);
> +		ret = kvm_set_user_return_msr(slot, msr->data, msr->mask);
>  		preempt_enable();
>  		if (ret)
>  			msr->data = old_msr_data;
> @@ -1090,7 +1086,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
>  		return false;
>  	}
>  
> -	i = __vmx_find_uret_msr(vmx, MSR_EFER);
> +	i = __vmx_find_uret_msr(MSR_EFER);
>  	if (i < 0)
>  		return false;
>  
> @@ -1252,11 +1248,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
>  	 */
>  	if (!vmx->guest_uret_msrs_loaded) {
>  		vmx->guest_uret_msrs_loaded = true;
> -		for (i = 0; i < vmx->nr_active_uret_msrs; ++i)
> -			kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot,
> +		for (i = 0; i < vmx_nr_uret_msrs; ++i) {
> +			if (!vmx->guest_uret_msrs[i].load_into_hardware)
> +				continue;
> +
> +			kvm_set_user_return_msr(i,
>  						vmx->guest_uret_msrs[i].data,
>  						vmx->guest_uret_msrs[i].mask);
> -
> +		}
>  	}
>  
>      	if (vmx->nested.need_vmcs12_to_shadow_sync)
> @@ -1763,19 +1762,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
>  	vmx_clear_hlt(vcpu);
>  }
>  
> -static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
> +static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
> +			       bool load_into_hardware)
>  {
> -	struct vmx_uret_msr tmp;
> -	int from, to;
> +	struct vmx_uret_msr *uret_msr;
>  
> -	from = __vmx_find_uret_msr(vmx, msr);
> -	if (from < 0)
> +	uret_msr = vmx_find_uret_msr(vmx, msr);
> +	if (!uret_msr)
>  		return;
> -	to = vmx->nr_active_uret_msrs++;
>  
> -	tmp = vmx->guest_uret_msrs[to];
> -	vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from];
> -	vmx->guest_uret_msrs[from] = tmp;
> +	uret_msr->load_into_hardware = load_into_hardware;
>  }
>  
>  /*
> @@ -1785,30 +1781,36 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
>   */
>  static void setup_msrs(struct vcpu_vmx *vmx)
>  {
> -	vmx->guest_uret_msrs_loaded = false;
> -	vmx->nr_active_uret_msrs = 0;
>  #ifdef CONFIG_X86_64
> +	bool load_syscall_msrs;
> +
>  	/*
>  	 * The SYSCALL MSRs are only needed on long mode guests, and only
>  	 * when EFER.SCE is set.
>  	 */
> -	if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
> -		vmx_setup_uret_msr(vmx, MSR_STAR);
> -		vmx_setup_uret_msr(vmx, MSR_LSTAR);
> -		vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK);
> -	}
> +	load_syscall_msrs = is_long_mode(&vmx->vcpu) &&
> +			    (vmx->vcpu.arch.efer & EFER_SCE);
> +
> +	vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs);
> +	vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs);
> +	vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs);
>  #endif
> -	if (update_transition_efer(vmx))
> -		vmx_setup_uret_msr(vmx, MSR_EFER);
> +	vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));
>  
> -	if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)  ||
> -	    guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID))
> -		vmx_setup_uret_msr(vmx, MSR_TSC_AUX);
> +	vmx_setup_uret_msr(vmx, MSR_TSC_AUX,
> +			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
> +			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
>  
> -	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL);
> +	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, true);
>  
>  	if (cpu_has_vmx_msr_bitmap())
>  		vmx_update_msr_bitmap(&vmx->vcpu);
> +
> +	/*
> +	 * The set of MSRs to load may have changed, reload MSRs before the
> +	 * next VM-Enter.
> +	 */
> +	vmx->guest_uret_msrs_loaded = false;
>  }
>  
>  static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index d71ed8b425c5..16e4e457ba23 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -36,7 +36,7 @@ struct vmx_msrs {
>  };
>  
>  struct vmx_uret_msr {
> -	unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */
> +	bool load_into_hardware;
>  	u64 data;
>  	u64 mask;
>  };

This is a very welcomed change, the old code was very complicated
to understand.

However I still feel that it would be nice to have a comment explaining
that vmx->guest_uret_msrs follow the same order now as the (eventually common)
uret msr list, and basically is a parallel array that extends the 
percpu 'user_return_msrs' and the global kvm_uret_msrs_list arrays.

In fact why not to fold the vmx->guest_uret_msrs into the x86 common uret msr list?
There is nothing VMX specific in this list IMHO and SVM can use it as well,
in fact it has 'svm->tsc_aux' which is the 'data' field of a 'struct vmx_uret_msr'


Reviewed-by: Maxim Levitsky <mlevitsk@...il.com>

Best regards,
	Maxim Levitsky