linux-kernel - Re: [PATCH] KVM: SVM: fix tsc scaling cache logic

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <f6b48819-9c0e-69f7-de07-2d49cd0aa1c1@redhat.com>
Date:   Tue, 7 Jun 2022 17:26:04 +0200
From:   Paolo Bonzini <pbonzini@...hat.com>
To:     Maxim Levitsky <mlevitsk@...hat.com>, kvm@...r.kernel.org
Cc:     Joerg Roedel <joro@...tes.org>,
        Sean Christopherson <seanjc@...gle.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        linux-kernel@...r.kernel.org, Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Borislav Petkov <bp@...en8.de>,
        Ilias Stamatis <ilstam@...zon.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        "H. Peter Anvin" <hpa@...or.com>,
        Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org,
        Ingo Molnar <mingo@...hat.com>
Subject: Re: [PATCH] KVM: SVM: fix tsc scaling cache logic

On 6/6/22 20:11, Maxim Levitsky wrote:
> SVM uses a per-cpu variable to cache the current value of the
> tsc scaling multiplier msr on each cpu.
> 
> Commit 1ab9287add5e2
> ("KVM: X86: Add vendor callbacks for writing the TSC multiplier")
> broke this caching logic.
> 
> Refactor the code so that all TSC scaling multiplier writes go through
> a single function which checks and updates the cache.
> 
> This fixes the following scenario:
> 
> 1. A CPU runs a guest with some tsc scaling ratio.
> 
> 2. New guest with different tsc scaling ratio starts on this CPU
>     and terminates almost immediately.
> 
>     Terminating this quickly ensures that the short-running guest wrote
>     the tsc scaling ratio just once, when it was set via KVM_SET_TSC_KHZ.
>     Due to the bug, that write does not update the per-cpu cache.
> 
> 3. The original guest continues to run. It doesn't restore the msr
>     to its own value, because the cache still matches, and thus it
>     continues to run with the wrong tsc scaling ratio.
> 
> 
> Fixes: 1ab9287add5e2 ("KVM: X86: Add vendor callbacks for writing the TSC multiplier")
> Signed-off-by: Maxim Levitsky <mlevitsk@...hat.com>
> ---
>   arch/x86/kvm/svm/nested.c |  4 ++--
>   arch/x86/kvm/svm/svm.c    | 32 ++++++++++++++++++++------------
>   arch/x86/kvm/svm/svm.h    |  2 +-
>   3 files changed, 23 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 88da8edbe1e1f..83bae1f2eeb8a 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -1037,7 +1037,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
>   	if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
>   		WARN_ON(!svm->tsc_scaling_enabled);
>   		vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
> -		svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
> +		__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
>   	}
>   
>   	svm->nested.ctl.nested_cr3 = 0;
> @@ -1442,7 +1442,7 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
>   	vcpu->arch.tsc_scaling_ratio =
>   		kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
>   					       svm->tsc_ratio_msr);
> -	svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
> +	__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
>   }
>   
>   /* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 4aea82f668fb1..5c873db9432e5 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -512,11 +512,24 @@ static int has_svm(void)
>   	return 1;
>   }
>   
> +void __svm_write_tsc_multiplier(u64 multiplier)
> +{
> +	preempt_disable();
> +
> +	if (multiplier == __this_cpu_read(current_tsc_ratio))
> +		goto out;
> +
> +	wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
> +	__this_cpu_write(current_tsc_ratio, multiplier);
> +out:
> +	preempt_enable();
> +}
> +
>   static void svm_hardware_disable(void)
>   {
>   	/* Make sure we clean up behind us */
>   	if (tsc_scaling)
> -		wrmsrl(MSR_AMD64_TSC_RATIO, SVM_TSC_RATIO_DEFAULT);
> +		__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
>   
>   	cpu_svm_disable();
>   
> @@ -562,8 +575,7 @@ static int svm_hardware_enable(void)
>   		 * Set the default value, even if we don't use TSC scaling
>   		 * to avoid having stale value in the msr
>   		 */
> -		wrmsrl(MSR_AMD64_TSC_RATIO, SVM_TSC_RATIO_DEFAULT);
> -		__this_cpu_write(current_tsc_ratio, SVM_TSC_RATIO_DEFAULT);
> +		__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
>   	}
>   
>   
> @@ -1046,11 +1058,12 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>   	vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
>   }
>   
> -void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
> +static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
>   {
> -	wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
> +	__svm_write_tsc_multiplier(multiplier);
>   }
>   
> +
>   /* Evaluate instruction intercepts that depend on guest CPUID features. */
>   static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
>   					      struct vcpu_svm *svm)
> @@ -1410,13 +1423,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
>   		sev_es_prepare_switch_to_guest(hostsa);
>   	}
>   
> -	if (tsc_scaling) {
> -		u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
> -		if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
> -			__this_cpu_write(current_tsc_ratio, tsc_ratio);
> -			wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
> -		}
> -	}
> +	if (tsc_scaling)
> +		__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
>   
>   	if (likely(tsc_aux_uret_slot >= 0))
>   		kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index cd92f43437539..2495fe548b5e9 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -594,7 +594,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
>   			       bool has_error_code, u32 error_code);
>   int nested_svm_exit_special(struct vcpu_svm *svm);
>   void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
> -void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier);
> +void __svm_write_tsc_multiplier(u64 multiplier);
>   void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
>   				       struct vmcb_control_area *control);
>   void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,

Queued, thanks.

Paolo