[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <f6b48819-9c0e-69f7-de07-2d49cd0aa1c1@redhat.com>
Date: Tue, 7 Jun 2022 17:26:04 +0200
From: Paolo Bonzini <pbonzini@...hat.com>
To: Maxim Levitsky <mlevitsk@...hat.com>, kvm@...r.kernel.org
Cc: Joerg Roedel <joro@...tes.org>,
Sean Christopherson <seanjc@...gle.com>,
Thomas Gleixner <tglx@...utronix.de>,
linux-kernel@...r.kernel.org, Wanpeng Li <wanpengli@...cent.com>,
Jim Mattson <jmattson@...gle.com>,
Borislav Petkov <bp@...en8.de>,
Ilias Stamatis <ilstam@...zon.com>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
"H. Peter Anvin" <hpa@...or.com>,
Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org,
Ingo Molnar <mingo@...hat.com>
Subject: Re: [PATCH] KVM: SVM: fix tsc scaling cache logic
On 6/6/22 20:11, Maxim Levitsky wrote:
> SVM uses a per-cpu variable to cache the current value of the
> tsc scaling multiplier msr on each cpu.
>
> Commit 1ab9287add5e2
> ("KVM: X86: Add vendor callbacks for writing the TSC multiplier")
> broke this caching logic.
>
> Refactor the code so that all TSC scaling multiplier writes go through
> a single function which checks and updates the cache.
>
> This fixes the following scenario:
>
> 1. A CPU runs a guest with some tsc scaling ratio.
>
> 2. New guest with different tsc scaling ratio starts on this CPU
> and terminates almost immediately.
>
> Because it terminates so quickly, the short-running guest writes the tsc
> scaling ratio just once, when it is set via KVM_SET_TSC_KHZ. Due to the bug,
> that write does not update the per-cpu cache.
>
> 3. The original guest continues to run. Because the stale cache still
> matches its own ratio, it does not restore the msr to its own value,
> and thus continues to run with a wrong tsc scaling ratio.
>
>
> Fixes: 1ab9287add5e2 ("KVM: X86: Add vendor callbacks for writing the TSC multiplier")
> Signed-off-by: Maxim Levitsky <mlevitsk@...hat.com>
> ---
> arch/x86/kvm/svm/nested.c | 4 ++--
> arch/x86/kvm/svm/svm.c | 32 ++++++++++++++++++++------------
> arch/x86/kvm/svm/svm.h | 2 +-
> 3 files changed, 23 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 88da8edbe1e1f..83bae1f2eeb8a 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -1037,7 +1037,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
> if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
> WARN_ON(!svm->tsc_scaling_enabled);
> vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
> - svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
> + __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
> }
>
> svm->nested.ctl.nested_cr3 = 0;
> @@ -1442,7 +1442,7 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
> vcpu->arch.tsc_scaling_ratio =
> kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
> svm->tsc_ratio_msr);
> - svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
> + __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
> }
>
> /* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 4aea82f668fb1..5c873db9432e5 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -512,11 +512,24 @@ static int has_svm(void)
> return 1;
> }
>
> +void __svm_write_tsc_multiplier(u64 multiplier)
> +{
> + preempt_disable();
> +
> + if (multiplier == __this_cpu_read(current_tsc_ratio))
> + goto out;
> +
> + wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
> + __this_cpu_write(current_tsc_ratio, multiplier);
> +out:
> + preempt_enable();
> +}
> +
> static void svm_hardware_disable(void)
> {
> /* Make sure we clean up behind us */
> if (tsc_scaling)
> - wrmsrl(MSR_AMD64_TSC_RATIO, SVM_TSC_RATIO_DEFAULT);
> + __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
>
> cpu_svm_disable();
>
> @@ -562,8 +575,7 @@ static int svm_hardware_enable(void)
> * Set the default value, even if we don't use TSC scaling
> * to avoid having stale value in the msr
> */
> - wrmsrl(MSR_AMD64_TSC_RATIO, SVM_TSC_RATIO_DEFAULT);
> - __this_cpu_write(current_tsc_ratio, SVM_TSC_RATIO_DEFAULT);
> + __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
> }
>
>
> @@ -1046,11 +1058,12 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
> vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
> }
>
> -void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
> +static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
> {
> - wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
> + __svm_write_tsc_multiplier(multiplier);
> }
>
> +
> /* Evaluate instruction intercepts that depend on guest CPUID features. */
> static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
> struct vcpu_svm *svm)
> @@ -1410,13 +1423,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
> sev_es_prepare_switch_to_guest(hostsa);
> }
>
> - if (tsc_scaling) {
> - u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
> - if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
> - __this_cpu_write(current_tsc_ratio, tsc_ratio);
> - wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
> - }
> - }
> + if (tsc_scaling)
> + __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
>
> if (likely(tsc_aux_uret_slot >= 0))
> kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index cd92f43437539..2495fe548b5e9 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -594,7 +594,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
> bool has_error_code, u32 error_code);
> int nested_svm_exit_special(struct vcpu_svm *svm);
> void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
> -void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier);
> +void __svm_write_tsc_multiplier(u64 multiplier);
> void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
> struct vmcb_control_area *control);
> void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
Queued, thanks.
Paolo
Powered by blists - more mailing lists