[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CALMp9eRDehmWC1gZmSjxjwCvm4VXf_FnR-MiFkHxkTn4_DJ4aA@mail.gmail.com>
Date: Mon, 7 Nov 2022 10:45:30 -0800
From: Jim Mattson <jmattson@...gle.com>
To: Paolo Bonzini <pbonzini@...hat.com>
Cc: linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
nathan@...nel.org, thomas.lendacky@....com,
andrew.cooper3@...rix.com, peterz@...radead.org, seanjc@...gle.com,
stable@...r.kernel.org
Subject: Re: [PATCH 7/8] KVM: SVM: move MSR_IA32_SPEC_CTRL save/restore to assembly

On Mon, Nov 7, 2022 at 6:54 AM Paolo Bonzini <pbonzini@...hat.com> wrote:
>
> Restoration of the host IA32_SPEC_CTRL value is probably too late
> with respect to the return thunk training sequence.
>
> With respect to the user/kernel boundary, AMD says, "If software chooses
> to toggle STIBP (e.g., set STIBP on kernel entry, and clear it on kernel
> exit), software should set STIBP to 1 before executing the return thunk
> training sequence." I assume the same requirements apply to the guest/host
> boundary. The return thunk training sequence is in vmenter.S, quite close
> to the VM-exit. On hosts without V_SPEC_CTRL, however, the host's
> IA32_SPEC_CTRL value is not restored until much later.
>
> To avoid this, move the restoration of host SPEC_CTRL to assembly and,
> for consistency, move the restoration of the guest SPEC_CTRL as well.
> This is not particularly difficult, apart from some care to cover both
> 32- and 64-bit, and to share code between SEV-ES and normal vmentry.
>
> Cc: stable@...r.kernel.org
> Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk")
> Suggested-by: Jim Mattson <jmattson@...gle.com>
> Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
> ---
> arch/x86/kernel/asm-offsets.c | 1 +
> arch/x86/kernel/cpu/bugs.c | 13 ++---
> arch/x86/kvm/svm/svm.c | 38 ++++++---------
> arch/x86/kvm/svm/svm.h | 4 +-
> arch/x86/kvm/svm/vmenter.S | 92 ++++++++++++++++++++++++++++++++++-
> 5 files changed, 111 insertions(+), 37 deletions(-)
>
> diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
> index 69d1fed51086..d0bd68af0a5a 100644
> --- a/arch/x86/kernel/asm-offsets.c
> +++ b/arch/x86/kernel/asm-offsets.c
> @@ -115,6 +115,7 @@ static void __used common(void)
> OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs);
> OFFSET(SVM_vmcb01, vcpu_svm, vmcb01);
> OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb);
> + OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl);
> OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa);
> }
>
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index da7c361f47e0..6ec0b7ce7453 100644
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -196,22 +196,15 @@ void __init check_bugs(void)
> }
>
> /*
> - * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is
> - * done in vmenter.S.
> + * NOTE: This function is *only* called for SVM, since Intel uses
> + * MSR_IA32_SPEC_CTRL for SSBD.
> */
> void
> x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
> {
> - u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
> + u64 guestval, hostval;
> struct thread_info *ti = current_thread_info();
>
> - if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
> - if (hostval != guestval) {
> - msrval = setguest ? guestval : hostval;
> - wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
> - }
> - }
> -
> /*
> * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
> * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported.
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 381c7dcffe25..31aa158a2e10 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -731,6 +731,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
> u32 offset;
> u32 *msrpm;
>
> + /*
> + * For non-nested case:
> + * If the L01 MSR bitmap does not intercept the MSR, then we need to
> + * save it.
> + *
> + * For nested case:
> + * If the L02 MSR bitmap does not intercept the MSR, then we need to
> + * save it.
> + */
> msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
> to_svm(vcpu)->msrpm;
>
> @@ -3912,18 +3921,19 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
> return EXIT_FASTPATH_NONE;
> }
>
> -static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
> +static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
>
> guest_state_enter_irqoff();
>
> if (sev_es_guest(vcpu->kvm)) {
> - __svm_sev_es_vcpu_run(svm);
> + __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
> } else {
> struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
>
> - __svm_vcpu_run(svm, __sme_page_pa(sd->save_area));
> + __svm_vcpu_run(svm, __sme_page_pa(sd->save_area),
> + spec_ctrl_intercepted);
> }
>
> guest_state_exit_irqoff();
> @@ -3932,6 +3942,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
> static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> + bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
>
> trace_kvm_entry(vcpu);
>
> @@ -3990,26 +4001,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
> if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
> x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
>
> - svm_vcpu_enter_exit(vcpu);
> -
> - /*
> - * We do not use IBRS in the kernel. If this vCPU has used the
> - * SPEC_CTRL MSR it may have left it on; save the value and
> - * turn it off. This is much more efficient than blindly adding
> - * it to the atomic save/restore list. Especially as the former
> - * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
> - *
> - * For non-nested case:
> - * If the L01 MSR bitmap does not intercept the MSR, then we need to
> - * save it.
> - *
> - * For nested case:
> - * If the L02 MSR bitmap does not intercept the MSR, then we need to
> - * save it.
> - */
> - if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
> - unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
> - svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
> + svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted);
>
> if (!sev_es_guest(vcpu->kvm))
> reload_tss(vcpu);
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 99410651f2a5..9d940d8736f0 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -483,7 +483,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
>
> /* vmenter.S */
>
> -void __svm_sev_es_vcpu_run(struct vcpu_svm *svm);
> -void __svm_vcpu_run(struct vcpu_svm *svm, unsigned long hsave_pa);
> +void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
> +void __svm_vcpu_run(struct vcpu_svm *svm, unsigned long hsave_pa, bool spec_ctrl_intercepted);
>
> #endif
> diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
> index 45a4bd002494..9e381386ffdc 100644
> --- a/arch/x86/kvm/svm/vmenter.S
> +++ b/arch/x86/kvm/svm/vmenter.S
> @@ -32,10 +32,64 @@
>
> .section .noinstr.text, "ax"
>
> +.macro RESTORE_GUEST_SPEC_CTRL
> + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
> + ALTERNATIVE_2 "jmp 999f", \
> + "", X86_FEATURE_MSR_SPEC_CTRL, \
> + "jmp 999f", X86_FEATURE_V_SPEC_CTRL
> +
> + /*
> + * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
> + * host's, write the MSR.
> + *
> + * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
> + * there must not be any returns or indirect branches between this code
> + * and vmentry.
> + */
> + movl SVM_spec_ctrl(%_ASM_DI), %eax
> + cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
> + je 999f
> + mov $MSR_IA32_SPEC_CTRL, %ecx
> + xor %edx, %edx
> + wrmsr
> +999:
> +
> +.endm
> +
> +.macro RESTORE_HOST_SPEC_CTRL
> + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
> + ALTERNATIVE_2 "jmp 999f", \
> + "", X86_FEATURE_MSR_SPEC_CTRL, \
> + "jmp 999f", X86_FEATURE_V_SPEC_CTRL
> +
> + mov $MSR_IA32_SPEC_CTRL, %ecx
> +
> + /*
> + * Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
> + * if it was not intercepted during guest execution.
> + */
> + cmpb $0, (%_ASM_SP)
> + jnz 998f
> + rdmsr
> + movl %eax, SVM_spec_ctrl(%_ASM_DI)
> +998:
> +
> + /* Now restore the host value of the MSR if different from the guest's. */
> + movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
> + cmp SVM_spec_ctrl(%_ASM_DI), %eax
> + je 999f
> + xor %edx, %edx
> + wrmsr
> +999:
> +
> +.endm
> +
> +
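It seems unfortunate to have the unconditional branches (the "jmp 999f"
emitted by ALTERNATIVE_2) in the more common cases, i.e. when
X86_FEATURE_MSR_SPEC_CTRL is unset or X86_FEATURE_V_SPEC_CTRL is set.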
Powered by blists - more mailing lists