Message-ID: <20250529042710.crjcc76dqpiak4pn@desk>
Date: Wed, 28 May 2025 21:27:10 -0700
From: Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org, Borislav Petkov <bp@...en8.de>,
Jim Mattson <jmattson@...gle.com>
Subject: Re: [PATCH 3/5] KVM: VMX: Apply MMIO Stale Data mitigation if KVM
maps MMIO into the guest
On Thu, May 22, 2025 at 06:17:54PM -0700, Sean Christopherson wrote:
> Enforce the MMIO Stale Data mitigation if KVM has ever mapped host MMIO
> into the VM, not if the VM has an assigned device. VFIO is but one of
> many ways to map host MMIO into a KVM guest, and even within VFIO,
> formally attaching a device to a VM via KVM_DEV_VFIO_FILE_ADD is entirely
> optional.
>
> Track whether or not the guest can access host MMIO on a per-MMU basis,
> i.e. based on whether or not the vCPU has a mapping to host MMIO. For
> simplicity, track MMIO mappings in "special" roots (those without a
> kvm_mmu_page) at the VM level, as only Intel CPUs are vulnerable, and so
> only legacy 32-bit shadow paging is affected, i.e. lack of precise
> tracking is a complete non-issue.
>
> Make the per-MMU and per-VM flags sticky. Detecting when *all* MMIO
> mappings have been removed would be absurdly complex. And in practice,
> removing MMIO from a guest will be done by deleting the associated memslot,
> which by default will force KVM to re-allocate all roots. Special roots
> will forever be mitigated, but as above, the affected scenarios are not
> expected to be performance sensitive.
>
> Use a VMX_RUN flag to communicate the need for a CPU buffer flush to
> vmx_vcpu_enter_exit() so that kvm_vcpu_can_access_host_mmio() and all its
> dependencies don't need to be marked __always_inline, e.g. so that KASAN
> doesn't trigger a noinstr violation.
>
> Cc: Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>
> Cc: Borislav Petkov <bp@...en8.de>
> Fixes: 8cb861e9e3c9 ("x86/speculation/mmio: Add mitigation for Processor MMIO Stale Data")
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/mmu/mmu_internal.h | 3 +++
> arch/x86/kvm/mmu/spte.c | 21 +++++++++++++++++++++
> arch/x86/kvm/mmu/spte.h | 10 ++++++++++
> arch/x86/kvm/vmx/run_flags.h | 10 ++++++----
> arch/x86/kvm/vmx/vmx.c | 8 +++++++-
> 6 files changed, 48 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 01edcefbd937..043be00ec5b8 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1458,6 +1458,7 @@ struct kvm_arch {
> bool x2apic_format;
> bool x2apic_broadcast_quirk_disabled;
>
> + bool has_mapped_host_mmio;
> bool guest_can_read_msr_platform_info;
> bool exception_payload_enabled;
>
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index db8f33e4de62..65f3c89d7c5d 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -103,6 +103,9 @@ struct kvm_mmu_page {
> int root_count;
> refcount_t tdp_mmu_root_count;
> };
> +
> + bool has_mapped_host_mmio;
> +
> union {
> /* These two members aren't used for TDP MMU */
> struct {
> diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
> index 3f16c91aa042..5fb43a834d48 100644
> --- a/arch/x86/kvm/mmu/spte.c
> +++ b/arch/x86/kvm/mmu/spte.c
> @@ -138,6 +138,22 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn, int *is_host_mmio)
> return *is_host_mmio;
> }
>
> +static void kvm_track_host_mmio_mapping(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa);
> +
> + if (root)
> + WRITE_ONCE(root->has_mapped_host_mmio, true);
> + else
> + WRITE_ONCE(vcpu->kvm->arch.has_mapped_host_mmio, true);
> +
> + /*
> + * Force vCPUs to exit and flush CPU buffers if the vCPU is using the
> + * affected root(s).
> + */
> + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_OUTSIDE_GUEST_MODE);
> +}
> +
> /*
> * Returns true if the SPTE needs to be updated atomically due to having bits
> * that may be changed without holding mmu_lock, and for which KVM must not
> @@ -276,6 +292,11 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
> }
>
> + if (static_branch_unlikely(&mmio_stale_data_clear) &&
> + !kvm_vcpu_can_access_host_mmio(vcpu) &&
> + kvm_is_mmio_pfn(pfn, &is_host_mmio))
> + kvm_track_host_mmio_mapping(vcpu);
> +
> *new_spte = spte;
> return wrprot;
> }
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index 1e94f081bdaf..3133f066927e 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -280,6 +280,16 @@ static inline bool is_mirror_sptep(tdp_ptep_t sptep)
> return is_mirror_sp(sptep_to_sp(rcu_dereference(sptep)));
> }
>
> +static inline bool kvm_vcpu_can_access_host_mmio(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa);
> +
> + if (root)
> + return READ_ONCE(root->has_mapped_host_mmio);
> +
> + return READ_ONCE(vcpu->kvm->arch.has_mapped_host_mmio);
> +}
> +
> static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
> {
> return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value &&
> diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
> index 6a9bfdfbb6e5..2f20fb170def 100644
> --- a/arch/x86/kvm/vmx/run_flags.h
> +++ b/arch/x86/kvm/vmx/run_flags.h
> @@ -2,10 +2,12 @@
> #ifndef __KVM_X86_VMX_RUN_FLAGS_H
> #define __KVM_X86_VMX_RUN_FLAGS_H
>
> -#define VMX_RUN_VMRESUME_SHIFT        0
> -#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT  1
> +#define VMX_RUN_VMRESUME_SHIFT                    0
> +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT              1
> +#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT  2
>
> -#define VMX_RUN_VMRESUME        BIT(VMX_RUN_VMRESUME_SHIFT)
> -#define VMX_RUN_SAVE_SPEC_CTRL  BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> +#define VMX_RUN_VMRESUME                    BIT(VMX_RUN_VMRESUME_SHIFT)
> +#define VMX_RUN_SAVE_SPEC_CTRL              BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> +#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO  BIT(VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT)
>
> #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index f79604bc0127..27e870d83122 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -74,6 +74,8 @@
> #include "vmx_onhyperv.h"
> #include "posted_intr.h"
>
> +#include "mmu/spte.h"
> +
> MODULE_AUTHOR("Qumranet");
> MODULE_DESCRIPTION("KVM support for VMX (Intel VT-x) extensions");
> MODULE_LICENSE("GPL");
> @@ -959,6 +961,10 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
> if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
> flags |= VMX_RUN_SAVE_SPEC_CTRL;
>
> + if (static_branch_unlikely(&mmio_stale_data_clear) &&
> + kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> + flags |= VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO;
> +
> return flags;
> }
>
> @@ -7282,7 +7288,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
> if (static_branch_unlikely(&vmx_l1d_should_flush))
> vmx_l1d_flush(vcpu);
> else if (static_branch_unlikely(&mmio_stale_data_clear) &&
> - kvm_arch_has_assigned_device(vcpu->kvm))
> + (flags & VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO))
> mds_clear_cpu_buffers();
I think this also paves the way for the buffer clear for MDS and MMIO to be
done in a single place. Please let me know if the below is feasible:
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
index 2f20fb170def..004fe1ca89f0 100644
--- a/arch/x86/kvm/vmx/run_flags.h
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -2,12 +2,12 @@
#ifndef __KVM_X86_VMX_RUN_FLAGS_H
#define __KVM_X86_VMX_RUN_FLAGS_H
-#define VMX_RUN_VMRESUME_SHIFT                    0
-#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT              1
-#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT  2
+#define VMX_RUN_VMRESUME_SHIFT           0
+#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT     1
+#define VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT  2
-#define VMX_RUN_VMRESUME                    BIT(VMX_RUN_VMRESUME_SHIFT)
-#define VMX_RUN_SAVE_SPEC_CTRL              BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
-#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO  BIT(VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT)
+#define VMX_RUN_VMRESUME           BIT(VMX_RUN_VMRESUME_SHIFT)
+#define VMX_RUN_SAVE_SPEC_CTRL     BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
+#define VMX_RUN_CLEAR_CPU_BUFFERS  BIT(VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT)
#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index f6986dee6f8c..ab602ce4967e 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -141,6 +141,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx
+
/* Check if vmlaunch or vmresume is needed */
bt $VMX_RUN_VMRESUME_SHIFT, %ebx
/* Load guest registers. Don't clobber flags. */
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
mov VCPU_RDX(%_ASM_AX), %_ASM_DX
@@ -161,8 +163,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+ /* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
+ jz .Lskip_clear_cpu_buffers
/* Clobbers EFLAGS.ZF */
CLEAR_CPU_BUFFERS
+.Lskip_clear_cpu_buffers:
/* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
jnc .Lvmlaunch
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1e4790c8993a..1415aeea35f7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -958,9 +958,10 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
flags |= VMX_RUN_SAVE_SPEC_CTRL;
- if (static_branch_unlikely(&mmio_stale_data_clear) &&
- kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
- flags |= VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO;
+ if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) ||
+ (static_branch_unlikely(&mmio_stale_data_clear) &&
+ kvm_vcpu_can_access_host_mmio(&vmx->vcpu)))
+ flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
return flags;
}
@@ -7296,9 +7297,6 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
*/
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
- else if (static_branch_unlikely(&mmio_stale_data_clear) &&
- (flags & VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO))
- mds_clear_cpu_buffers();
vmx_disable_fb_clear(vmx);
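
To spell out the EFLAGS handling the vmenter.S hunk relies on (a sketch of the
intended flow, not tested): TEST sets ZF from the run flag but also clears CF,
so it has to come before the BT that sets CF for the VMRESUME check. The guest
register MOVs don't modify flags (and they clobber %ebx, which is why the
result has to ride in EFLAGS rather than in the register), and the VERW inside
CLEAR_CPU_BUFFERS only writes ZF, so CF survives until the JNC:

	test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx	/* ZF = !clear-buffers flag; also clears CF */
	bt   $VMX_RUN_VMRESUME_SHIFT, %ebx	/* CF = vmresume flag; ZF untouched */
	...					/* guest register loads, no flag updates */
	jz   .Lskip_clear_cpu_buffers		/* consume ZF */
	CLEAR_CPU_BUFFERS			/* VERW only clobbers ZF */
.Lskip_clear_cpu_buffers:
	jnc  .Lvmlaunch				/* consume CF */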