From 8fb6d18ad4cbdd1802df45be49358a6d6acf72a0 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 5 Aug 2025 15:58:13 -0700
Subject: [PATCH] KVM: VMX: Sketch in possible framework for eliding TLB
 flushes on pCPU migration

Give each MMU root a cpumask that tracks which pCPUs have flushed the
root's TLB entries, and on pCPU migration flush a root only if the new
pCPU hasn't already flushed it.

Not-Signed-off-by: Sean Christopherson (anyone that makes this work
deserves full credit)
Not-yet-Signed-off-by: Jeremi Piotrowski
---
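The idea in isolation: a root's cpu_flushed_mask records every pCPU that
has already flushed the root, so the flush on pCPU migration can be
skipped when the destination pCPU has flushed the root before.  A minimal
userspace model of that bookkeeping follows (illustrative only, all names
invented; a plain uint64_t stands in for cpumask_var_t, and the
non-atomic test-and-set stands in for cpumask_test_and_set_cpu()):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for struct kvm_mmu_page, reduced to the new field. */
struct mock_root {
	uint64_t cpu_flushed_mask;
};

/* Stand-in for vmx_flush_tlb_ept_root(); just logs the would-be flush. */
static void mock_flush_ept_root(int cpu)
{
	printf("flushing EPT root on pCPU %d\n", cpu);
}

/*
 * Stand-in for __vmx_flush_ept_on_pcpu_migration(): flush a given root
 * at most once per pCPU.
 */
static void mock_flush_on_migration(struct mock_root *root, int cpu)
{
	uint64_t bit = UINT64_C(1) << cpu;

	/* Skip the flush if this pCPU has already flushed this root. */
	if (root->cpu_flushed_mask & bit)
		return;

	root->cpu_flushed_mask |= bit;
	mock_flush_ept_root(cpu);
}

int main(void)
{
	struct mock_root root = { .cpu_flushed_mask = 0 };

	mock_flush_on_migration(&root, 1);	/* flushes */
	mock_flush_on_migration(&root, 2);	/* flushes */
	mock_flush_on_migration(&root, 1);	/* elided */
	return 0;
}

Note that the mask is only ever set, both here and in the diff below;
deciding when bits must be cleared again (i.e. when a root's TLB entries
are invalidated for some other reason) is presumably the part that still
needs to be made to work.
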
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  3 +++
 arch/x86/kvm/mmu/mmu.c             |  5 +++++
 arch/x86/kvm/mmu/mmu_internal.h    |  4 ++++
 arch/x86/kvm/mmu/tdp_mmu.c         |  4 ++++
 arch/x86/kvm/vmx/main.c            |  1 +
 arch/x86/kvm/vmx/vmx.c             | 28 +++++++++++++++++++++-------
 arch/x86/kvm/vmx/x86_ops.h         |  1 +
 8 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 8d50e3e0a19b..60351dd22f2f 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -99,6 +99,7 @@ KVM_X86_OP_OPTIONAL(link_external_spt)
 KVM_X86_OP_OPTIONAL(set_external_spte)
 KVM_X86_OP_OPTIONAL(free_external_spt)
 KVM_X86_OP_OPTIONAL(remove_external_spte)
+KVM_X86_OP_OPTIONAL(alloc_root_cpu_mask)
 KVM_X86_OP(has_wbinvd_exit)
 KVM_X86_OP(get_l2_tsc_offset)
 KVM_X86_OP(get_l2_tsc_multiplier)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b4a391929cdb..a3d415c3ea8b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1801,6 +1801,9 @@ struct kvm_x86_ops {
 	void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
 			     int root_level);
 
+	/* Allocate per-root pCPU flush mask. */
+	void (*alloc_root_cpu_mask)(struct kvm_mmu_page *root);
+
 	/* Update external mapping with page table link. */
 	int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
 				 void *external_spt);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4e06e2e89a8f..721ee8ea76bd 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -20,6 +20,7 @@
 #include "ioapic.h"
 #include "mmu.h"
 #include "mmu_internal.h"
+#include <linux/cpumask.h>
 #include "tdp_mmu.h"
 #include "x86.h"
 #include "kvm_cache_regs.h"
@@ -1820,6 +1821,7 @@ static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
 	list_del(&sp->link);
 	free_page((unsigned long)sp->spt);
 	free_page((unsigned long)sp->shadowed_translation);
+	free_cpumask_var(sp->cpu_flushed_mask);
 	kmem_cache_free(mmu_page_header_cache, sp);
 }
 
@@ -3827,6 +3829,9 @@ static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant,
 	sp = kvm_mmu_get_shadow_page(vcpu, gfn, role);
 	++sp->root_count;
 
+	if (level >= PT64_ROOT_4LEVEL)
+		kvm_x86_call(alloc_root_cpu_mask)(sp);
+
 	return __pa(sp->spt);
 }
 
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index db8f33e4de62..5acb3dd34b36 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -7,6 +7,7 @@
 #include
 
 #include "mmu.h"
+#include <linux/cpumask.h>
 
 #ifdef CONFIG_KVM_PROVE_MMU
 #define KVM_MMU_WARN_ON(x) WARN_ON_ONCE(x)
@@ -145,6 +146,9 @@ struct kvm_mmu_page {
 	/* Used for freeing the page asynchronously if it is a TDP MMU page. */
 	struct rcu_head rcu_head;
 #endif
+
+	/* Mask tracking which host CPUs have flushed this EPT root */
+	cpumask_var_t cpu_flushed_mask;
 };
 
 extern struct kmem_cache *mmu_page_header_cache;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 7f3d7229b2c1..40c7f46f553c 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -3,6 +3,7 @@
 
 #include "mmu.h"
 #include "mmu_internal.h"
+#include <linux/cpumask.h>
 #include "mmutrace.h"
 #include "tdp_iter.h"
 #include "tdp_mmu.h"
@@ -57,6 +58,7 @@ static void tdp_mmu_free_sp(struct kvm_mmu_page *sp)
 {
 	free_page((unsigned long)sp->external_spt);
 	free_page((unsigned long)sp->spt);
+	free_cpumask_var(sp->cpu_flushed_mask);
 	kmem_cache_free(mmu_page_header_cache, sp);
 }
 
@@ -293,6 +295,8 @@ void kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu, bool mirror)
 	root = tdp_mmu_alloc_sp(vcpu);
 	tdp_mmu_init_sp(root, NULL, 0, role);
 
+	kvm_x86_call(alloc_root_cpu_mask)(root);
+
 	/*
 	 * TDP MMU roots are kept until they are explicitly invalidated, either
 	 * by a memslot update or by the destruction of the VM.  Initialize the
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index d1e02e567b57..ec7f6899443d 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -1005,6 +1005,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.write_tsc_multiplier = vt_op(write_tsc_multiplier),
 
 	.load_mmu_pgd = vt_op(load_mmu_pgd),
+	.alloc_root_cpu_mask = vmx_alloc_root_cpu_mask,
 
 	.check_intercept = vmx_check_intercept,
 	.handle_exit_irqoff = vmx_handle_exit_irqoff,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index eec2d866e7f1..a6d93624c2d4 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <linux/cpumask.h>
 #include
 #include
 
@@ -62,6 +63,7 @@
 #include "kvm_cache_regs.h"
 #include "lapic.h"
 #include "mmu.h"
+#include "mmu/spte.h"
 #include "nested.h"
 #include "pmu.h"
 #include "sgx.h"
@@ -1450,7 +1452,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void vmx_flush_ept_on_pcpu_migration(struct kvm_mmu *mmu);
+static void vmx_flush_ept_on_pcpu_migration(struct kvm_mmu *mmu, int cpu);
 
 void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
 {
@@ -1489,8 +1491,8 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
 	 * TLB entries from its previous association with the vCPU.
 	 */
 	if (enable_ept) {
-		vmx_flush_ept_on_pcpu_migration(&vcpu->arch.root_mmu);
-		vmx_flush_ept_on_pcpu_migration(&vcpu->arch.guest_mmu);
+		vmx_flush_ept_on_pcpu_migration(&vcpu->arch.root_mmu, cpu);
+		vmx_flush_ept_on_pcpu_migration(&vcpu->arch.guest_mmu, cpu);
 	} else {
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	}
@@ -3307,22 +3309,34 @@ void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
 	vpid_sync_context(vmx_get_current_vpid(vcpu));
 }
 
-static void __vmx_flush_ept_on_pcpu_migration(hpa_t root_hpa)
+void vmx_alloc_root_cpu_mask(struct kvm_mmu_page *root)
 {
+	WARN_ON_ONCE(!zalloc_cpumask_var(&root->cpu_flushed_mask,
+					 GFP_KERNEL_ACCOUNT));
+}
+
+static void __vmx_flush_ept_on_pcpu_migration(hpa_t root_hpa, int cpu)
+{
+	struct kvm_mmu_page *root;
+
 	if (!VALID_PAGE(root_hpa))
 		return;
 
+	root = root_to_sp(root_hpa);
+	if (!root || cpumask_test_and_set_cpu(cpu, root->cpu_flushed_mask))
+		return;
+
 	vmx_flush_tlb_ept_root(root_hpa);
 }
 
-static void vmx_flush_ept_on_pcpu_migration(struct kvm_mmu *mmu)
+static void vmx_flush_ept_on_pcpu_migration(struct kvm_mmu *mmu, int cpu)
 {
 	int i;
 
-	__vmx_flush_ept_on_pcpu_migration(mmu->root.hpa);
+	__vmx_flush_ept_on_pcpu_migration(mmu->root.hpa, cpu);
 
 	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-		__vmx_flush_ept_on_pcpu_migration(mmu->prev_roots[i].hpa);
+		__vmx_flush_ept_on_pcpu_migration(mmu->prev_roots[i].hpa, cpu);
 }
 
 void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index b4596f651232..4406d53e6ebe 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -84,6 +84,7 @@ void vmx_flush_tlb_all(struct kvm_vcpu *vcpu);
 void vmx_flush_tlb_current(struct kvm_vcpu *vcpu);
 void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr);
 void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu);
+void vmx_alloc_root_cpu_mask(struct kvm_mmu_page *root);
 void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask);
 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
 void vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall);
-- 
2.39.5