Message-ID: <CANgfPd8u9CtHBjxjHWKyKNOvq542NA0NwuYmQos5==MfRodksw@mail.gmail.com>
Date: Mon, 28 Feb 2022 14:38:15 -0800
From: Ben Gardon <bgardon@...gle.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>,
Christian Borntraeger <borntraeger@...ux.ibm.com>,
Janosch Frank <frankja@...ux.ibm.com>,
David Hildenbrand <david@...hat.com>,
Claudio Imbrenda <imbrenda@...ux.ibm.com>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
Wanpeng Li <wanpengli@...cent.com>,
Jim Mattson <jmattson@...gle.com>,
Joerg Roedel <joro@...tes.org>, kvm <kvm@...r.kernel.org>,
LKML <linux-kernel@...r.kernel.org>,
Lai Jiangshan <jiangshanlai@...il.com>
Subject: Re: [PATCH v2 4/7] KVM: x86/mmu: Zap only obsolete roots if a root
shadow page is zapped
On Fri, Feb 25, 2022 at 10:22 AM Sean Christopherson <seanjc@...gle.com> wrote:
>
> Zap only obsolete roots when responding to zapping a single root shadow
> page. Because KVM keeps root_count elevated when stuffing a previous
> root into its PGD cache, shadowing a 64-bit guest means that zapping any
> root causes all vCPUs to reload all roots, even if their current root is
> not affected by the zap.
>
> For many kernels, zapping a single root is a frequent operation, e.g. in
> Linux it happens whenever an mm is dropped (process exit, exec, etc.).
>
Reviewed-by: Ben Gardon <bgardon@...gle.com>
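
For folks reading along, here is the flow after this patch, condensed from
the hunks below into one place. This is only a sketch of the ordering, using
the names from the patch itself, not literal code that builds on its own:

    /* Zap side, under mmu_lock, in __kvm_mmu_prepare_zap_page(): */
    zapped_root = !is_obsolete_sp(kvm, sp);   /* root SP not already obsolete */
    ...
    sp->role.invalid = 1;
    /*
     * Only make the request after the root is marked invalid, so other
     * vCPUs see the root as invalid when they act on the request.
     */
    if (zapped_root)
            kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS);

    /* vCPU side, before the next guest entry, in vcpu_enter_guest(): */
    if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
            kvm_mmu_free_obsolete_roots(vcpu);   /* frees only the obsolete
                                                  * current/previous roots */

The key difference from the old KVM_REQ_MMU_RELOAD handling is that a vCPU
whose cached roots are all still live no longer unloads and rebuilds its
whole MMU; it only drops the roots that actually became obsolete.
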
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> arch/x86/include/asm/kvm_host.h | 2 +
> arch/x86/kvm/mmu.h | 1 +
> arch/x86/kvm/mmu/mmu.c | 65 +++++++++++++++++++++++++++++----
> arch/x86/kvm/x86.c | 4 +-
> 4 files changed, 63 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 713e08f62385..343041e892c6 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -102,6 +102,8 @@
> #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
> #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
> KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> +#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
> + KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>
> #define CR0_RESERVED_BITS \
> (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 1d0c1904d69a..bf8dbc4bb12a 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -80,6 +80,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
>
> int kvm_mmu_load(struct kvm_vcpu *vcpu);
> void kvm_mmu_unload(struct kvm_vcpu *vcpu);
> +void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
> void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
> void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 32c6d4b33d03..825996408465 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2310,7 +2310,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
> struct list_head *invalid_list,
> int *nr_zapped)
> {
> - bool list_unstable;
> + bool list_unstable, zapped_root = false;
>
> trace_kvm_mmu_prepare_zap_page(sp);
> ++kvm->stat.mmu_shadow_zapped;
> @@ -2352,14 +2352,20 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
> * in kvm_mmu_zap_all_fast(). Note, is_obsolete_sp() also
> * treats invalid shadow pages as being obsolete.
> */
> - if (!is_obsolete_sp(kvm, sp))
> - kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
> + zapped_root = !is_obsolete_sp(kvm, sp);
> }
>
> if (sp->lpage_disallowed)
> unaccount_huge_nx_page(kvm, sp);
>
> sp->role.invalid = 1;
> +
> + /*
> + * Make the request to free obsolete roots after marking the root
> + * invalid, otherwise other vCPUs may not see it as invalid.
> + */
> + if (zapped_root)
> + kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS);
> return list_unstable;
> }
>
> @@ -3947,7 +3953,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
> * previous root, then __kvm_mmu_prepare_zap_page() signals all vCPUs
> * to reload even if no vCPU is actively using the root.
> */
> - if (!sp && kvm_test_request(KVM_REQ_MMU_RELOAD, vcpu))
> + if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
> return true;
>
> return fault->slot &&
> @@ -4180,8 +4186,8 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
> /*
> * It's possible that the cached previous root page is obsolete because
> * of a change in the MMU generation number. However, changing the
> - * generation number is accompanied by KVM_REQ_MMU_RELOAD, which will
> - * free the root set here and allocate a new one.
> + * generation number is accompanied by KVM_REQ_MMU_FREE_OBSOLETE_ROOTS,
> + * which will free the root set here and allocate a new one.
> */
> kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
>
> @@ -5085,6 +5091,51 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
> vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
> }
>
> +static bool is_obsolete_root(struct kvm *kvm, hpa_t root_hpa)
> +{
> + struct kvm_mmu_page *sp;
> +
> + if (!VALID_PAGE(root_hpa))
> + return false;
> +
> + /*
> + * When freeing obsolete roots, treat roots as obsolete if they don't
> + * have an associated shadow page. This does mean KVM will get false
> + * positives and free roots that don't strictly need to be freed, but
> + * such false positives are relatively rare:
> + *
> + * (a) only PAE paging and nested NPT have roots without shadow pages
> + * (b) a remote reload due to a memslot update obsoletes _all_ roots
> + * (c) KVM doesn't track previous roots for PAE paging, and the guest
> + * is unlikely to zap an in-use PGD.
> + */
> + sp = to_shadow_page(root_hpa);
> + return !sp || is_obsolete_sp(kvm, sp);
> +}
> +
> +static void __kvm_mmu_free_obsolete_roots(struct kvm *kvm, struct kvm_mmu *mmu)
> +{
> + unsigned long roots_to_free = 0;
> + int i;
> +
> + if (is_obsolete_root(kvm, mmu->root.hpa))
> + roots_to_free |= KVM_MMU_ROOT_CURRENT;
> +
> + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
> + if (is_obsolete_root(kvm, mmu->prev_roots[i].hpa))
> + roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
> + }
> +
> + if (roots_to_free)
> + kvm_mmu_free_roots(kvm, mmu, roots_to_free);
> +}
> +
> +void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu)
> +{
> + __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu);
> + __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu);
> +}
> +
> static bool need_remote_flush(u64 old, u64 new)
> {
> if (!is_shadow_present_pte(old))
> @@ -5656,7 +5707,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
> * Note: we need to do this under the protection of mmu_lock,
> * otherwise, vcpu would purge shadow page but miss tlb flush.
> */
> - kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
> + kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS);
>
> kvm_zap_obsolete_pages(kvm);
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 579b26ffc124..d6bf0562c4c4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9856,8 +9856,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> goto out;
> }
> }
> - if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
> - kvm_mmu_unload(vcpu);
> + if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
> + kvm_mmu_free_obsolete_roots(vcpu);
> if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
> __kvm_migrate_timers(vcpu);
> if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
> --
> 2.35.1.574.g5d30c73bfb-goog
>