Message-ID: <CANgfPd8u9CtHBjxjHWKyKNOvq542NA0NwuYmQos5==MfRodksw@mail.gmail.com>
Date: Mon, 28 Feb 2022 14:38:15 -0800
From: Ben Gardon <bgardon@...gle.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>,
Christian Borntraeger <borntraeger@...ux.ibm.com>,
Janosch Frank <frankja@...ux.ibm.com>,
David Hildenbrand <david@...hat.com>,
Claudio Imbrenda <imbrenda@...ux.ibm.com>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
Wanpeng Li <wanpengli@...cent.com>,
Jim Mattson <jmattson@...gle.com>,
Joerg Roedel <joro@...tes.org>, kvm <kvm@...r.kernel.org>,
LKML <linux-kernel@...r.kernel.org>,
Lai Jiangshan <jiangshanlai@...il.com>
Subject: Re: [PATCH v2 4/7] KVM: x86/mmu: Zap only obsolete roots if a root
shadow page is zapped
On Fri, Feb 25, 2022 at 10:22 AM Sean Christopherson <seanjc@...gle.com> wrote:
>
> Zap only obsolete roots when responding to zapping a single root shadow
> page. Because KVM keeps root_count elevated when stuffing a previous
> root into its PGD cache, shadowing a 64-bit guest means that zapping any
> root causes all vCPUs to reload all roots, even if their current root is
> not affected by the zap.
>
> For many kernels, zapping a single root is a frequent operation, e.g. in
> Linux it happens whenever an mm is dropped (process exit, exec, etc.).
>
Reviewed-by: Ben Gardon <bgardon@...gle.com>
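
For folks reading along, here is the flow after this patch, condensed from
the hunks below into one place. This is only a sketch of the ordering, using
the names from the patch itself, not literal code that builds on its own:

    /* Zap side, under mmu_lock, in __kvm_mmu_prepare_zap_page(): */
    zapped_root = !is_obsolete_sp(kvm, sp);   /* root SP not already obsolete */
    ...
    sp->role.invalid = 1;
    /*
     * Only make the request after the root is marked invalid, so other
     * vCPUs see the root as invalid when they act on the request.
     */
    if (zapped_root)
            kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS);

    /* vCPU side, before the next guest entry, in vcpu_enter_guest(): */
    if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
            kvm_mmu_free_obsolete_roots(vcpu);   /* frees only the obsolete
                                                  * current/previous roots */

The key difference from the old KVM_REQ_MMU_RELOAD handling is that a vCPU
whose cached roots are all still live no longer unloads and rebuilds its
whole MMU; it only drops the roots that actually became obsolete.
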
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> arch/x86/include/asm/kvm_host.h | 2 +
> arch/x86/kvm/mmu.h | 1 +
> arch/x86/kvm/mmu/mmu.c | 65 +++++++++++++++++++++++++++++----
> arch/x86/kvm/x86.c | 4 +-
> 4 files changed, 63 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 713e08f62385..343041e892c6 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -102,6 +102,8 @@
> #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
> #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
> KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> +#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
> + KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>
> #define CR0_RESERVED_BITS \
> (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 1d0c1904d69a..bf8dbc4bb12a 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -80,6 +80,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
>
> int kvm_mmu_load(struct kvm_vcpu *vcpu);
> void kvm_mmu_unload(struct kvm_vcpu *vcpu);
> +void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
> void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
> void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 32c6d4b33d03..825996408465 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2310,7 +2310,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
> struct list_head *invalid_list,
> int *nr_zapped)
> {
> - bool list_unstable;
> + bool list_unstable, zapped_root = false;
>
> trace_kvm_mmu_prepare_zap_page(sp);
> ++kvm->stat.mmu_shadow_zapped;
> @@ -2352,14 +2352,20 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
> * in kvm_mmu_zap_all_fast(). Note, is_obsolete_sp() also
> * treats invalid shadow pages as being obsolete.
> */
> - if (!is_obsolete_sp(kvm, sp))
> - kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
> + zapped_root = !is_obsolete_sp(kvm, sp);
> }
>
> if (sp->lpage_disallowed)
> unaccount_huge_nx_page(kvm, sp);
>
> sp->role.invalid = 1;
> +
> + /*
> + * Make the request to free obsolete roots after marking the root
> + * invalid, otherwise other vCPUs may not see it as invalid.
> + */
> + if (zapped_root)
> + kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS);
> return list_unstable;
> }
>
> @@ -3947,7 +3953,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
> * previous root, then __kvm_mmu_prepare_zap_page() signals all vCPUs
> * to reload even if no vCPU is actively using the root.
> */
> - if (!sp && kvm_test_request(KVM_REQ_MMU_RELOAD, vcpu))
> + if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
> return true;
>
> return fault->slot &&
> @@ -4180,8 +4186,8 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
> /*
> * It's possible that the cached previous root page is obsolete because
> * of a change in the MMU generation number. However, changing the
> - * generation number is accompanied by KVM_REQ_MMU_RELOAD, which will
> - * free the root set here and allocate a new one.
> + * generation number is accompanied by KVM_REQ_MMU_FREE_OBSOLETE_ROOTS,
> + * which will free the root set here and allocate a new one.
> */
> kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
>
> @@ -5085,6 +5091,51 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
> vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
> }
>
> +static bool is_obsolete_root(struct kvm *kvm, hpa_t root_hpa)
> +{
> + struct kvm_mmu_page *sp;
> +
> + if (!VALID_PAGE(root_hpa))
> + return false;
> +
> + /*
> + * When freeing obsolete roots, treat roots as obsolete if they don't
> + * have an associated shadow page. This does mean KVM will get false
> + * positives and free roots that don't strictly need to be freed, but
> + * such false positives are relatively rare:
> + *
> + * (a) only PAE paging and nested NPT have roots without shadow pages
> + * (b) a remote reload due to a memslot update obsoletes _all_ roots
> + * (c) KVM doesn't track previous roots for PAE paging, and the guest
> + * is unlikely to zap an in-use PGD.
> + */
> + sp = to_shadow_page(root_hpa);
> + return !sp || is_obsolete_sp(kvm, sp);
> +}
> +
> +static void __kvm_mmu_free_obsolete_roots(struct kvm *kvm, struct kvm_mmu *mmu)
> +{
> + unsigned long roots_to_free = 0;
> + int i;
> +
> + if (is_obsolete_root(kvm, mmu->root.hpa))
> + roots_to_free |= KVM_MMU_ROOT_CURRENT;
> +
> + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
> + if (is_obsolete_root(kvm, mmu->prev_roots[i].hpa))
> + roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
> + }
> +
> + if (roots_to_free)
> + kvm_mmu_free_roots(kvm, mmu, roots_to_free);
> +}
> +
> +void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu)
> +{
> + __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu);
> + __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu);
> +}
> +
> static bool need_remote_flush(u64 old, u64 new)
> {
> if (!is_shadow_present_pte(old))
> @@ -5656,7 +5707,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
> * Note: we need to do this under the protection of mmu_lock,
> * otherwise, vcpu would purge shadow page but miss tlb flush.
> */
> - kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
> + kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS);
>
> kvm_zap_obsolete_pages(kvm);
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 579b26ffc124..d6bf0562c4c4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9856,8 +9856,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> goto out;
> }
> }
> - if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
> - kvm_mmu_unload(vcpu);
> + if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
> + kvm_mmu_free_obsolete_roots(vcpu);
> if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
> __kvm_migrate_timers(vcpu);
> if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
> --
> 2.35.1.574.g5d30c73bfb-goog
>