linux-kernel - Re: [PATCH v3 01/12] Revert "KVM: x86/mmu: Allow zap gfn range to operate under the mmu read lock"

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <14a0d715-d059-3a85-a803-63d9b0fb790f@redhat.com>
Date:   Tue, 3 Aug 2021 10:05:27 +0200
From:   Paolo Bonzini <pbonzini@...hat.com>
To:     Maxim Levitsky <mlevitsk@...hat.com>, kvm@...r.kernel.org
Cc:     Wanpeng Li <wanpengli@...cent.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Joerg Roedel <joro@...tes.org>, Borislav Petkov <bp@...en8.de>,
        Sean Christopherson <seanjc@...gle.com>,
        Jim Mattson <jmattson@...gle.com>,
        "maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)" <x86@...nel.org>,
        "open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)" 
        <linux-kernel@...r.kernel.org>,
        Suravee Suthikulpanit <suravee.suthikulpanit@....com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Ingo Molnar <mingo@...hat.com>,
        "H. Peter Anvin" <hpa@...or.com>
Subject: Re: [PATCH v3 01/12] Revert "KVM: x86/mmu: Allow zap gfn range to
 operate under the mmu read lock"

On 02/08/21 20:33, Maxim Levitsky wrote:
> From: Sean Christopherson <seanjc@...gle.com>
> 
> This, together with the next patch, will fix a future race between
> kvm_zap_gfn_range and the page fault handler, which will arise
> once the AVIC memslot is only partially disabled.
> 
> This is based on a patch suggested by Sean Christopherson:
> https://lkml.org/lkml/2021/7/22/1025

I'll also add a small note from the original message:

     The performance impact is minimal since kvm_zap_gfn_range is only called by
     two users, update_mtrr() and kvm_post_set_cr0().  Both only use it if the guest
     has non-coherent DMA, in order to honor the guest's UC memtype.  MTRR and CD
     setup only happens at boot, and generally in an area where the page tables
     should be small (for CD) or should not include the affected GFNs at all
     (for MTRRs).

On top of this, I think the CD case (kvm_post_set_cr0) can be changed to use
kvm_mmu_zap_all_fast.

Paolo

> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> Signed-off-by: Maxim Levitsky <mlevitsk@...hat.com>
> ---
>   arch/x86/kvm/mmu/mmu.c     | 19 ++++++++-----------
>   arch/x86/kvm/mmu/tdp_mmu.c | 15 ++++-----------
>   arch/x86/kvm/mmu/tdp_mmu.h | 11 ++++-------
>   3 files changed, 16 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index a8cdfd8d45c4..9d78cb1c0f35 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -5638,8 +5638,9 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
>   	int i;
>   	bool flush = false;
>   
> +	write_lock(&kvm->mmu_lock);
> +
>   	if (kvm_memslots_have_rmaps(kvm)) {
> -		write_lock(&kvm->mmu_lock);
>   		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
>   			slots = __kvm_memslots(kvm, i);
>   			kvm_for_each_memslot(memslot, slots) {
> @@ -5659,22 +5660,18 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
>   		}
>   		if (flush)
>   			kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end);
> -		write_unlock(&kvm->mmu_lock);
>   	}
>   
>   	if (is_tdp_mmu_enabled(kvm)) {
> -		flush = false;
> -
> -		read_lock(&kvm->mmu_lock);
>   		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
>   			flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, gfn_start,
> -							  gfn_end, flush, true);
> -		if (flush)
> -			kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
> -							   gfn_end);
> -
> -		read_unlock(&kvm->mmu_lock);
> +							  gfn_end, flush);
>   	}
> +
> +	if (flush)
> +		kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end);
> +
> +	write_unlock(&kvm->mmu_lock);
>   }
>   
>   static bool slot_rmap_write_protect(struct kvm *kvm,
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 43f12f5d12c0..3e0222ce3f4e 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -777,21 +777,15 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>    * non-root pages mapping GFNs strictly within that range. Returns true if
>    * SPTEs have been cleared and a TLB flush is needed before releasing the
>    * MMU lock.
> - *
> - * If shared is true, this thread holds the MMU lock in read mode and must
> - * account for the possibility that other threads are modifying the paging
> - * structures concurrently. If shared is false, this thread should hold the
> - * MMU in write mode.
>    */
>   bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
> -				 gfn_t end, bool can_yield, bool flush,
> -				 bool shared)
> +				 gfn_t end, bool can_yield, bool flush)
>   {
>   	struct kvm_mmu_page *root;
>   
> -	for_each_tdp_mmu_root_yield_safe(kvm, root, as_id, shared)
> +	for_each_tdp_mmu_root_yield_safe(kvm, root, as_id, false)
>   		flush = zap_gfn_range(kvm, root, start, end, can_yield, flush,
> -				      shared);
> +				      false);
>   
>   	return flush;
>   }
> @@ -803,8 +797,7 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
>   	int i;
>   
>   	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
> -		flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn,
> -						  flush, false);
> +		flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn, flush);
>   
>   	if (flush)
>   		kvm_flush_remote_tlbs(kvm);
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
> index b224d126adf9..358f447d4012 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.h
> +++ b/arch/x86/kvm/mmu/tdp_mmu.h
> @@ -20,14 +20,11 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
>   			  bool shared);
>   
>   bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
> -				 gfn_t end, bool can_yield, bool flush,
> -				 bool shared);
> +				 gfn_t end, bool can_yield, bool flush);
>   static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id,
> -					     gfn_t start, gfn_t end, bool flush,
> -					     bool shared)
> +					     gfn_t start, gfn_t end, bool flush)
>   {
> -	return __kvm_tdp_mmu_zap_gfn_range(kvm, as_id, start, end, true, flush,
> -					   shared);
> +	return __kvm_tdp_mmu_zap_gfn_range(kvm, as_id, start, end, true, flush);
>   }
>   static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
>   {
> @@ -44,7 +41,7 @@ static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
>   	 */
>   	lockdep_assert_held_write(&kvm->mmu_lock);
>   	return __kvm_tdp_mmu_zap_gfn_range(kvm, kvm_mmu_page_as_id(sp),
> -					   sp->gfn, end, false, false, false);
> +					   sp->gfn, end, false, false);
>   }
>   
>   void kvm_tdp_mmu_zap_all(struct kvm *kvm);
>