Message-ID: <Y1IyOJsVGo+pCYTq@yzhao56-desk.sh.intel.com>
Date: Fri, 21 Oct 2022 13:46:32 +0800
From: Yan Zhao <yan.y.zhao@...el.com>
To: Sean Christopherson <seanjc@...gle.com>
CC: Paolo Bonzini <pbonzini@...hat.com>, <kvm@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, Mingwei Zhang <mizhang@...gle.com>,
"David Matlack" <dmatlack@...gle.com>,
Ben Gardon <bgardon@...gle.com>
Subject: Re: [PATCH v6 4/8] KVM: x86/mmu: Set disallowed_nx_huge_page in TDP
MMU before setting SPTE
Reviewed-by: Yan Zhao <yan.y.zhao@...el.com>

On Wed, Oct 19, 2022 at 04:56:14PM +0000, Sean Christopherson wrote:
> Set nx_huge_page_disallowed in TDP MMU shadow pages before making the SP
> visible to other readers, i.e. before setting its SPTE. This will allow
> KVM to query the flag when determining if a shadow page can be replaced
> by an NX huge page without violating the rules of the mitigation.
>
> Note, the shadow/legacy MMU holds mmu_lock for write, so it's impossible
> for another CPU to see a shadow page without an up-to-date
> nx_huge_page_disallowed, i.e. only the TDP MMU needs the complicated
> dance.
>
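Just to spell out the dance for anyone skimming: the order this patch ends up
with in kvm_tdp_mmu_map() is below, condensed from the hunks later in this
mail, with iteration and error handling trimmed and the comments added by me.

	sp = tdp_mmu_alloc_sp(vcpu);
	tdp_mmu_init_child_sp(sp, &iter);

	/* Set while the SP is still invisible to other CPUs... */
	sp->nx_huge_page_disallowed = fault->huge_page_disallowed;

	/* ...so any reader that can see the SPTE also sees the flag. */
	if (tdp_mmu_link_sp(kvm, &iter, sp, true)) {
		tdp_mmu_free_sp(sp);
		break;
	}

	/* The list update is separate and takes tdp_mmu_pages_lock. */
	if (fault->huge_page_disallowed && fault->req_level >= iter.level) {
		spin_lock(&kvm->arch.tdp_mmu_pages_lock);
		track_possible_nx_huge_page(kvm, sp);
		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
	}
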
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> Reviewed-by: David Matlack <dmatlack@...gle.com>
> ---
> arch/x86/kvm/mmu/mmu.c | 28 +++++++++++++++++++---------
> arch/x86/kvm/mmu/mmu_internal.h | 5 ++---
> arch/x86/kvm/mmu/tdp_mmu.c | 31 ++++++++++++++++++-------------
> 3 files changed, 39 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 99086a684dd2..57c7c52d137a 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -802,11 +802,8 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
> kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
> }
>
> -void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> - bool nx_huge_page_possible)
> +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> {
> - sp->nx_huge_page_disallowed = true;
> -
> /*
> * If it's possible to replace the shadow page with an NX huge page,
> * i.e. if the shadow page is the only thing currently preventing KVM
> @@ -815,8 +812,7 @@ void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> * on the list if KVM is reusing an existing shadow page, i.e. if KVM
> * links a shadow page at multiple points.
> */
> - if (!nx_huge_page_possible ||
> - !list_empty(&sp->possible_nx_huge_page_link))
> + if (!list_empty(&sp->possible_nx_huge_page_link))
> return;
>
> ++kvm->stat.nx_lpage_splits;
> @@ -824,6 +820,15 @@ void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> &kvm->arch.possible_nx_huge_pages);
> }
>
> +static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> + bool nx_huge_page_possible)
> +{
> + sp->nx_huge_page_disallowed = true;
> +
> + if (nx_huge_page_possible)
> + track_possible_nx_huge_page(kvm, sp);
> +}
> +
> static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
> {
> struct kvm_memslots *slots;
> @@ -841,10 +846,8 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
> kvm_mmu_gfn_allow_lpage(slot, gfn);
> }
>
> -void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> {
> - sp->nx_huge_page_disallowed = false;
> -
> if (list_empty(&sp->possible_nx_huge_page_link))
> return;
>
> @@ -852,6 +855,13 @@ void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> list_del_init(&sp->possible_nx_huge_page_link);
> }
>
> +static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> +{
> + sp->nx_huge_page_disallowed = false;
> +
> + untrack_possible_nx_huge_page(kvm, sp);
> +}
> +
> static struct kvm_memory_slot *
> gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
> bool no_dirty_log)
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 67879459a25c..22152241bd29 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -328,8 +328,7 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
>
> void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
>
> -void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> - bool nx_huge_page_possible);
> -void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
> +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
> +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
>
> #endif /* __KVM_X86_MMU_INTERNAL_H */
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 73eb28ed1f03..059231c82345 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -403,8 +403,11 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
> lockdep_assert_held_write(&kvm->mmu_lock);
>
> list_del(&sp->link);
> - if (sp->nx_huge_page_disallowed)
> - unaccount_nx_huge_page(kvm, sp);
> +
> + if (sp->nx_huge_page_disallowed) {
> + sp->nx_huge_page_disallowed = false;
> + untrack_possible_nx_huge_page(kvm, sp);
> + }
>
> if (shared)
> spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
> @@ -1118,16 +1121,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
> * @kvm: kvm instance
> * @iter: a tdp_iter instance currently on the SPTE that should be set
> * @sp: The new TDP page table to install.
> - * @account_nx: True if this page table is being installed to split a
> - * non-executable huge page.
> * @shared: This operation is running under the MMU lock in read mode.
> *
> * Returns: 0 if the new page table was installed. Non-0 if the page table
> * could not be installed (e.g. the atomic compare-exchange failed).
> */
> static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
> - struct kvm_mmu_page *sp, bool account_nx,
> - bool shared)
> + struct kvm_mmu_page *sp, bool shared)
> {
> u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled());
> int ret = 0;
> @@ -1142,8 +1142,6 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
>
> spin_lock(&kvm->arch.tdp_mmu_pages_lock);
> list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
> - if (account_nx)
> - account_nx_huge_page(kvm, sp, true);
> spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
> tdp_account_mmu_page(kvm, sp);
>
> @@ -1157,6 +1155,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
> int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> {
> struct kvm_mmu *mmu = vcpu->arch.mmu;
> + struct kvm *kvm = vcpu->kvm;
> struct tdp_iter iter;
> struct kvm_mmu_page *sp;
> int ret;
> @@ -1193,9 +1192,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> }
>
> if (!is_shadow_present_pte(iter.old_spte)) {
> - bool account_nx = fault->huge_page_disallowed &&
> - fault->req_level >= iter.level;
> -
> /*
> * If SPTE has been frozen by another thread, just
> * give up and retry, avoiding unnecessary page table
> @@ -1207,10 +1203,19 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> sp = tdp_mmu_alloc_sp(vcpu);
> tdp_mmu_init_child_sp(sp, &iter);
>
> - if (tdp_mmu_link_sp(vcpu->kvm, &iter, sp, account_nx, true)) {
> + sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
> +
> + if (tdp_mmu_link_sp(kvm, &iter, sp, true)) {
> tdp_mmu_free_sp(sp);
> break;
> }
> +
> + if (fault->huge_page_disallowed &&
> + fault->req_level >= iter.level) {
> + spin_lock(&kvm->arch.tdp_mmu_pages_lock);
> + track_possible_nx_huge_page(kvm, sp);
> + spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
> + }
> }
> }
>
> @@ -1498,7 +1503,7 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
> * correctness standpoint since the translation will be the same either
> * way.
> */
> - ret = tdp_mmu_link_sp(kvm, iter, sp, false, shared);
> + ret = tdp_mmu_link_sp(kvm, iter, sp, shared);
> if (ret)
> goto out;
>
> --
> 2.38.0.413.g74048e4d9e-goog
>