Message-ID: <Zh8yHEiOKyvZO+QR@chao-email>
Date: Wed, 17 Apr 2024 10:21:16 +0800
From: Chao Gao <chao.gao@...el.com>
To: <isaku.yamahata@...el.com>
CC: <kvm@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
	<isaku.yamahata@...il.com>, Paolo Bonzini <pbonzini@...hat.com>,
	<erdemaktas@...gle.com>, Sean Christopherson <seanjc@...gle.com>, Sagi Shahar
	<sagis@...gle.com>, Kai Huang <kai.huang@...el.com>, <chen.bo@...el.com>,
	<hang.yuan@...el.com>, <tina.zhang@...el.com>, Sean Christopherson
	<sean.j.christopherson@...el.com>
Subject: Re: [PATCH v19 059/130] KVM: x86/tdp_mmu: Don't zap private pages
 for unsupported cases

On Mon, Feb 26, 2024 at 12:26:01AM -0800, isaku.yamahata@...el.com wrote:
>@@ -779,6 +780,10 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
> 
> 	lockdep_assert_held_write(&kvm->mmu_lock);
> 
>+	WARN_ON_ONCE(zap_private && !is_private_sp(root));
>+	if (!zap_private && is_private_sp(root))
>+		return false;

Should be "return flush;".

Fengwei and I spent a week chasing a bug where virtio-net in a TD guest could
stop working at some point after boot if the host enabled numad. We finally
traced it to this 'return false' statement: returning false discards any flush
state already accumulated from previous roots (or passed in by the caller), so
the caller may skip the TLB flush and leave stale EPT entries behind.

I am wondering if we can refactor the related functions slightly to make such
mistakes harder to introduce and easier to spot. E.g., we could make "@flush"
an in/out parameter of tdp_mmu_zap_leafs(), kvm_tdp_mmu_zap_leafs() and
kvm_tdp_mmu_unmap_gfn_range(). It would then be more apparent that the
"*flush = false" below is problematic, had the change been written as:

	if (!zap_private && is_private_sp(root)) {
		*flush = false;
		return;
	}
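
For reference, a rough sketch of the in/out-parameter version (untested;
only the signature and the early return shown, the rest of the body
unchanged):

	static void tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
				      gfn_t start, gfn_t end, bool can_yield,
				      bool *flush, bool zap_private)
	{
		lockdep_assert_held_write(&kvm->mmu_lock);

		WARN_ON_ONCE(zap_private && !is_private_sp(root));
		if (!zap_private && is_private_sp(root))
			return;	/* leave *flush exactly as the caller set it */
		...
	}

and the loop in kvm_tdp_mmu_zap_leafs() would just pass &flush through:

	for_each_tdp_mmu_root_yield_safe(kvm, root)
		tdp_mmu_zap_leafs(kvm, root, start, end, true, &flush,
				  zap_private && is_private_sp(root));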


>+
> 	rcu_read_lock();
> 
> 	for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end) {
>@@ -810,13 +815,15 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
>  * true if a TLB flush is needed before releasing the MMU lock, i.e. if one or
>  * more SPTEs were zapped since the MMU lock was last acquired.
>  */
>-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
>+bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush,
>+			   bool zap_private)
> {
> 	struct kvm_mmu_page *root;
> 
> 	lockdep_assert_held_write(&kvm->mmu_lock);
> 	for_each_tdp_mmu_root_yield_safe(kvm, root)
>-		flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
>+		flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush,
>+					  zap_private && is_private_sp(root));
> 
> 	return flush;
> }
>@@ -891,7 +898,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
>  * Note, kvm_tdp_mmu_zap_invalidated_roots() is gifted the TDP MMU's reference.
>  * See kvm_tdp_mmu_get_vcpu_root_hpa().
>  */
>-void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
>+void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm, bool skip_private)
> {
> 	struct kvm_mmu_page *root;
> 
>@@ -916,6 +923,12 @@ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
> 	 * or get/put references to roots.
> 	 */
> 	list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) {
>+		/*
>+		 * Skip private root since private page table
>+		 * is only torn down when VM is destroyed.
>+		 */
>+		if (skip_private && is_private_sp(root))
>+			continue;
> 		/*
> 		 * Note, invalid roots can outlive a memslot update!  Invalid
> 		 * roots must be *zapped* before the memslot update completes,
>@@ -1104,14 +1117,26 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> 	return ret;
> }
> 
>+/* Used by mmu notifier via kvm_unmap_gfn_range() */
> bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
> 				 bool flush)
> {
> 	struct kvm_mmu_page *root;
>+	bool zap_private = false;
>+
>+	if (kvm_gfn_shared_mask(kvm)) {
>+		if (!range->only_private && !range->only_shared)
>+			/* attributes change */
>+			zap_private = !(range->arg.attributes &
>+					KVM_MEMORY_ATTRIBUTE_PRIVATE);
>+		else
>+			zap_private = range->only_private;
>+	}
> 
> 	__for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false)
> 		flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end,
>-					  range->may_block, flush);
>+					  range->may_block, flush,
>+					  zap_private && is_private_sp(root));
> 
> 	return flush;
> }
>diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
>index 20d97aa46c49..b3cf58a50357 100644
>--- a/arch/x86/kvm/mmu/tdp_mmu.h
>+++ b/arch/x86/kvm/mmu/tdp_mmu.h
>@@ -19,10 +19,11 @@ __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
> 
> void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root);
> 
>-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
>+bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush,
>+			   bool zap_private);
> bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
> void kvm_tdp_mmu_zap_all(struct kvm *kvm);
>-void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm);
>+void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm, bool skip_private);
> void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm);
> 
> int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
>-- 
>2.25.1
>
>
