lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <72655345e07a02028c9239ccb2c3633dd72bbf9d.1692119201.git.isaku.yamahata@intel.com>
Date:   Tue, 15 Aug 2023 10:18:55 -0700
From:   isaku.yamahata@...el.com
To:     kvm@...r.kernel.org, linux-kernel@...r.kernel.org
Cc:     isaku.yamahata@...el.com, isaku.yamahata@...il.com,
        Michael Roth <michael.roth@....com>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Sean Christopherson <seanjc@...gle.com>, erdemaktas@...gle.com,
        Sagi Shahar <sagis@...gle.com>,
        David Matlack <dmatlack@...gle.com>,
        Kai Huang <kai.huang@...el.com>,
        Zhi Wang <zhi.wang.linux@...il.com>, chen.bo@...el.com,
        linux-coco@...ts.linux.dev,
        Chao Peng <chao.p.peng@...ux.intel.com>,
        Ackerley Tng <ackerleytng@...gle.com>,
        Vishal Annapurve <vannapurve@...gle.com>,
        Yuan Yao <yuan.yao@...ux.intel.com>,
        Jarkko Sakkinen <jarkko@...nel.org>,
        Xu Yilun <yilun.xu@...el.com>,
        Quentin Perret <qperret@...gle.com>, wei.w.wang@...el.com,
        Fuad Tabba <tabba@...gle.com>
Subject: [PATCH 8/8] RFC: KVM: gmem: Guarantee the order of destruction

From: Isaku Yamahata <isaku.yamahata@...el.com>

Call kvm_flush_shadow_all() before releasing the kvm gmem file on guest
destruction.

The current gmem doesn't guarantee the destruction order between kvm gmem
and kvm_mmu_notifier_release(), which calls kvm_flush_shadow_all().  When
destroying a TD guest, it's efficient to call kvm_flush_shadow_all() before
kvm_gmem_issue_arch_invalidate() is called on releasing its struct file,
because kvm_flush_shadow_all() releases the guest's host key ID (HKID).
After the HKID is released, the TDX module no longer has to validate the
consistency of the Secure-EPT structure.

One way would be to make struct kvm hold a reference to the kvm gmem file.
However, the current gmem implementation chose to make the kvm gmem file
hold a reference to struct kvm, so adding a reference in the other
direction would create a reference cycle.  Use kvm_mmu_notifier_release()
to break the cycle instead.

Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
---
 include/linux/kvm_host.h | 24 ++++++++++++++++++++++++
 virt/kvm/kvm_main.c      | 23 ++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 349b0bf81fa5..d717945702a8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -594,6 +594,10 @@ struct kvm_memory_slot {
 	u16 as_id;
 
 #ifdef CONFIG_KVM_PRIVATE_MEM
+#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
+	struct file *file;
+#endif
+	/* Private guest_mem */
 	struct {
 		struct file __rcu *file;
 		pgoff_t pgoff;
@@ -601,6 +605,26 @@ struct kvm_memory_slot {
 #endif
 };
 
+static inline int kvm_memslot_gmem_fget(struct kvm_memory_slot *memslot, int fd)
+{
+#if defined(CONFIG_KVM_PRIVATE_MEM) && defined(CONFIG_KVM_GENERIC_MMU_NOTIFIER)
+	memslot->file = fget(fd);
+	if (!memslot->file)
+		return -EBADF;
+#endif
+	return 0;
+}
+
+static inline void kvm_memslot_gmem_fput(struct kvm_memory_slot *memslot)
+{
+#if defined(CONFIG_KVM_PRIVATE_MEM) && defined(CONFIG_KVM_GENERIC_MMU_NOTIFIER)
+	if (memslot->file) {
+		fput(memslot->file);
+		memslot->file = NULL;
+	}
+#endif
+}
+
 static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
 {
 	return slot && (slot->flags & KVM_MEM_PRIVATE);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4855d0b7a859..35bc3b64b7e4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -926,6 +926,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
 	int idx;
+	int i;
 
 	/*
 	 * Avoide race with kvm_gmem_release().
@@ -936,6 +937,18 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 	idx = srcu_read_lock(&kvm->srcu);
 	kvm_flush_shadow_all(kvm);
 	srcu_read_unlock(&kvm->srcu, idx);
+
+	/* Break circular reference count: kvm->gmem, gmem->kvm. */
+	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
+		struct kvm_memslots *slots = __kvm_memslots(kvm, i);
+		struct kvm_memory_slot *memslot;
+		struct hlist_node *tmp;
+		int bkt;
+
+		hash_for_each_safe(slots->id_hash, bkt, tmp, memslot, id_node[slots->node_idx])
+			kvm_memslot_gmem_fput(memslot);
+	}
+
 	mutex_unlock(&kvm->slots_lock);
 }
 
@@ -1008,8 +1021,10 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 /* This does not remove the slot from struct kvm_memslots data structures */
 static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
-	if (slot->flags & KVM_MEM_PRIVATE)
+	if (slot->flags & KVM_MEM_PRIVATE) {
 		kvm_gmem_unbind(slot);
+		kvm_memslot_gmem_fput(slot);
+	}
 
 	kvm_destroy_dirty_bitmap(slot);
 
@@ -1734,6 +1749,8 @@ static void kvm_commit_memory_region(struct kvm *kvm,
 		if (old->dirty_bitmap && !new->dirty_bitmap)
 			kvm_destroy_dirty_bitmap(old);
 
+		kvm_memslot_gmem_fput(old);
+
 		/*
 		 * The final quirk.  Free the detached, old slot, but only its
 		 * memory, not any metadata.  Metadata, including arch specific
@@ -2088,6 +2105,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	new->flags = mem->flags;
 	new->userspace_addr = mem->userspace_addr;
 	if (mem->flags & KVM_MEM_PRIVATE) {
+		r = kvm_memslot_gmem_fget(new, mem->gmem_fd);
+		if (r)
+			goto out;
 		r = kvm_gmem_bind(kvm, new, mem->gmem_fd, mem->gmem_offset);
 		if (r)
 			goto out;
@@ -2103,6 +2123,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (mem->flags & KVM_MEM_PRIVATE)
 		kvm_gmem_unbind(new);
 out:
+	kvm_memslot_gmem_fput(new);
 	kfree(new);
 	return r;
 }
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ