Date:   Thu, 22 Jun 2023 16:16:28 -0700
From:   isaku.yamahata@...el.com
To:     kvm@...r.kernel.org, linux-kernel@...r.kernel.org
Cc:     isaku.yamahata@...el.com, isaku.yamahata@...il.com,
        Paolo Bonzini <pbonzini@...hat.com>, erdemaktas@...gle.com,
        Sean Christopherson <seanjc@...gle.com>,
        Sagi Shahar <sagis@...gle.com>,
        David Matlack <dmatlack@...gle.com>,
        Kai Huang <kai.huang@...el.com>,
        Zhi Wang <zhi.wang.linux@...il.com>, chen.bo@...el.com,
        linux-coco@...ts.linux.dev,
        Chao Peng <chao.p.peng@...ux.intel.com>,
        Ackerley Tng <ackerleytng@...gle.com>,
        Vishal Annapurve <vannapurve@...gle.com>,
        Michael Roth <michael.roth@....com>
Subject: [RFC PATCH v2 4/6] KVM: x86: Introduce fault type to indicate kvm page fault is private

From: Isaku Yamahata <isaku.yamahata@...el.com>

Introduce a KVM fault type to indicate how to handle a KVM page fault.

It is unfortunate and inflexible for kvm_mmu_do_page_fault() to call
kvm_mem_is_private(), which eventually looks up the memory attributes.
Later, __kvm_faultin_pfn() looks up the memory attributes again.  Because
the mmu lock is not held between the two lookups, there is a race window
in which another thread can change the memory attributes.  SEV-SNP and
TDX each define their own way to indicate that a page fault is private.
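
The race can be sketched as follows (the interleaving and the attribute
update path here are simplified purely for illustration):

	/*
	 * vCPU thread                        another thread
	 * -----------                        --------------
	 * kvm_mmu_do_page_fault():
	 *   fault.is_private =
	 *     kvm_mem_is_private(kvm, gfn);
	 *                                    memory attributes of gfn are
	 *                                    flipped private <-> shared
	 * __kvm_faultin_pfn():
	 *   kvm_mem_is_private(kvm, gfn);
	 *     -> may now disagree with fault.is_private
	 */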

Add a KVM fault type, add mmu_private_fault_mask to struct kvm_arch so
that SEV-SNP can determine whether a fault is private, and add
gfn_shared_mask to struct kvm_arch so that TDX can do the same.
KVM_FAULT_SHARED_ALWAYS is added for conventional guests to avoid the
overhead of looking up memory attributes.
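
How kvm_get_fault_type() classifies a fault, with made-up mask values
purely for illustration (the real values would be set by SEV-SNP/TDX
initialization code outside this patch):

	/*
	 * mmu_private_fault_mask != 0 (SEV-SNP style), e.g. BIT_ULL(48):
	 *   (err & mask) != 0  ->  KVM_FAULT_PRIVATE
	 *   (err & mask) == 0  ->  KVM_FAULT_SHARED
	 *
	 * gfn_shared_mask != 0 (TDX style), e.g. gpa_to_gfn(BIT_ULL(51)):
	 *   (gfn & mask) != 0  ->  KVM_FAULT_SHARED
	 *   (gfn & mask) == 0  ->  KVM_FAULT_PRIVATE
	 *
	 * KVM_X86_PROTECTED_VM with neither mask  ->  KVM_FAULT_MEM_ATTR
	 *   (resolved via kvm_mem_is_private() in __kvm_faultin_pfn())
	 * conventional VM  ->  KVM_FAULT_SHARED_ALWAYS (no attribute lookup)
	 */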

Suggested-by: Michael Roth <michael.roth@....com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
---
Changes v1 -> v2:
- Introduced fault type and replaced is_private with fault_type.
- Added kvm_get_fault_type() to encapsulate the difference.
---
 arch/x86/include/asm/kvm_host.h |  6 ++++++
 arch/x86/kvm/mmu/mmu.c          | 26 ++++++++++++++++++++------
 arch/x86/kvm/mmu/mmu_internal.h | 33 +++++++++++++++++++++++++++++++--
 3 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8ae131dc645d..5afeefc7a516 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1445,6 +1445,12 @@ struct kvm_arch {
 	 */
 #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
 	struct kvm_mmu_memory_cache split_desc_cache;
+
+#ifdef CONFIG_KVM_PROTECTED_VM
+	/* To make the patch compile. */
+	u64 mmu_private_fault_mask;
+	gfn_t gfn_shared_mask;
+#endif
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b8ba7f11c3cb..feec75515f39 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3174,10 +3174,12 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
 
 static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
 				       const struct kvm_memory_slot *slot,
-				       gfn_t gfn, int max_level, bool is_private)
+				       gfn_t gfn, int max_level,
+				       enum kvm_fault_type fault_type)
 {
 	struct kvm_lpage_info *linfo;
 	int host_level;
+	bool is_private = fault_type == KVM_FAULT_PRIVATE;
 
 	max_level = min(max_level, max_huge_page_level);
 	for ( ; max_level > PG_LEVEL_4K; max_level--) {
@@ -3228,7 +3230,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	 */
 	fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot,
 						       fault->gfn, fault->max_level,
-						       fault->is_private);
+						       fault->fault_type);
 	if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
 		return;
 
@@ -4328,7 +4330,7 @@ static int kvm_do_memory_fault_exit(struct kvm_vcpu *vcpu,
 				    struct kvm_page_fault *fault)
 {
 	vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
-	if (fault->is_private)
+	if (fault->fault_type == KVM_FAULT_PRIVATE)
 		vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
 	else
 		vcpu->run->memory.flags = 0;
@@ -4386,10 +4388,22 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 			return RET_PF_EMULATE;
 	}
 
-	if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn))
-		return kvm_do_memory_fault_exit(vcpu, fault);
+	if (fault->fault_type == KVM_FAULT_SHARED_ALWAYS) {
+		/*
+		 * The conventional case.  Don't look up memory attributes, to
+		 * avoid the overhead.
+		 */
+		fault->fault_type = KVM_FAULT_SHARED;
+	} else if (fault->fault_type == KVM_FAULT_MEM_ATTR) {
+		fault->fault_type = kvm_mem_is_private(vcpu->kvm, fault->gfn) ?
+			KVM_FAULT_PRIVATE : KVM_FAULT_SHARED;
+	} else {
+		if ((fault->fault_type == KVM_FAULT_PRIVATE) !=
+		    kvm_mem_is_private(vcpu->kvm, fault->gfn))
+			return kvm_do_memory_fault_exit(vcpu, fault);
+	}
 
-	if (fault->is_private)
+	if (fault->fault_type == KVM_FAULT_PRIVATE)
 		return kvm_faultin_pfn_private(vcpu, fault);
 
 	async = false;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 7f9ec1e5b136..0ec0b927a391 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -188,6 +188,13 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
 	return READ_ONCE(nx_huge_pages) && !kvm->arch.disable_nx_huge_pages;
 }
 
+enum kvm_fault_type {
+	KVM_FAULT_MEM_ATTR,
+	KVM_FAULT_SHARED,
+	KVM_FAULT_SHARED_ALWAYS,
+	KVM_FAULT_PRIVATE,
+};
+
 struct kvm_page_fault {
 	/* arguments to kvm_mmu_do_page_fault.  */
 	const gpa_t addr;
@@ -203,9 +210,10 @@ struct kvm_page_fault {
 
 	/* Derived from mmu and global state.  */
 	const bool is_tdp;
-	const bool is_private;
 	const bool nx_huge_page_workaround_enabled;
 
+	enum kvm_fault_type fault_type;
+
 	/*
 	 * Whether a >4KB mapping can be created or is forbidden due to NX
 	 * hugepages.
@@ -282,6 +290,27 @@ enum {
 	RET_PF_SPURIOUS,
 };
 
+static inline enum kvm_fault_type kvm_get_fault_type(struct kvm *kvm,
+						     gpa_t gpa, u64 err)
+{
+
+#ifdef CONFIG_KVM_PROTECTED_VM
+	/* SEV-SNP handling */
+	if (kvm->arch.mmu_private_fault_mask)
+		return (err & kvm->arch.mmu_private_fault_mask) ?
+			KVM_FAULT_PRIVATE : KVM_FAULT_SHARED;
+
+	/* TDX handling */
+	if (kvm->arch.gfn_shared_mask)
+		return (gpa_to_gfn(gpa) & kvm->arch.gfn_shared_mask) ?
+			KVM_FAULT_SHARED : KVM_FAULT_PRIVATE;
+#endif
+	if (kvm->arch.vm_type == KVM_X86_PROTECTED_VM)
+		return KVM_FAULT_MEM_ATTR;
+	/* Don't query memory attributes. */
+	return KVM_FAULT_SHARED_ALWAYS;
+}
+
 static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 					u64 err, bool prefetch, int *emulation_type)
 {
@@ -301,7 +330,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 		.max_level = KVM_MAX_HUGEPAGE_LEVEL,
 		.req_level = PG_LEVEL_4K,
 		.goal_level = PG_LEVEL_4K,
-		.is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
+		.fault_type = kvm_get_fault_type(vcpu->kvm, cr2_or_gpa, err),
 	};
 	int r;
 
-- 
2.25.1
