lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250424030800.452-1-yan.y.zhao@intel.com>
Date: Thu, 24 Apr 2025 11:08:00 +0800
From: Yan Zhao <yan.y.zhao@...el.com>
To: pbonzini@...hat.com,
	seanjc@...gle.com
Cc: linux-kernel@...r.kernel.org,
	kvm@...r.kernel.org,
	x86@...nel.org,
	rick.p.edgecombe@...el.com,
	dave.hansen@...el.com,
	kirill.shutemov@...el.com,
	tabba@...gle.com,
	ackerleytng@...gle.com,
	quic_eberman@...cinc.com,
	michael.roth@....com,
	david@...hat.com,
	vannapurve@...gle.com,
	vbabka@...e.cz,
	jroedel@...e.de,
	thomas.lendacky@....com,
	pgonda@...gle.com,
	zhiquan1.li@...el.com,
	fan.du@...el.com,
	jun.miao@...el.com,
	ira.weiny@...el.com,
	isaku.yamahata@...el.com,
	xiaoyao.li@...el.com,
	binbin.wu@...ux.intel.com,
	chao.p.peng@...el.com,
	Yan Zhao <yan.y.zhao@...el.com>
Subject: [RFC PATCH 15/21] KVM: TDX: Support huge page splitting with exclusive kvm->mmu_lock

From: Xiaoyao Li <xiaoyao.li@...el.com>

Implement the split_external_spt hook to support huge page splitting for
TDX when kvm->mmu_lock is held for writing.

Invoke tdh_mem_range_block() and tdh_mem_track(), kick off vCPUs, and then
invoke tdh_mem_page_demote(), in that order. Since kvm->mmu_lock is held
for writing, simply kick off vCPUs on tdx_operand_busy() to ensure the
second SEAMCALL invocation succeeds.

The TDX module may return TDX_INTERRUPTED_RESTARTABLE when there is a
pending interrupt on the host side during tdh_mem_page_demote(). Retry
indefinitely on this error, as with exclusive kvm->mmu_lock the pending
interrupt is for the host only.

[Yan: Split patch for exclusive mmu_lock only, handled busy error]

Signed-off-by: Xiaoyao Li <xiaoyao.li@...el.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
Signed-off-by: Yan Zhao <yan.y.zhao@...el.com>
---
 arch/x86/kvm/vmx/main.c      |  1 +
 arch/x86/kvm/vmx/tdx.c       | 45 ++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/tdx_errno.h |  1 +
 arch/x86/kvm/vmx/x86_ops.h   |  9 ++++++++
 4 files changed, 56 insertions(+)

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index ae8540576821..16c0c31dd066 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -62,6 +62,7 @@ static __init int vt_hardware_setup(void)
 		vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
 		vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
 		vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
+		vt_x86_ops.split_external_spt = tdx_sept_split_private_spt;
 		vt_x86_ops.protected_apic_has_interrupt = tdx_protected_apic_has_interrupt;
 	}
 
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index dd63a634e633..4386e1a0323e 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1806,6 +1806,51 @@ int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
 	return tdx_reclaim_page(virt_to_page(private_spt), PG_LEVEL_4K);
 }
 
+static int tdx_spte_demote_private_spte(struct kvm *kvm, gfn_t gfn,
+					enum pg_level level, struct page *page)
+{
+	int tdx_level = pg_level_to_tdx_sept_level(level);
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+	gpa_t gpa = gfn_to_gpa(gfn);
+	u64 err, entry, level_state;
+
+	do {
+		err = tdh_mem_page_demote(&kvm_tdx->td, gpa, tdx_level, page,
+					  &entry, &level_state);
+	} while (err == TDX_INTERRUPTED_RESTARTABLE);
+
+	if (unlikely(tdx_operand_busy(err))) {
+		tdx_no_vcpus_enter_start(kvm);
+		err = tdh_mem_page_demote(&kvm_tdx->td, gpa, tdx_level, page,
+					  &entry, &level_state);
+		tdx_no_vcpus_enter_stop(kvm);
+	}
+
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error_2(TDH_MEM_PAGE_DEMOTE, err, entry, level_state);
+		return -EIO;
+	}
+	return 0;
+}
+
+int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+			       void *private_spt)
+{
+	struct page *page = virt_to_page(private_spt);
+	int ret;
+
+	if (KVM_BUG_ON(to_kvm_tdx(kvm)->state != TD_STATE_RUNNABLE || level != PG_LEVEL_2M, kvm))
+		return -EINVAL;
+
+	ret = tdx_sept_zap_private_spte(kvm, gfn, level, page);
+	if (ret <= 0)
+		return ret;
+
+	tdx_track(kvm);
+
+	return tdx_spte_demote_private_spte(kvm, gfn, level, page);
+}
+
 int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
 				 enum pg_level level, kvm_pfn_t pfn)
 {
diff --git a/arch/x86/kvm/vmx/tdx_errno.h b/arch/x86/kvm/vmx/tdx_errno.h
index 6ff4672c4181..33589e7fa1e1 100644
--- a/arch/x86/kvm/vmx/tdx_errno.h
+++ b/arch/x86/kvm/vmx/tdx_errno.h
@@ -14,6 +14,7 @@
 #define TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE	0x6000000500000000ULL
 #define TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE	0x6000000700000000ULL
 #define TDX_INTERRUPTED_RESUMABLE		0x8000000300000000ULL
+#define TDX_INTERRUPTED_RESTARTABLE		0x8000000400000000ULL
 #define TDX_OPERAND_INVALID			0xC000010000000000ULL
 #define TDX_OPERAND_BUSY			0x8000020000000000ULL
 #define TDX_PREVIOUS_TLB_EPOCH_BUSY		0x8000020100000000ULL
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 7c183da7c4d4..df7d4cd1436c 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -158,6 +158,8 @@ int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
 			      enum pg_level level, kvm_pfn_t pfn);
 int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
 				 enum pg_level level, kvm_pfn_t pfn);
+int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+			       void *private_spt);
 
 void tdx_flush_tlb_current(struct kvm_vcpu *vcpu);
 void tdx_flush_tlb_all(struct kvm_vcpu *vcpu);
@@ -224,6 +226,13 @@ static inline int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
 	return -EOPNOTSUPP;
 }
 
+static inline int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn,
+					     enum pg_level level,
+					     void *private_spt)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline void tdx_flush_tlb_current(struct kvm_vcpu *vcpu) {}
 static inline void tdx_flush_tlb_all(struct kvm_vcpu *vcpu) {}
 static inline void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) {}
-- 
2.43.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ