Message-ID: <20251223054806.1611168-7-jon@nutanix.com>
Date: Mon, 22 Dec 2025 22:47:59 -0700
From: Jon Kohler <jon@...anix.com>
To: seanjc@...gle.com, pbonzini@...hat.com, tglx@...utronix.de,
        mingo@...hat.com, bp@...en8.de, dave.hansen@...ux.intel.com,
        x86@...nel.org, hpa@...or.com, kvm@...r.kernel.org,
        linux-kernel@...r.kernel.org
Cc: ken@...elabs.ch, Alexander.Grest@...rosoft.com, chao.gao@...el.com,
        madvenka@...ux.microsoft.com, mic@...ikod.net, nsaenz@...zon.es,
        tao1.su@...ux.intel.com, xiaoyao.li@...el.com, zhao1.liu@...el.com,
        Jon Kohler <jon@...anix.com>
Subject: [PATCH 6/8] KVM: VMX: Enhance EPT violation handler for MBEC

Extend __vmx_handle_ept_violation() to be aware of mmu_has_mbec() and of
the difference between user mode and kernel mode instruction fetches.

Add a synthetic PF bit, PFERR_USER_FETCH_MASK, which the EPT violation
handler sets in error_code to signal to permission_fault() that the EPT
violation was a user mode instruction fetch.

Extend permission_fault() to route the mmu_has_mbec() case to a dedicated
handler, mbec_permission_fault(), since permission_fault() can no longer
use a trivial shift of the precomputed permission bitmap to decide whether
a permission fault occurred.
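
To make the resulting check concrete, a minimal standalone illustration
of the fetch decision (a hypothetical helper for illustration only, not
code from this patch):

	#include <stdbool.h>

	/*
	 * Which execute permission governs an MBEC fetch fault: a user
	 * fetch (synthetic user-fetch bit set, or CPL > 0) needs UX,
	 * while a kernel fetch needs KX.
	 */
	static bool mbec_fetch_faults(bool user_fetch, int cpl,
				      bool has_ux, bool has_kx)
	{
		bool is_user_fetch = user_fetch || cpl > 0;

		return is_user_fetch ? !has_ux : !has_kx;
	}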

Signed-off-by: Jon Kohler <jon@...anix.com>
---
 arch/x86/include/asm/kvm_host.h |  8 +++-
 arch/x86/kvm/mmu.h              |  7 +++-
 arch/x86/kvm/mmu/mmu.c          | 66 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu/spte.h         | 14 ++++---
 arch/x86/kvm/vmx/common.h       | 22 ++++++-----
 5 files changed, 100 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 66afcff43ec5..99381c55fceb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -286,7 +286,13 @@ enum x86_intercept_stage;
  * when the guest was accessing private memory.
  */
 #define PFERR_PRIVATE_ACCESS   BIT_ULL(49)
-#define PFERR_SYNTHETIC_MASK   (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)
+/*
+ * USER_FETCH_MASK is a KVM-defined flag used to indicate user fetches when
+ * translating EPT violations for Intel MBEC.
+ */
+#define PFERR_USER_FETCH_MASK  BIT_ULL(50)
+#define PFERR_SYNTHETIC_MASK   (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS | \
+				PFERR_USER_FETCH_MASK)
 
 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC	0
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 558a15ff82e6..d7bf679183f7 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -95,6 +95,8 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 				u64 fault_address, char *insn, int insn_len);
 void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
 					struct kvm_mmu *mmu);
+bool mbec_permission_fault(struct kvm_vcpu *vcpu, unsigned int pte_access,
+			   unsigned int pfec);
 
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
@@ -216,7 +218,10 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 
 	kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
 
-	fault = (mmu->permissions[index] >> pte_access) & 1;
+	if (mmu_has_mbec(vcpu))
+		fault = mbec_permission_fault(vcpu, pte_access, pfec);
+	else
+		fault = (mmu->permissions[index] >> pte_access) & 1;
 
 	WARN_ON_ONCE(pfec & (PFERR_PK_MASK | PFERR_SS_MASK | PFERR_RSVD_MASK));
 	if (unlikely(mmu->pkru_mask)) {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b0eb8d4c5ef2..673f2cebc36c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5664,6 +5664,72 @@ void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
 	reset_guest_paging_metadata(vcpu, mmu);
 }
 
+/*
+ * Check permissions for MBEC-enabled EPT accesses.
+ * Handles all permission checks with MBEC awareness (UX/KX distinction).
+ *
+ * Returns true if access should fault, false otherwise.
+ */
+bool mbec_permission_fault(struct kvm_vcpu *vcpu, unsigned int pte_access,
+			   unsigned int pfec)
+{
+	bool has_ux = pte_access & ACC_USER_EXEC_MASK;
+	bool has_kx = pte_access & ACC_EXEC_MASK;
+	bool write_fault = false;
+	bool fetch_fault = false;
+	bool read_fault = false;
+
+	/*
+	 * Fault conditions:
+	 * - Write fault: pfec has WRITE_MASK set but pte_access lacks
+	 *   WRITE permission
+	 * - Fetch fault: pfec has FETCH_MASK set but pte_access lacks
+	 *   matching execute permission. For MBEC, checks both guest PTE
+	 *   U/S bits and CPL, both are additive:
+	 *   * If neither UX nor KX is set:
+	 *       always fault (no execute permission at all)
+	 *   * User fetch (guest PTE user OR CPL > 0):
+	 *       requires UX permission (has_ux)
+	 *   * Kernel fetch (guest PTE supervisor AND CPL = 0):
+	 *       requires KX permission (has_kx)
+	 * - Read fault: pfec has USER_MASK set (read access in EPT
+	 *   context) but pte_access lacks read permission
+	 *
+	 * Note: In EPT context, PFERR_USER_MASK indicates read access,
+	 * not user-mode access. This is different from regular paging
+	 * where PFERR_USER_MASK means user-mode (CPL=3).
+	 * ACC_USER_MASK in EPT context maps to VMX_EPT_READABLE_MASK
+	 * (bit 0), the readable permission.
+	 */
+
+	/* Check write permission independently */
+	if (pfec & PFERR_WRITE_MASK)
+		write_fault = !(pte_access & ACC_WRITE_MASK);
+
+	/* Check fetch permission independently */
+	if (pfec & PFERR_FETCH_MASK) {
+		/*
+		 * For MBEC, check execute permissions. A fetch faults if:
+		 * - User fetch (guest PTE user OR CPL > 0) lacks UX permission
+		 * - Kernel fetch (guest PTE supervisor AND CPL = 0) lacks KX permission
+		 */
+		bool is_user_fetch = (pfec & PFERR_USER_FETCH_MASK) ||
+				     (kvm_x86_call(get_cpl)(vcpu) > 0);
+
+		/*
+		 * A user-mode fetch requires user-execute permission (UX).
+		 * A kernel-mode fetch requires kernel-execute permission (KX).
+		 */
+		fetch_fault = is_user_fetch ? !has_ux : !has_kx;
+	}
+
+	/* Check read permission: PFERR_USER_MASK indicates read in EPT */
+	if (pfec & PFERR_USER_MASK)
+		read_fault = !(pte_access & ACC_USER_MASK);
+
+	return write_fault || fetch_fault || read_fault;
+}
+
 static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
 {
 	int maxpa;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 74fb1fe60d89..cb94f039898d 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -383,14 +383,18 @@ static inline bool is_executable_pte_fault(u64 spte,
 	 */
 	if (WARN_ON_ONCE(!shadow_x_mask))
 		return fault->user || !(spte & shadow_user_mask);
-
 	/*
-	 * For TDP MMU, the fault->user bit indicates a read access,
-	 * not the guest's CPL. For execute faults, check both execute
-	 * permissions since we don't know the actual CPL.
+	 * For TDP MMU, fault->user indicates a read access, not CPL.
+	 * For execute faults, we don't know the CPL here, so we can't
+	 * definitively check permissions. Being optimistic and checking
+	 * for any execute permission can lead to infinite fault loops
+	 * if the wrong type of execute permission is present (e.g. UX
+	 * only for a kernel fetch). The safe approach is to be
+	 * pessimistic and return false, forcing the fault to the slow
+	 * path which can do a full permission check.
 	 */
 	if (fault->is_tdp)
-		return spte & (shadow_x_mask | shadow_ux_mask);
+		return false;
 
 	return spte & (fault->user ? shadow_ux_mask : shadow_x_mask);
 }
diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
index adf925500b9e..96bdca78696d 100644
--- a/arch/x86/kvm/vmx/common.h
+++ b/arch/x86/kvm/vmx/common.h
@@ -83,6 +83,7 @@ static inline bool vt_is_tdx_private_gpa(struct kvm *kvm, gpa_t gpa)
 static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
 					     unsigned long exit_qualification)
 {
+	unsigned long rwx_mask;
 	u64 error_code;
 
 	/* Is it a read fault? */
@@ -92,16 +93,17 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
 	error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
 		      ? PFERR_WRITE_MASK : 0;
 	/* Is it a fetch fault? */
-	error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
-		      ? PFERR_FETCH_MASK : 0;
-	/*
-	 * ept page table entry is present?
-	 * note: unconditionally clear USER_EXEC until mode-based
-	 * execute control is implemented
-	 */
-	error_code |= (exit_qualification &
-		       (EPT_VIOLATION_PROT_MASK & ~EPT_VIOLATION_PROT_USER_EXEC))
-		      ? PFERR_PRESENT_MASK : 0;
+	if (exit_qualification & EPT_VIOLATION_ACC_INSTR) {
+		error_code |= PFERR_FETCH_MASK;
+		if (mmu_has_mbec(vcpu) &&
+		    exit_qualification & EPT_VIOLATION_PROT_USER_EXEC)
+			error_code |= PFERR_USER_FETCH_MASK;
+	}
+	/* ept page table entry is present? */
+	rwx_mask = EPT_VIOLATION_PROT_MASK;
+	if (mmu_has_mbec(vcpu))
+		rwx_mask |= EPT_VIOLATION_PROT_USER_EXEC;
+	error_code |= (exit_qualification & rwx_mask) ? PFERR_PRESENT_MASK : 0;
 
 	if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID)
 		error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ?
-- 
2.43.0

