Message-ID: <20251223054806.1611168-7-jon@nutanix.com>
Date: Mon, 22 Dec 2025 22:47:59 -0700
From: Jon Kohler <jon@...anix.com>
To: seanjc@...gle.com, pbonzini@...hat.com, tglx@...utronix.de,
mingo@...hat.com, bp@...en8.de, dave.hansen@...ux.intel.com,
x86@...nel.org, hpa@...or.com, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org
Cc: ken@...elabs.ch, Alexander.Grest@...rosoft.com, chao.gao@...el.com,
madvenka@...ux.microsoft.com, mic@...ikod.net, nsaenz@...zon.es,
tao1.su@...ux.intel.com, xiaoyao.li@...el.com, zhao1.liu@...el.com,
Jon Kohler <jon@...anix.com>
Subject: [PATCH 6/8] KVM: VMX: Enhance EPT violation handler for MBEC

Extend __vmx_handle_ept_violation() to understand mmu_has_mbec() and the
difference between user mode and kernel mode fetches.

Add a synthetic PF bit, PFERR_USER_FETCH_MASK, set in error_code by the
EPT violation handler to signal to permission_fault() that the EPT
violation was a user mode instruction fetch.

Extend permission_fault() and route the mmu_has_mbec() case to a special
handler, mbec_permission_fault(), since permission_fault() can no longer
trivially shift the permission bitmap to determine whether there was a
permission fault.
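
The fetch side of the new check boils down to roughly the sketch below
(illustrative only; mbec_fetch_faults() is a hypothetical helper, not
something added by this patch):

	/*
	 * A user mode fetch is satisfied only by ACC_USER_EXEC_MASK (UX),
	 * a kernel mode fetch only by ACC_EXEC_MASK (KX).
	 */
	static bool mbec_fetch_faults(bool user_fetch, unsigned int pte_access)
	{
		if (user_fetch)
			return !(pte_access & ACC_USER_EXEC_MASK);
		return !(pte_access & ACC_EXEC_MASK);
	}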
Signed-off-by: Jon Kohler <jon@...anix.com>
---
arch/x86/include/asm/kvm_host.h | 8 +++-
arch/x86/kvm/mmu.h | 7 +++-
arch/x86/kvm/mmu/mmu.c | 66 +++++++++++++++++++++++++++++++++
arch/x86/kvm/mmu/spte.h | 14 ++++---
arch/x86/kvm/vmx/common.h | 22 ++++++-----
5 files changed, 100 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 66afcff43ec5..99381c55fceb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -286,7 +286,13 @@ enum x86_intercept_stage;
* when the guest was accessing private memory.
*/
#define PFERR_PRIVATE_ACCESS BIT_ULL(49)
-#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)
+/*
+ * USER_FETCH_MASK is a KVM-defined flag used to indicate user fetches when
+ * translating EPT violations for Intel MBEC.
+ */
+#define PFERR_USER_FETCH_MASK BIT_ULL(50)
+#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS | \
+ PFERR_USER_FETCH_MASK)
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 558a15ff82e6..d7bf679183f7 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -95,6 +95,8 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu);
+bool mbec_permission_fault(struct kvm_vcpu *vcpu, unsigned int pte_access,
+ unsigned int pfec);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
@@ -216,7 +218,10 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
- fault = (mmu->permissions[index] >> pte_access) & 1;
+ if (mmu_has_mbec(vcpu))
+ fault = mbec_permission_fault(vcpu, pte_access, pfec);
+ else
+ fault = (mmu->permissions[index] >> pte_access) & 1;
WARN_ON_ONCE(pfec & (PFERR_PK_MASK | PFERR_SS_MASK | PFERR_RSVD_MASK));
if (unlikely(mmu->pkru_mask)) {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b0eb8d4c5ef2..673f2cebc36c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5664,6 +5664,72 @@ void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
reset_guest_paging_metadata(vcpu, mmu);
}
+/*
+ * Check permissions for MBEC-enabled EPT accesses.
+ * Handles all permission checks with MBEC awareness (UX/KX distinction).
+ *
+ * Returns true if access should fault, false otherwise.
+ */
+bool mbec_permission_fault(struct kvm_vcpu *vcpu, unsigned int pte_access,
+ unsigned int pfec)
+{
+ bool has_ux = pte_access & ACC_USER_EXEC_MASK;
+ bool has_kx = pte_access & ACC_EXEC_MASK;
+ bool write_fault = false;
+ bool fetch_fault = false;
+ bool read_fault = false;
+
+ /*
+ * Fault conditions:
+ * - Write fault: pfec has WRITE_MASK set but pte_access lacks
+ * WRITE permission
+ * - Fetch fault: pfec has FETCH_MASK set but pte_access lacks
+ * matching execute permission. For MBEC, checks both guest PTE
+ * U/S bits and CPL, both are additive:
+ * * If neither UX nor KX is set:
+ * always fault (no execute permission at all)
+ * * User fetch (guest PTE user OR CPL > 0):
+ * requires UX permission (has_ux)
+ * * Kernel fetch (guest PTE supervisor AND CPL = 0):
+ * requires KX permission (has_kx)
+ * - Read fault: pfec has USER_MASK set (read access in EPT
+ * context) but pte_access lacks read permission
+ *
+ * Note: In EPT context, PFERR_USER_MASK indicates read access,
+ * not user-mode access. This is different from regular paging
+ * where PFERR_USER_MASK means user-mode (CPL=3).
+ * ACC_USER_MASK in EPT context maps to VMX_EPT_READABLE_MASK
+ * (bit 0), the readable permission.
+ */
+
+ /* Check write permission independently */
+ if (pfec & PFERR_WRITE_MASK)
+ write_fault = !(pte_access & ACC_WRITE_MASK);
+
+ /* Check fetch permission independently */
+ if (pfec & PFERR_FETCH_MASK) {
+ /*
+ * For MBEC, check execute permissions. A fetch faults if:
+ * - User fetch (guest PTE user OR CPL > 0) lacks UX permission
+ * - Kernel fetch (guest PTE supervisor AND CPL = 0) lacks KX permission
+ */
+ bool is_user_fetch = (pfec & PFERR_USER_FETCH_MASK) ||
+ (kvm_x86_call(get_cpl)(vcpu) > 0);
+
+ /*
+ * A user-mode fetch requires user-execute permission (UX).
+ * A kernel-mode fetch requires kernel-execute permission (KX).
+ */
+ fetch_fault = is_user_fetch ? !has_ux : !has_kx;
+ }
+
+ /* Check read permission: PFERR_USER_MASK indicates read in EPT */
+ if (pfec & PFERR_USER_MASK)
+ read_fault = !(pte_access & ACC_USER_MASK);
+
+ return write_fault || fetch_fault || read_fault;
+}
+
static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
{
int maxpa;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 74fb1fe60d89..cb94f039898d 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -383,14 +383,18 @@ static inline bool is_executable_pte_fault(u64 spte,
*/
if (WARN_ON_ONCE(!shadow_x_mask))
return fault->user || !(spte & shadow_user_mask);
-
/*
- * For TDP MMU, the fault->user bit indicates a read access,
- * not the guest's CPL. For execute faults, check both execute
- * permissions since we don't know the actual CPL.
+ * For TDP MMU, fault->user indicates a read access, not CPL.
+ * For execute faults, we don't know the CPL here, so we can't
+ * definitively check permissions. Being optimistic and checking
+ * for any execute permission can lead to infinite fault loops
+ * if the wrong type of execute permission is present (e.g. UX
+ * only for a kernel fetch). The safe approach is to be
+ * pessimistic and return false, forcing the fault to the slow
+ * path which can do a full permission check.
*/
if (fault->is_tdp)
- return spte & (shadow_x_mask | shadow_ux_mask);
+ return false;
return spte & (fault->user ? shadow_ux_mask : shadow_x_mask);
}
diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
index adf925500b9e..96bdca78696d 100644
--- a/arch/x86/kvm/vmx/common.h
+++ b/arch/x86/kvm/vmx/common.h
@@ -83,6 +83,7 @@ static inline bool vt_is_tdx_private_gpa(struct kvm *kvm, gpa_t gpa)
static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
unsigned long exit_qualification)
{
+ unsigned long rwx_mask;
u64 error_code;
/* Is it a read fault? */
@@ -92,16 +93,17 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
? PFERR_WRITE_MASK : 0;
/* Is it a fetch fault? */
- error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
- ? PFERR_FETCH_MASK : 0;
- /*
- * ept page table entry is present?
- * note: unconditionally clear USER_EXEC until mode-based
- * execute control is implemented
- */
- error_code |= (exit_qualification &
- (EPT_VIOLATION_PROT_MASK & ~EPT_VIOLATION_PROT_USER_EXEC))
- ? PFERR_PRESENT_MASK : 0;
+ if (exit_qualification & EPT_VIOLATION_ACC_INSTR) {
+ error_code |= PFERR_FETCH_MASK;
+ if (mmu_has_mbec(vcpu) &&
+ exit_qualification & EPT_VIOLATION_PROT_USER_EXEC)
+ error_code |= PFERR_USER_FETCH_MASK;
+ }
+ /* ept page table entry is present? */
+ rwx_mask = EPT_VIOLATION_PROT_MASK;
+ if (mmu_has_mbec(vcpu))
+ rwx_mask |= EPT_VIOLATION_PROT_USER_EXEC;
+ error_code |= (exit_qualification & rwx_mask) ? PFERR_PRESENT_MASK : 0;
if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID)
error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ?
--
2.43.0