[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aCJIZgHi67_lze_v@google.com>
Date: Mon, 12 May 2025 12:13:42 -0700
From: Sean Christopherson <seanjc@...gle.com>
To: Jon Kohler <jon@...anix.com>
Cc: pbonzini@...hat.com, tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
dave.hansen@...ux.intel.com, x86@...nel.org, hpa@...or.com,
kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
Sergey Dyasli <sergey.dyasli@...anix.com>
Subject: Re: [RFC PATCH 12/18] KVM: x86/mmu: Introduce shadow_ux_mask
On Thu, Mar 13, 2025, Jon Kohler wrote:
> @@ -28,6 +28,7 @@ u64 __read_mostly shadow_host_writable_mask;
> u64 __read_mostly shadow_mmu_writable_mask;
> u64 __read_mostly shadow_nx_mask;
> u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
> +u64 __read_mostly shadow_ux_mask;
> u64 __read_mostly shadow_user_mask;
> u64 __read_mostly shadow_accessed_mask;
> u64 __read_mostly shadow_dirty_mask;
> @@ -313,8 +314,14 @@ u64 make_huge_page_split_spte(struct kvm *kvm, u64 huge_spte,
> * the page executable as the NX hugepage mitigation no longer
> * applies.
> */
> - if ((role.access & ACC_EXEC_MASK) && is_nx_huge_page_enabled(kvm))
> + if ((role.access & ACC_EXEC_MASK) && is_nx_huge_page_enabled(kvm)) {
This is wrong, and probably so is every other chunk of KVM that looks at
ACC_EXEC_MASK. E.g. if a guest hugepage is executable for user but not supervisor,
KVM will fail to make the small child user-executable.
The bug in make_spte() is even worse, because KVM would let an MBEC-aware guest
trigger the iTLB multi-hit #MC.
> child_spte = make_spte_executable(child_spte);
> + // TODO: For LKML: switch to vcpu->arch.pt_guest_exec_control? up
> + // for suggestions on how best to toggle this.
No, it belongs in the role.
> + if (enable_pt_guest_exec_control &&
> + role.access & ACC_USER_EXEC_MASK)
> + child_spte |= shadow_ux_mask;
> + }
> }
>
> return child_spte;
> @@ -326,7 +333,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
> u64 spte = SPTE_MMU_PRESENT_MASK;
>
> spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
> - shadow_user_mask | shadow_x_mask | shadow_me_value;
> + shadow_user_mask | shadow_x_mask | shadow_ux_mask | shadow_me_value;
>
> if (ad_disabled)
> spte |= SPTE_TDP_AD_DISABLED;
> @@ -420,7 +427,8 @@ void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask)
> }
> EXPORT_SYMBOL_GPL(kvm_mmu_set_me_spte_mask);
>
> -void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
> +void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only,
> + bool has_guest_exec_ctrl)
> {
> shadow_user_mask = VMX_EPT_READABLE_MASK;
> shadow_accessed_mask = has_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull;
> @@ -428,8 +436,14 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
> shadow_nx_mask = 0ull;
> shadow_x_mask = VMX_EPT_EXECUTABLE_MASK;
> /* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */
> + // For LKML Review:
> + // Do we need to modify shadow_present_mask in the MBEC case?
No, because MBEC bifurcates X, it doesn't change whether or not an EPTE can be
X without being R. From the SDM:
1. If the “mode-based execute control for EPT” VM-execution control is 1,
setting bit 0 indicates also that software may also configure EPT
paging-structure entries in which bits 1:0 are both clear and in which bit 10
is set (indicating a translation that can be used to fetch instructions from a
supervisor-mode linear address or a user-mode linear address).
> shadow_present_mask =
> (has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT;
> +
> + shadow_ux_mask =
> + has_guest_exec_ctrl ? VMX_EPT_USER_EXECUTABLE_MASK : 0ull;
This is EPT specific code, just call this what it is:
shadow_ux_mask = has_mbec ? VMX_EPT_USER_EXECUTABLE_MASK : 0ull;
> +
> /*
> * EPT overrides the host MTRRs, and so KVM must program the desired
> * memtype directly into the SPTEs. Note, this mask is just the mask
> @@ -484,6 +498,7 @@ void kvm_mmu_reset_all_pte_masks(void)
> shadow_dirty_mask = PT_DIRTY_MASK;
> shadow_nx_mask = PT64_NX_MASK;
> shadow_x_mask = 0;
> + shadow_ux_mask = 0;
> shadow_present_mask = PT_PRESENT_MASK;
>
> /*
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index d9e22133b6d0..dc2f0dc9c46e 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -171,6 +171,7 @@ extern u64 __read_mostly shadow_mmu_writable_mask;
> extern u64 __read_mostly shadow_nx_mask;
> extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
> extern u64 __read_mostly shadow_user_mask;
> +extern u64 __read_mostly shadow_ux_mask;
> extern u64 __read_mostly shadow_accessed_mask;
> extern u64 __read_mostly shadow_dirty_mask;
> extern u64 __read_mostly shadow_mmio_value;
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 0aadfa924045..d16e3f170258 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -8544,7 +8544,8 @@ __init int vmx_hardware_setup(void)
>
> if (enable_ept)
> kvm_mmu_set_ept_masks(enable_ept_ad_bits,
> - cpu_has_vmx_ept_execute_only());
> + cpu_has_vmx_ept_execute_only(),
> + enable_pt_guest_exec_control);
Without the module param, just cpu_has_vmx_mbec().
Powered by blists - more mailing lists