[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <aad3d385-5743-4f81-992a-22d1701c3611@zytor.com>
Date: Sat, 2 Aug 2025 10:33:37 -0700
From: Xin Li <xin@...or.com>
To: linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
linux-doc@...r.kernel.org
Cc: pbonzini@...hat.com, seanjc@...gle.com, corbet@....net, tglx@...utronix.de,
mingo@...hat.com, bp@...en8.de, dave.hansen@...ux.intel.com,
x86@...nel.org, hpa@...or.com, luto@...nel.org, peterz@...radead.org,
andrew.cooper3@...rix.com, chao.gao@...el.com, hch@...radead.org
Subject: Re: [PATCH v5A 20/23] KVM: nVMX: Add FRED VMCS fields to nested VMX
context handling
> @@ -4531,6 +4593,27 @@ static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
> vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
> vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
> vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
> +
> + vmx->nested.pre_vmexit_fred_config = vmcs_read64(GUEST_IA32_FRED_CONFIG);
> + vmx->nested.pre_vmexit_fred_rsp1 = vmcs_read64(GUEST_IA32_FRED_RSP1);
> + vmx->nested.pre_vmexit_fred_rsp2 = vmcs_read64(GUEST_IA32_FRED_RSP2);
> + vmx->nested.pre_vmexit_fred_rsp3 = vmcs_read64(GUEST_IA32_FRED_RSP3);
> + vmx->nested.pre_vmexit_fred_stklvls = vmcs_read64(GUEST_IA32_FRED_STKLVLS);
> + vmx->nested.pre_vmexit_fred_ssp1 = vmcs_read64(GUEST_IA32_FRED_SSP1);
> + vmx->nested.pre_vmexit_fred_ssp2 = vmcs_read64(GUEST_IA32_FRED_SSP2);
> + vmx->nested.pre_vmexit_fred_ssp3 = vmcs_read64(GUEST_IA32_FRED_SSP3);
This ...
> +
> + if (nested_cpu_save_guest_fred_state(vmcs12)) {
> + vmcs12->guest_ia32_fred_config = vmx->nested.pre_vmexit_fred_config;
> + vmcs12->guest_ia32_fred_rsp1 = vmx->nested.pre_vmexit_fred_rsp1;
> + vmcs12->guest_ia32_fred_rsp2 = vmx->nested.pre_vmexit_fred_rsp2;
> + vmcs12->guest_ia32_fred_rsp3 = vmx->nested.pre_vmexit_fred_rsp3;
> + vmcs12->guest_ia32_fred_stklvls = vmx->nested.pre_vmexit_fred_stklvls;
> + vmcs12->guest_ia32_fred_ssp1 = vmx->nested.pre_vmexit_fred_ssp1;
> + vmcs12->guest_ia32_fred_ssp2 = vmx->nested.pre_vmexit_fred_ssp2;
> + vmcs12->guest_ia32_fred_ssp3 = vmx->nested.pre_vmexit_fred_ssp3;
> + }
> +
> vmcs12->guest_pending_dbg_exceptions =
> vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>
> @@ -4761,6 +4860,26 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
> vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
> vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
>
> + if (nested_cpu_load_host_fred_state(vmcs12)) {
> + vmcs_write64(GUEST_IA32_FRED_CONFIG, vmcs12->host_ia32_fred_config);
> + vmcs_write64(GUEST_IA32_FRED_RSP1, vmcs12->host_ia32_fred_rsp1);
> + vmcs_write64(GUEST_IA32_FRED_RSP2, vmcs12->host_ia32_fred_rsp2);
> + vmcs_write64(GUEST_IA32_FRED_RSP3, vmcs12->host_ia32_fred_rsp3);
> + vmcs_write64(GUEST_IA32_FRED_STKLVLS, vmcs12->host_ia32_fred_stklvls);
> + vmcs_write64(GUEST_IA32_FRED_SSP1, vmcs12->host_ia32_fred_ssp1);
> + vmcs_write64(GUEST_IA32_FRED_SSP2, vmcs12->host_ia32_fred_ssp2);
> + vmcs_write64(GUEST_IA32_FRED_SSP3, vmcs12->host_ia32_fred_ssp3);
> + } else {
> + vmcs_write64(GUEST_IA32_FRED_CONFIG, vmx->nested.pre_vmexit_fred_config);
> + vmcs_write64(GUEST_IA32_FRED_RSP1, vmx->nested.pre_vmexit_fred_rsp1);
> + vmcs_write64(GUEST_IA32_FRED_RSP2, vmx->nested.pre_vmexit_fred_rsp2);
> + vmcs_write64(GUEST_IA32_FRED_RSP3, vmx->nested.pre_vmexit_fred_rsp3);
> + vmcs_write64(GUEST_IA32_FRED_STKLVLS, vmx->nested.pre_vmexit_fred_stklvls);
> + vmcs_write64(GUEST_IA32_FRED_SSP1, vmx->nested.pre_vmexit_fred_ssp1);
> + vmcs_write64(GUEST_IA32_FRED_SSP2, vmx->nested.pre_vmexit_fred_ssp2);
> + vmcs_write64(GUEST_IA32_FRED_SSP3, vmx->nested.pre_vmexit_fred_ssp3);
And this are actually no-ops. In other words, if I don't add these snippets
of code, the CPU still retains the guest FRED MSRs, i.e., the guest FRED
state from vmcs02 is used as that of vmcs01.
> + }
> +
> /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
> if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
> vmcs_write64(GUEST_BNDCFGS, 0);
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 617cbec5c9b3..885e48fe33c4 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -181,6 +181,39 @@ struct nested_vmx {
> */
> u64 pre_vmenter_debugctl;
> u64 pre_vmenter_bndcfgs;
> + u64 pre_vmenter_fred_config;
> + u64 pre_vmenter_fred_rsp1;
> + u64 pre_vmenter_fred_rsp2;
> + u64 pre_vmenter_fred_rsp3;
> + u64 pre_vmenter_fred_stklvls;
> + u64 pre_vmenter_fred_ssp1;
> + u64 pre_vmenter_fred_ssp2;
> + u64 pre_vmenter_fred_ssp3;
> +
> + /*
> + * Used to snapshot MSRs that are conditionally saved on VM-Exit in
> + * order to propagate the guest's pre-VM-Exit value into vmcs12.
> + *
> + * FRED MSRs are *always* saved to vmcs02 since KVM always sets
> + * SECONDARY_VM_EXIT_SAVE_IA32_FRED. However an L1 VMM, although
> + * unlikely, might choose not to set this bit, resulting in FRED MSRs
> + * not being saved to vmcs12.
> + *
> + * It's not a problem when SECONDARY_VM_EXIT_LOAD_IA32_FRED is set,
> + * as the CPU immediately loads the host FRED state from vmcs12 into
> + * the FRED MSRs.
> + *
> + * But an L1 VMM may clear SECONDARY_VM_EXIT_LOAD_IA32_FRED, causing
> + * the CPU to retain the pre VM-Exit FRED MSRs.
> + */
However, I want to make this logic explicit. So might we end up adding
the comment somewhere and removing all the pre_vmexit_fred_* changes?
> + u64 pre_vmexit_fred_config;
> + u64 pre_vmexit_fred_rsp1;
> + u64 pre_vmexit_fred_rsp2;
> + u64 pre_vmexit_fred_rsp3;
> + u64 pre_vmexit_fred_stklvls;
> + u64 pre_vmexit_fred_ssp1;
> + u64 pre_vmexit_fred_ssp2;
> + u64 pre_vmexit_fred_ssp3;
>
> /* to migrate it to L1 if L2 writes to L1's CR8 directly */
> int l1_tpr_threshold;
Thanks!
Xin
Powered by blists - more mailing lists