[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <aRZKEC4n9hpLVCRp@google.com>
Date: Thu, 13 Nov 2025 13:13:52 -0800
From: Sean Christopherson <seanjc@...gle.com>
To: Dongli Zhang <dongli.zhang@...cle.com>
Cc: kvm@...r.kernel.org, x86@...nel.org, linux-kernel@...r.kernel.org,
chao.gao@...el.com, pbonzini@...hat.com, tglx@...utronix.de, mingo@...hat.com,
bp@...en8.de, dave.hansen@...ux.intel.com, hpa@...or.com, joe.jin@...cle.com,
alejandro.j.jimenez@...cle.com
Subject: Re: [PATCH v2 1/1] KVM: VMX: configure SVI during runtime APICv activation
On Wed, Nov 12, 2025, Dongli Zhang wrote:
> Hi Sean,
>
> On 11/12/25 6:47 AM, Sean Christopherson wrote:
> > On Sun, Nov 09, 2025, Dongli Zhang wrote:
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 91b6f2f3edc2..653b8b713547 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -4430,6 +4430,14 @@ void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
> > kvm_vcpu_apicv_active(vcpu));
> >
> > vmx_update_msr_bitmap_x2apic(vcpu);
> > +
> > + /*
> > + * Refresh SVI if APICv is enabled, as any changes KVM made to vISR
> > + * while APICv was disabled need to be reflected in SVI, e.g. so that
> > + * the next accelerated EOI will clear the correct vector in vISR.
> > + */
> > + if (kvm_vcpu_apicv_active(vcpu))
> > + kvm_apic_update_hwapic_isr(vcpu);
> > }
> >
> > static u32 vmx_exec_control(struct vcpu_vmx *vmx)
> > @@ -6880,7 +6888,7 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
> >
> > /*
> > * If L2 is active, defer the SVI update until vmcs01 is loaded, as SVI
> > - * is only relevant for if and only if Virtual Interrupt Delivery is
> > + * is only relevant for L2 if and only if Virtual Interrupt Delivery is
> > * enabled in vmcs12, and if VID is enabled then L2 EOIs affect L2's
> > * vAPIC, not L1's vAPIC. KVM must update vmcs01 on the next nested
> > * VM-Exit, otherwise L1 with run with a stale SVI.
>
>
> As a quick reply, the idea is to call kvm_apic_update_hwapic_isr() in
> vmx_refresh_apicv_exec_ctrl(), instead of __kvm_vcpu_update_apicv().
>
> I think the below case doesn't work:
>
> 1. APICv is activated when vCPU is in L2.
>
> kvm_vcpu_update_apicv()
> -> __kvm_vcpu_update_apicv()
> -> vmx_refresh_apicv_exec_ctrl()
>
> vmx_refresh_apicv_exec_ctrl() returns after setting:
> vmx->nested.update_vmcs01_apicv_status = true.
>
>
> 2. On exit from L2 to L1, __nested_vmx_vmexit() requests for KVM_REQ_APICV_UPDATE.
>
> __nested_vmx_vmexit()
> -> leave_guest_mode(vcpu)
> -> kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu)
>
>
> 3. vCPU processes KVM_REQ_APICV_UPDATE again.
>
> This time, __kvm_vcpu_update_apicv() returns without calling
> refresh_apicv_exec_ctrl(), because (apic->apicv_active == activate).
>
> vmx_refresh_apicv_exec_ctrl() doesn't get any chance to be called.
Oof, that's nasty.
> In order to call kvm_apic_update_hwapic_isr() in vmx_refresh_apicv_exec_ctrl(),
> we may need to resolve the issue mentioned by Chao, for instance, with something
> like:
>
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index bcea087b642f..1725c6a94f99 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -19,6 +19,7 @@
> #include "trace.h"
> #include "vmx.h"
> #include "smm.h"
> +#include "x86_ops.h"
>
> static bool __read_mostly enable_shadow_vmcs = 1;
> module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
> @@ -5216,7 +5217,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32
> vm_exit_reason,
>
> if (vmx->nested.update_vmcs01_apicv_status) {
> vmx->nested.update_vmcs01_apicv_status = false;
> - kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
> + vmx_refresh_apicv_exec_ctrl(vcpu);
> }
Hmm, what if we go the opposite direction and bundle the vISR update into
KVM_REQ_APICV_UPDATE? Then we can drop nested.update_vmcs01_hwapic_isr, and
hopefully avoid similar ordering issues in the future.
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 564f5af5ae86..7bf44a8111e5 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5168,11 +5168,6 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
}
- if (vmx->nested.update_vmcs01_hwapic_isr) {
- vmx->nested.update_vmcs01_hwapic_isr = false;
- kvm_apic_update_hwapic_isr(vcpu);
- }
-
if ((vm_exit_reason != -1) &&
(enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx)))
vmx->nested.need_vmcs12_to_shadow_sync = true;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6f374c815ce2..64edf47bed02 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6907,7 +6907,7 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
*/
WARN_ON_ONCE(vcpu->wants_to_run &&
nested_cpu_has_vid(get_vmcs12(vcpu)));
- to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
+ to_vmx(vcpu)->nested.update_vmcs01_apicv_status = true;
return;
}
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index bc3ed3145d7e..17bd43d6faaf 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -135,7 +135,6 @@ struct nested_vmx {
bool reload_vmcs01_apic_access_page;
bool update_vmcs01_cpu_dirty_logging;
bool update_vmcs01_apicv_status;
- bool update_vmcs01_hwapic_isr;
/*
* Enlightened VMCS has been enabled. It does not mean that L1 has to
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9c2e28028c2b..445bf22ee519 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11218,8 +11218,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
kvm_hv_process_stimers(vcpu);
#endif
- if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
+ if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu)) {
kvm_vcpu_update_apicv(vcpu);
+ kvm_apic_update_hwapic_isr(vcpu);
+ }
if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
kvm_check_async_pf_completion(vcpu);
Powered by blists - more mailing lists