[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <a3d1407c-86d6-46d4-ae96-b40d7b26eb34@oracle.com>
Date: Mon, 17 Nov 2025 19:36:31 -0800
From: Dongli Zhang <dongli.zhang@...cle.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: kvm@...r.kernel.org, x86@...nel.org, linux-kernel@...r.kernel.org,
chao.gao@...el.com, pbonzini@...hat.com, tglx@...utronix.de,
mingo@...hat.com, bp@...en8.de, dave.hansen@...ux.intel.com,
hpa@...or.com, joe.jin@...cle.com, alejandro.j.jimenez@...cle.com
Subject: Re: [PATCH v2 1/1] KVM: VMX: configure SVI during runtime APICv
activation
Hi Sean,
[snip]
>
> Hmm, what if we go the opposite direction and bundle the vISR update into
> KVM_REQ_APICV_UPDATE? Then we can drop nested.update_vmcs01_hwapic_isr, and
> hopefully avoid similar ordering issues in the future.
>
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 564f5af5ae86..7bf44a8111e5 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -5168,11 +5168,6 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
> kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
> }
>
> - if (vmx->nested.update_vmcs01_hwapic_isr) {
> - vmx->nested.update_vmcs01_hwapic_isr = false;
> - kvm_apic_update_hwapic_isr(vcpu);
> - }
> -
> if ((vm_exit_reason != -1) &&
> (enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx)))
> vmx->nested.need_vmcs12_to_shadow_sync = true;
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 6f374c815ce2..64edf47bed02 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6907,7 +6907,7 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
> */
> WARN_ON_ONCE(vcpu->wants_to_run &&
> nested_cpu_has_vid(get_vmcs12(vcpu)));
> - to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
> + to_vmx(vcpu)->nested.update_vmcs01_apicv_status = true;
> return;
> }
>
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index bc3ed3145d7e..17bd43d6faaf 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -135,7 +135,6 @@ struct nested_vmx {
> bool reload_vmcs01_apic_access_page;
> bool update_vmcs01_cpu_dirty_logging;
> bool update_vmcs01_apicv_status;
> - bool update_vmcs01_hwapic_isr;
>
> /*
> * Enlightened VMCS has been enabled. It does not mean that L1 has to
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9c2e28028c2b..445bf22ee519 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -11218,8 +11218,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
> kvm_hv_process_stimers(vcpu);
> #endif
> - if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
> + if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu)) {
> kvm_vcpu_update_apicv(vcpu);
> + kvm_apic_update_hwapic_isr(vcpu);
> + }
> if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
> kvm_check_async_pf_completion(vcpu);
Thank you very much for the suggestion.
There are still a few issues to fix.
1. We still need to remove WARN_ON_ONCE() from vmx_hwapic_isr_update().
[ 1125.176217] WARNING: CPU: 8 PID: 8034 at arch/x86/kvm/vmx/vmx.c:6896
vmx_hwapic_isr_update+0x1c7/0x250 [kvm_intel]
... ...
[ 1125.339364] Call Trace:
[ 1125.342341] <TASK>
[ 1125.344793] vcpu_run+0x2edf/0x3aa0 [kvm]
[ 1125.349629] ? __pfx_load_fixmap_gdt+0x10/0x10
[ 1125.354771] ? __pfx_vcpu_run+0x10/0x10 [kvm]
[ 1125.359841] ? fpregs_mark_activate+0x99/0x150
[ 1125.364909] ? fpu_swap_kvm_fpstate+0x1a1/0x360
[ 1125.370129] kvm_arch_vcpu_ioctl_run+0x7b3/0x1560 [kvm]
[ 1125.376123] ? __pfx_eventfd_write+0x10/0x10
[ 1125.380989] kvm_vcpu_ioctl+0x525/0x1090 [kvm]
[ 1125.386133] ? __pfx_kvm_vcpu_ioctl+0x10/0x10 [kvm]
[ 1125.391801] ? vfs_write+0x21e/0xcc0
[ 1125.395928] ? __pfx_do_vfs_ioctl+0x10/0x10
[ 1125.400746] ? __pfx_vfs_write+0x10/0x10
[ 1125.405260] ? __pfx_ioctl_has_perm.constprop.0.isra.0+0x10/0x10
[ 1125.412141] ? fdget_pos+0x396/0x4c0
[ 1125.416225] ? fput+0x25/0x80
[ 1125.419628] __x64_sys_ioctl+0x133/0x1c0
[ 1125.424102] do_syscall_64+0x53/0xfa0
[ 1125.433954] entry_SYSCALL_64_after_hwframe+0x76/0x7e
2. As you mentioned in a prior email, while this is not a functional issue,
apic_find_highest_isr() is still invoked unconditionally, as
kvm_apic_update_hwapic_isr() is always called during KVM_REQ_APICV_UPDATE.
3. The issue that Chao pointed out is still present.
(1) Suppose APICv is activated during L2.
kvm_vcpu_update_apicv()
-> __kvm_vcpu_update_apicv()
-> apic->apicv_active = true
-> vmx_refresh_apicv_exec_ctrl()
-> vmx->nested.update_vmcs01_apicv_status = true
-> return
Then L2 exits to L1:
__nested_vmx_vmexit()
-> kvm_make_request(KVM_REQ_APICV_UPDATE)
vcpu_enter_guest: KVM_REQ_APICV_UPDATE
-> kvm_vcpu_update_apicv()
-> __kvm_vcpu_update_apicv()
-> return because of
if (apic->apicv_active == activate)
refresh_apicv_exec_ctrl() is skipped.
4. It looks more complicated if we set "update_vmcs01_apicv_status = true" in
both vmx_hwapic_isr_update() and vmx_refresh_apicv_exec_ctrl().
Therefore, how about we continue to handle 'update_vmcs01_apicv_status' and
'update_vmcs01_hwapic_isr' as independent operations?
1. Take the approach reviewed by Chao, and ...
2. Fix the vmx_refresh_apicv_exec_ctrl() issue with an additional patch:
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bcea087b642f..7d98c11a8920 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -19,6 +19,7 @@
#include "trace.h"
#include "vmx.h"
#include "smm.h"
+#include "x86_ops.h"
static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
@@ -5214,9 +5215,9 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
}
- if (vmx->nested.update_vmcs01_apicv_status) {
- vmx->nested.update_vmcs01_apicv_status = false;
- kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+ if (vmx->nested.update_vmcs01_apicv_exec_ctrl) {
+ vmx->nested.update_vmcs01_apicv_exec_ctrl = false;
+ vmx_refresh_apicv_exec_ctrl(vcpu);
}
if (vmx->nested.update_vmcs01_hwapic_isr) {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c3b9eb72b6f3..83705a6d5a8a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4415,7 +4415,7 @@ void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (is_guest_mode(vcpu)) {
- vmx->nested.update_vmcs01_apicv_status = true;
+ vmx->nested.update_vmcs01_apicv_exec_ctrl = true;
return;
}
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index ea93121029f9..f6bee0e132a8 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -134,7 +134,7 @@ struct nested_vmx {
bool change_vmcs01_virtual_apic_mode;
bool reload_vmcs01_apic_access_page;
bool update_vmcs01_cpu_dirty_logging;
- bool update_vmcs01_apicv_status;
+ bool update_vmcs01_apicv_exec_ctrl;
bool update_vmcs01_hwapic_isr;
/*
By the way, while reviewing the source code, I noticed that certain read
accesses to 'apicv_inhibit_reasons' are not protected by 'apicv_update_lock'.
Thank you very much!
Dongli Zhang
Powered by blists - more mailing lists