[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <lbbd3hbglrlnsxwqb2t6cri7peqcjrrxqtfqdcnhqo5njlgava@v56im3yjgllf>
Date: Thu, 11 Dec 2025 20:13:33 +0000
From: Yosry Ahmed <yosry.ahmed@...ux.dev>
To: Sean Christopherson <seanjc@...gle.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>, Jim Mattson <jmattson@...gle.com>,
kvm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 11/13] KVM: nSVM: Simplify nested_svm_vmrun()
On Thu, Dec 11, 2025 at 07:25:21PM +0000, Yosry Ahmed wrote:
> On Tue, Dec 09, 2025 at 08:11:41AM -0800, Sean Christopherson wrote:
> > On Mon, Nov 10, 2025, Yosry Ahmed wrote:
> > > Call nested_svm_merge_msrpm() from enter_svm_guest_mode() if called from
> > > the VMRUN path, instead of making the call in nested_svm_vmrun(). This
> > > simplifies the flow of nested_svm_vmrun() and removes all jumps to
> > > cleanup labels.
> > >
> > > Signed-off-by: Yosry Ahmed <yosry.ahmed@...ux.dev>
> > > ---
> > > arch/x86/kvm/svm/nested.c | 28 +++++++++++++---------------
> > > 1 file changed, 13 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> > > index a48668c36a191..89830380cebc5 100644
> > > --- a/arch/x86/kvm/svm/nested.c
> > > +++ b/arch/x86/kvm/svm/nested.c
> > > @@ -1020,6 +1020,9 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, bool from_vmrun)
> > >
> > > nested_svm_hv_update_vm_vp_ids(vcpu);
> > >
> > > + if (from_vmrun && !nested_svm_merge_msrpm(vcpu))
> >
> > This is silly, just do:
> >
> > if (enter_svm_guest_mode(vcpu, vmcb12_gpa, true) ||
> > !nested_svm_merge_msrpm(vcpu)) {
> > svm->nested.nested_run_pending = 0;
> > svm->nmi_l1_to_l2 = false;
> > svm->soft_int_injected = false;
> >
> > svm->vmcb->control.exit_code = SVM_EXIT_ERR;
> > svm->vmcb->control.exit_code_hi = -1u;
> > svm->vmcb->control.exit_info_1 = 0;
> > svm->vmcb->control.exit_info_2 = 0;
> >
> > nested_svm_vmexit(svm);
> > }
>
> Actually, if we go with the approach of making all VMRUN failures
> happen before preparing the VMCB02 (as discussed in the other thread),
> then we will want to call nested_svm_merge_msrpm() from within
> enter_svm_guest_mode().
We can also just call nested_svm_merge_msrpm() before
enter_svm_guest_mode(), which seems to work. Part of me still prefers to
keep all the potential failures bundled together in
enter_svm_guest_mode() though.
>
> Otherwise, we either have a separate failure path for
> nested_svm_merge_msrpm(), or we make all VMRUN failures happen after
> preparing the VMCB02 and handled by nested_svm_vmexit().
>
> I like having a separate exit path for VMRUN failures, and it makes more
> sense to do the consistency checks on VMCB12 before preparing VMCB02.
> But I understand if you prefer to keep things simple and move all
> failures after VMCB02.
>
> I already have it implemented with the separate VMRUN failure path, but
> I don't wanna spam you with another series if you prefer it the other
> way.
>
> >
> > > + return -1;
> >
> > Please stop returning -1, use a proper -errno.
> >
> > > +
> > > return 0;
> > > }
> > >
> > > @@ -1105,23 +1108,18 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
> > >
> > > svm->nested.nested_run_pending = 1;
> > >
> > > - if (enter_svm_guest_mode(vcpu, vmcb12_gpa, true))
> > > - goto out_exit_err;
> > > -
> > > - if (nested_svm_merge_msrpm(vcpu))
> > > - return ret;
> > > -
> > > -out_exit_err:
> > > - svm->nested.nested_run_pending = 0;
> > > - svm->nmi_l1_to_l2 = false;
> > > - svm->soft_int_injected = false;
> > > + if (enter_svm_guest_mode(vcpu, vmcb12_gpa, true)) {
> > > + svm->nested.nested_run_pending = 0;
> > > + svm->nmi_l1_to_l2 = false;
> > > + svm->soft_int_injected = false;
> > >
> > > - svm->vmcb->control.exit_code = SVM_EXIT_ERR;
> > > - svm->vmcb->control.exit_code_hi = 0;
> > > - svm->vmcb->control.exit_info_1 = 0;
> > > - svm->vmcb->control.exit_info_2 = 0;
> > > + svm->vmcb->control.exit_code = SVM_EXIT_ERR;
> > > + svm->vmcb->control.exit_code_hi = 0;
> > > + svm->vmcb->control.exit_info_1 = 0;
> > > + svm->vmcb->control.exit_info_2 = 0;
> > >
> > > - nested_svm_vmexit(svm);
> > > + nested_svm_vmexit(svm);
> >
> > Note, there's a pre-existing bug in nested_svm_vmexit(). Lovely, and it's a
> > user-triggerable WARN_ON() (and not even a WARN_ON_ONCE() at that).
> >
> > If nested_svm_vmexit() fails to map vmcb12, it (unbelievably stupidly) injects a
> > #GP and hopes for the best. Oh FFS, it also has the asinine -EINVAL "logic".
> > Anyways, it injects #GP (maybe), and bails early, which leaves
> > KVM_REQ_GET_NESTED_STATE_PAGES set. KVM will then process that on the next
> > vcpu_enter_guest() and trip the WARN_ON() in svm_get_nested_state_pages().
> >
> > Something like this to clean up the mess:
> >
> > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> > index d4c872843a9d..96f8009a0d45 100644
> > --- a/arch/x86/kvm/svm/nested.c
> > +++ b/arch/x86/kvm/svm/nested.c
> > @@ -1018,9 +1018,6 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, bool from_vmrun)
> >
> > nested_svm_hv_update_vm_vp_ids(vcpu);
> >
> > - if (from_vmrun && !nested_svm_merge_msrpm(vcpu))
> > - return -1;
> > -
> > return 0;
> > }
> >
> > @@ -1094,7 +1091,8 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
> >
> > svm->nested.nested_run_pending = 1;
> >
> > - if (enter_svm_guest_mode(vcpu, vmcb12_gpa, true)) {
> > > + if (enter_svm_guest_mode(vcpu, vmcb12_gpa, true) ||
> > > + !nested_svm_merge_msrpm(vcpu)) {
> > svm->nested.nested_run_pending = 0;
> > svm->nmi_l1_to_l2 = false;
> > svm->soft_int_injected = false;
> > @@ -1158,24 +1156,16 @@ void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
> > int nested_svm_vmexit(struct vcpu_svm *svm)
> > {
> > struct kvm_vcpu *vcpu = &svm->vcpu;
> > + gpa_t vmcb12_gpa = svm->nested.vmcb12_gpa;
> > struct vmcb *vmcb01 = svm->vmcb01.ptr;
> > struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
> > struct vmcb *vmcb12;
> > struct kvm_host_map map;
> > - int rc;
> > -
> > - rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
> > - if (rc) {
> > - if (rc == -EINVAL)
> > - kvm_inject_gp(vcpu, 0);
> > - return 1;
> > - }
> >
> > vmcb12 = map.hva;
> >
> > /* Exit Guest-Mode */
> > leave_guest_mode(vcpu);
> > - svm->nested.vmcb12_gpa = 0;
> > WARN_ON_ONCE(svm->nested.nested_run_pending);
> >
> > kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
> > @@ -1183,6 +1173,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
> > /* in case we halted in L2 */
> > kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
> >
> > + svm->nested.vmcb12_gpa = 0;
> > +
> > + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) {
> > + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
> > + return 1;
> > + }
> > +
> > /* Give the current vmcb to the guest */
> >
> > vmcb12->save.es = vmcb02->save.es;
> > @@ -1973,7 +1970,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> >
> > static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
> > {
> > - if (WARN_ON(!is_guest_mode(vcpu)))
> > + if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
> > return true;
> >
> > if (!vcpu->arch.pdptrs_from_userspace &&
> >
Powered by blists - more mailing lists