[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <oims37p6hfw4d2ufyinvi44scy3rhmbvibsmi66cde4e4pnidb@ugwhcwtalghf>
Date: Tue, 16 Dec 2025 16:34:56 +0000
From: Yosry Ahmed <yosry.ahmed@...ux.dev>
To: Paolo Bonzini <pbonzini@...hat.com>,
Sean Christopherson <seanjc@...gle.com>
Cc: kvm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 24/26] KVM: nSVM: Restrict mapping VMCB12 on nested
VMRUN
On Mon, Dec 15, 2025 at 07:27:19PM +0000, Yosry Ahmed wrote:
> All accesses to the VMCB12 in the guest memory on nested VMRUN are
> limited to nested_svm_vmrun() and nested_svm_failed_vmrun(). However,
> the VMCB12 remains mapped throughout nested_svm_vmrun(). Mapping and
> unmapping around usages is possible, but it becomes easy-ish to
> introduce bugs where 'vmcb12' is used after being unmapped.
>
> Move reading the VMCB12 and copying to cache from nested_svm_vmrun()
> into a new helper, nested_svm_copy_vmcb12_to_cache(), that maps the
> VMCB12, caches the needed fields, and unmaps it. Use
> kvm_vcpu_map_readonly() as only reading the VMCB12 is needed.
>
> Similarly, move mapping the VMCB12 on VMRUN failure into
> nested_svm_failed_vmrun(). Inject a triple fault if the mapping fails,
> similar to nested_svm_vmexit().
>
> Signed-off-by: Yosry Ahmed <yosry.ahmed@...ux.dev>
> ---
> arch/x86/kvm/svm/nested.c | 55 ++++++++++++++++++++++++++++-----------
> 1 file changed, 40 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 48ba34d8b713..d33a2a27efe5 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -1055,23 +1055,55 @@ static void __nested_svm_vmexit(struct vcpu_svm *svm, struct vmcb *vmcb12)
> kvm_queue_exception(vcpu, DB_VECTOR);
> }
>
> -static void nested_svm_failed_vmrun(struct vcpu_svm *svm, struct vmcb *vmcb12)
> +static void nested_svm_failed_vmrun(struct vcpu_svm *svm, u64 vmcb12_gpa)
> {
> + struct kvm_vcpu *vcpu = &svm->vcpu;
> + struct kvm_host_map map;
> + struct vmcb *vmcb12;
> + int r;
> +
> WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);
>
Ugh, I missed leave_guest_mode() here, which means guest state won't be
cleaned up properly and the triple fault won't be injected correctly if
the map fails. I re-introduced the bug I fixed earlier in the series :')
Should probably add WARN_ON_ONCE(is_guest_mode()) in
__nested_svm_vmexit(), since the caller is expected to call
leave_guest_mode() first.
> + r = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
> + if (r) {
> + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
> + return;
> + }
> +
> + vmcb12 = map.hva;
> vmcb12->control.exit_code = SVM_EXIT_ERR;
> vmcb12->control.exit_code_hi = -1u;
> vmcb12->control.exit_info_1 = 0;
> vmcb12->control.exit_info_2 = 0;
> __nested_svm_vmexit(svm, vmcb12);
> +
> + kvm_vcpu_unmap(vcpu, &map);
> +}
> +
> +static int nested_svm_copy_vmcb12_to_cache(struct kvm_vcpu *vcpu, u64 vmcb12_gpa)
> +{
> + struct vcpu_svm *svm = to_svm(vcpu);
> + struct kvm_host_map map;
> + struct vmcb *vmcb12;
> + int r;
> +
> + r = kvm_vcpu_map_readonly(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
> + if (r)
> + return r;
> +
> + vmcb12 = map.hva;
> +
> + nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
> + nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
> +
> + kvm_vcpu_unmap(vcpu, &map);
> + return 0;
> }
>
> int nested_svm_vmrun(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> int ret;
> - struct vmcb *vmcb12;
> - struct kvm_host_map map;
> u64 vmcb12_gpa;
> struct vmcb *vmcb01 = svm->vmcb01.ptr;
>
> @@ -1092,22 +1124,17 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
> return ret;
> }
>
> + if (WARN_ON_ONCE(!svm->nested.initialized))
> + return -EINVAL;
> +
> vmcb12_gpa = svm->vmcb->save.rax;
> - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) {
> + if (nested_svm_copy_vmcb12_to_cache(vcpu, vmcb12_gpa)) {
> kvm_inject_gp(vcpu, 0);
> return 1;
> }
>
> ret = kvm_skip_emulated_instruction(vcpu);
>
> - vmcb12 = map.hva;
> -
> - if (WARN_ON_ONCE(!svm->nested.initialized))
> - return -EINVAL;
> -
> - nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
> - nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
> -
> /*
> * Since vmcb01 is not in use, we can use it to store some of the L1
> * state.
> @@ -1128,11 +1155,9 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
> svm->nmi_l1_to_l2 = false;
> svm->soft_int_injected = false;
>
> - nested_svm_failed_vmrun(svm, vmcb12);
> + nested_svm_failed_vmrun(svm, vmcb12_gpa);
> }
>
> - kvm_vcpu_unmap(vcpu, &map);
> -
> return ret;
> }
>
> --
> 2.52.0.239.gd5f0c6e74e-goog
>
Powered by blists - more mailing lists