[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e8163ac4-59cd-4beb-bb92-44aa2d7702ab@linux.intel.com>
Date: Tue, 10 Dec 2024 10:51:56 +0800
From: Binbin Wu <binbin.wu@...ux.intel.com>
To: Chao Gao <chao.gao@...el.com>
Cc: pbonzini@...hat.com, seanjc@...gle.com, kvm@...r.kernel.org,
rick.p.edgecombe@...el.com, kai.huang@...el.com, adrian.hunter@...el.com,
reinette.chatre@...el.com, xiaoyao.li@...el.com,
tony.lindgren@...ux.intel.com, isaku.yamahata@...el.com,
yan.y.zhao@...el.com, michael.roth@....com, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 4/7] KVM: TDX: Handle TDG.VP.VMCALL<MapGPA>
On 12/9/2024 8:45 PM, Chao Gao wrote:
>> +/*
>> + * Split into chunks and check interrupt pending between chunks. This allows
>> + * for timely injection of interrupts to prevent issues with guest lockup
>> + * detection.
> Would it cause any problems if an (intra-host or inter-host) migration happens
> between chunks?
>
> My understanding is that KVM would lose track of the progress if
> map_gpa_next/end are not migrated. I'm not sure if KVM should expose the
> state or prevent migration in the middle. Or, we can let the userspace VMM
> cut the range into chunks, making it the userspace VMM's responsibility to
> ensure necessary state is migrated.
>
> I am not asking to fix this issue right now. I just want to ensure this issue
> can be solved in a clean way when we start to support migration.
How about:
Before exiting to userspace, KVM always sets the start GPA to r11 and set
return code to TDVMCALL_STATUS_RETRY.
- If userspace finishes the part, the complete_userspace_io() callback will
be called and the return code will be set to TDVMCALL_STATUS_SUCCESS.
- If the live migration interrupts the MapGAP in the userspace, and
complete_userspace_io() is not called, when the vCPU resumes from migration,
TDX guest will see the return code is TDVMCALL_STATUS_RETRY with the failed
GPA, and it can retry the MapGAP with the failed GAP.
>
>> + */
>> +#define TDX_MAP_GPA_MAX_LEN (2 * 1024 * 1024)
>> +static void __tdx_map_gpa(struct vcpu_tdx * tdx);
>> +
>> +static int tdx_complete_vmcall_map_gpa(struct kvm_vcpu *vcpu)
>> +{
>> + struct vcpu_tdx * tdx = to_tdx(vcpu);
>> +
>> + if(vcpu->run->hypercall.ret) {
>> + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
>> + kvm_r11_write(vcpu, tdx->map_gpa_next);
> s/kvm_r11_write/tdvmcall_set_return_val
>
> please fix other call sites in this patch.
I didn't use tdvmcall_set_return_val() because it's used for TDVMCALL
like MMIO, PIO, etc..., which has a return value.
For MapGPA, it's part of error information returned to TDX guest.
So, I used the raw register name version.
Let's see if others also think tdvmcall_set_return_val() is more clear
for this case.
>
>> + return 1;
>> + }
>> +
>> + tdx->map_gpa_next += TDX_MAP_GPA_MAX_LEN;
>> + if (tdx->map_gpa_next >= tdx->map_gpa_end) {
>> + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS);
>> + return 1;
>> + }
>> +
>> + /*
>> + * Stop processing the remaining part if there is pending interrupt.
>> + * Skip checking pending virtual interrupt (reflected by
>> + * TDX_VCPU_STATE_DETAILS_INTR_PENDING bit) to save a seamcall because
>> + * if guest disabled interrupt, it's OK not returning back to guest
>> + * due to non-NMI interrupt. Also it's rare to TDVMCALL_MAP_GPA
>> + * immediately after STI or MOV/POP SS.
>> + */
>> + if (pi_has_pending_interrupt(vcpu) ||
>> + kvm_test_request(KVM_REQ_NMI, vcpu) || vcpu->arch.nmi_pending) {
>> + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_RETRY);
>> + kvm_r11_write(vcpu, tdx->map_gpa_next);
>> + return 1;
>> + }
>> +
>> + __tdx_map_gpa(tdx);
>> + /* Forward request to userspace. */
>> + return 0;
>> +}
>> +
>> +static void __tdx_map_gpa(struct vcpu_tdx * tdx)
>> +{
>> + u64 gpa = tdx->map_gpa_next;
>> + u64 size = tdx->map_gpa_end - tdx->map_gpa_next;
>> +
>> + if(size > TDX_MAP_GPA_MAX_LEN)
>> + size = TDX_MAP_GPA_MAX_LEN;
>> +
>> + tdx->vcpu.run->exit_reason = KVM_EXIT_HYPERCALL;
>> + tdx->vcpu.run->hypercall.nr = KVM_HC_MAP_GPA_RANGE;
>> + tdx->vcpu.run->hypercall.args[0] = gpa & ~gfn_to_gpa(kvm_gfn_direct_bits(tdx->vcpu.kvm));
>> + tdx->vcpu.run->hypercall.args[1] = size / PAGE_SIZE;
>> + tdx->vcpu.run->hypercall.args[2] = vt_is_tdx_private_gpa(tdx->vcpu.kvm, gpa) ?
>> + KVM_MAP_GPA_RANGE_ENCRYPTED :
>> + KVM_MAP_GPA_RANGE_DECRYPTED;
>> + tdx->vcpu.run->hypercall.flags = KVM_EXIT_HYPERCALL_LONG_MODE;
>> +
>> + tdx->vcpu.arch.complete_userspace_io = tdx_complete_vmcall_map_gpa;
>> +}
>> +
>> +static int tdx_map_gpa(struct kvm_vcpu *vcpu)
>> +{
>> + struct vcpu_tdx * tdx = to_tdx(vcpu);
>> + u64 gpa = tdvmcall_a0_read(vcpu);
>> + u64 size = tdvmcall_a1_read(vcpu);
>> + u64 ret;
>> +
>> + /*
>> + * Converting TDVMCALL_MAP_GPA to KVM_HC_MAP_GPA_RANGE requires
>> + * userspace to enable KVM_CAP_EXIT_HYPERCALL with KVM_HC_MAP_GPA_RANGE
>> + * bit set. If not, the error code is not defined in GHCI for TDX, use
>> + * TDVMCALL_STATUS_INVALID_OPERAND for this case.
>> + */
>> + if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) {
>> + ret = TDVMCALL_STATUS_INVALID_OPERAND;
>> + goto error;
>> + }
>> +
>> + if (gpa + size <= gpa || !kvm_vcpu_is_legal_gpa(vcpu, gpa) ||
>> + !kvm_vcpu_is_legal_gpa(vcpu, gpa + size -1) ||
>> + (vt_is_tdx_private_gpa(vcpu->kvm, gpa) !=
>> + vt_is_tdx_private_gpa(vcpu->kvm, gpa + size -1))) {
>> + ret = TDVMCALL_STATUS_INVALID_OPERAND;
>> + goto error;
>> + }
>> +
>> + if (!PAGE_ALIGNED(gpa) || !PAGE_ALIGNED(size)) {
>> + ret = TDVMCALL_STATUS_ALIGN_ERROR;
>> + goto error;
>> + }
>> +
>> + tdx->map_gpa_end = gpa + size;
>> + tdx->map_gpa_next = gpa;
>> +
>> + __tdx_map_gpa(tdx);
>> + /* Forward request to userspace. */
>> + return 0;
>> +
>> +error:
>> + tdvmcall_set_return_code(vcpu, ret);
>> + kvm_r11_write(vcpu, gpa);
>> + return 1;
>> +}
>> +
>> static int handle_tdvmcall(struct kvm_vcpu *vcpu)
>> {
>> if (tdvmcall_exit_type(vcpu))
>> return tdx_emulate_vmcall(vcpu);
>>
>> switch (tdvmcall_leaf(vcpu)) {
>> + case TDVMCALL_MAP_GPA:
>> + return tdx_map_gpa(vcpu);
>> default:
>> break;
>> }
>> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
>> index 1abc94b046a0..bfae70887695 100644
>> --- a/arch/x86/kvm/vmx/tdx.h
>> +++ b/arch/x86/kvm/vmx/tdx.h
>> @@ -71,6 +71,9 @@ struct vcpu_tdx {
>>
>> enum tdx_prepare_switch_state prep_switch_state;
>> u64 msr_host_kernel_gs_base;
>> +
>> + u64 map_gpa_next;
>> + u64 map_gpa_end;
>> };
>>
>> void tdh_vp_rd_failed(struct vcpu_tdx *tdx, char *uclass, u32 field, u64 err);
>> --
>> 2.46.0
>>
Powered by blists - more mailing lists