Message-ID: <dfa5fd77-09e2-4133-a757-8c407593c6c9@intel.com>
Date: Tue, 26 Nov 2024 18:15:28 +0200
From: Adrian Hunter <adrian.hunter@...el.com>
To: Nikolay Borisov <nik.borisov@...e.com>, pbonzini@...hat.com,
seanjc@...gle.com, kvm@...r.kernel.org, dave.hansen@...ux.intel.com
Cc: rick.p.edgecombe@...el.com, kai.huang@...el.com,
reinette.chatre@...el.com, xiaoyao.li@...el.com,
tony.lindgren@...ux.intel.com, binbin.wu@...ux.intel.com,
dmatlack@...gle.com, isaku.yamahata@...el.com, linux-kernel@...r.kernel.org,
x86@...nel.org, yan.y.zhao@...el.com, chao.gao@...el.com,
weijiang.yang@...el.com
Subject: Re: [PATCH 3/7] KVM: TDX: vcpu_run: save/restore host state(host
kernel gs)
On 25/11/24 16:12, Nikolay Borisov wrote:
>
>
> On 21.11.24 22:14, Adrian Hunter wrote:
>> From: Isaku Yamahata <isaku.yamahata@...el.com>
>>
>> On entering/exiting TDX vcpu, preserved or clobbered CPU state is different
>> from the VMX case. Add TDX hooks to save/restore host/guest CPU state.
>> Save/restore kernel GS base MSR.
>>
>> Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
>> Signed-off-by: Adrian Hunter <adrian.hunter@...el.com>
>> Reviewed-by: Paolo Bonzini <pbonzini@...hat.com>
>> ---
>> TD vcpu enter/exit v1:
>> - Clarify comment (Binbin)
>> - Use lower case preserved and add the for VMX in log (Tony)
>> - Fix bisectability issue with includes (Kai)
>> ---
>> arch/x86/kvm/vmx/main.c | 24 ++++++++++++++++++--
>> arch/x86/kvm/vmx/tdx.c | 46 ++++++++++++++++++++++++++++++++++++++
>> arch/x86/kvm/vmx/tdx.h | 4 ++++
>> arch/x86/kvm/vmx/x86_ops.h | 4 ++++
>> 4 files changed, 76 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
>> index 44ec6005a448..3a8ffc199be2 100644
>> --- a/arch/x86/kvm/vmx/main.c
>> +++ b/arch/x86/kvm/vmx/main.c
>> @@ -129,6 +129,26 @@ static void vt_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>         vmx_vcpu_load(vcpu, cpu);
>> }
>> +static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
>> +{
>> +        if (is_td_vcpu(vcpu)) {
>> +                tdx_prepare_switch_to_guest(vcpu);
>> +                return;
>> +        }
>> +
>> +        vmx_prepare_switch_to_guest(vcpu);
>> +}
>> +
>> +static void vt_vcpu_put(struct kvm_vcpu *vcpu)
>> +{
>> +        if (is_td_vcpu(vcpu)) {
>> +                tdx_vcpu_put(vcpu);
>> +                return;
>> +        }
>> +
>> +        vmx_vcpu_put(vcpu);
>> +}
>> +
>> static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
>> {
>>         if (is_td_vcpu(vcpu))
>> @@ -250,9 +270,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>>         .vcpu_free = vt_vcpu_free,
>>         .vcpu_reset = vt_vcpu_reset,
>> -        .prepare_switch_to_guest = vmx_prepare_switch_to_guest,
>> +        .prepare_switch_to_guest = vt_prepare_switch_to_guest,
>>         .vcpu_load = vt_vcpu_load,
>> -        .vcpu_put = vmx_vcpu_put,
>> +        .vcpu_put = vt_vcpu_put,
>>         .update_exception_bitmap = vmx_update_exception_bitmap,
>>         .get_feature_msr = vmx_get_feature_msr,
>> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>> index 5fa5b65b9588..6e4ea2d420bc 100644
>> --- a/arch/x86/kvm/vmx/tdx.c
>> +++ b/arch/x86/kvm/vmx/tdx.c
>> @@ -1,6 +1,7 @@
>> // SPDX-License-Identifier: GPL-2.0
>> #include <linux/cleanup.h>
>> #include <linux/cpu.h>
>> +#include <linux/mmu_context.h>
>> #include <asm/tdx.h>
>> #include "capabilities.h"
>> #include "mmu.h"
>> @@ -9,6 +10,7 @@
>> #include "vmx.h"
>> #include "mmu/spte.h"
>> #include "common.h"
>> +#include "posted_intr.h"
>> #include <trace/events/kvm.h>
>> #include "trace.h"
>> @@ -605,6 +607,9 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
>>         if ((kvm_tdx->xfam & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE)
>>                 vcpu->arch.xfd_no_write_intercept = true;
>> +        tdx->host_state_need_save = true;
>> +        tdx->host_state_need_restore = false;
>
> nit: Rather than having 2 separate values that actually work in tandem, why not define a u8 or even a u32 and use a mask of valid flags?
>
> So you can have something like:
>
> #define SAVE_HOST BIT(0)
> #define RESTORE_HOST BIT(1)
>
> tdx->state_flags = SAVE_HOST;
>
> I don't know what the plans for the future are, but there might be cases where you have more complex flags composed of simpler ones.
>
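If I've understood the suggestion, the transitions at the call sites below would look something like this (just a sketch, using your names):

        /* tdx_vcpu_create() and end of tdx_prepare_switch_to_host() */
        tdx->state_flags = SAVE_HOST;

        /* tdx_prepare_switch_to_guest(), after saving MSR_KERNEL_GS_BASE */
        tdx->state_flags &= ~SAVE_HOST;

        /* tdx_vcpu_run(), after tdx_vcpu_enter_exit() */
        tdx->state_flags |= RESTORE_HOST;
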
There are really only 3 possibilities:

Initial state (or after tdx_prepare_switch_to_host()):
        tdx->host_state_need_save = true;
        tdx->host_state_need_restore = false;

After save (i.e. after tdx_prepare_switch_to_guest()):
        tdx->host_state_need_save = false;
        tdx->host_state_need_restore = false;

After enter/exit (i.e. after tdx_vcpu_enter_exit()):
        tdx->host_state_need_save = false;
        tdx->host_state_need_restore = true;

I can't think of good names, perhaps:

enum tdx_prepare_switch_state {
        TDX_PREP_UNSAVED,
        TDX_PREP_SAVED,
        TDX_PREP_UNRESTORED,
};
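
With that, the assignments shown inline below would collapse to three transitions (again just a sketch):

        /* tdx_vcpu_create() and end of tdx_prepare_switch_to_host() */
        tdx->prep_switch_state = TDX_PREP_UNSAVED;

        /* tdx_prepare_switch_to_guest(), after saving MSR_KERNEL_GS_BASE */
        tdx->prep_switch_state = TDX_PREP_SAVED;

        /* tdx_vcpu_run(), after tdx_vcpu_enter_exit() */
        tdx->prep_switch_state = TDX_PREP_UNRESTORED;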
>>         tdx->state = VCPU_TD_STATE_UNINITIALIZED;
>>         return 0;
>> @@ -631,6 +636,45 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>         local_irq_enable();
>> }
>> +/*
>> + * Compared to vmx_prepare_switch_to_guest(), there is not much to do
>> + * as SEAMCALL/SEAMRET calls take care of most of save and restore.
>> + */
>> +void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
>> +{
>> +        struct vcpu_tdx *tdx = to_tdx(vcpu);
>> +
>> +        if (!tdx->host_state_need_save)
> if (!(tdx->state_flags & SAVE_HOST))
if (tdx->prep_switch_state != TDX_PREP_UNSAVED)
>> +                return;
>> +
>> +        if (likely(is_64bit_mm(current->mm)))
>> +                tdx->msr_host_kernel_gs_base = current->thread.gsbase;
>> +        else
>> +                tdx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
>> +
>> +        tdx->host_state_need_save = false;
>
> tdx->state_flags &= ~SAVE_HOST;
tdx->prep_switch_state = TDX_PREP_SAVED;
>> +}
>> +
>> +static void tdx_prepare_switch_to_host(struct kvm_vcpu *vcpu)
>> +{
>> +        struct vcpu_tdx *tdx = to_tdx(vcpu);
>> +
>> +        tdx->host_state_need_save = true;
>> +        if (!tdx->host_state_need_restore)
> if (!(tdx->state_flags & RESTORE_HOST))
if (tdx->prep_switch_state != TDX_PREP_UNRESTORED)
>
>> +                return;
>> +
>> +        ++vcpu->stat.host_state_reload;
>> +
>> +        wrmsrl(MSR_KERNEL_GS_BASE, tdx->msr_host_kernel_gs_base);
>> +        tdx->host_state_need_restore = false;
tdx->prep_switch_state = TDX_PREP_UNSAVED;
>> +}
>> +
>> +void tdx_vcpu_put(struct kvm_vcpu *vcpu)
>> +{
>> +        vmx_vcpu_pi_put(vcpu);
>> +        tdx_prepare_switch_to_host(vcpu);
>> +}
>> +
>> void tdx_vcpu_free(struct kvm_vcpu *vcpu)
>> {
>>         struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
>> @@ -732,6 +776,8 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
>>         tdx_vcpu_enter_exit(vcpu);
>> +        tdx->host_state_need_restore = true;
>
> tdx->state_flags |= RESTORE_HOST;
tdx->prep_switch_state = TDX_PREP_UNRESTORED;
>
>> +
>>         vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
>>         trace_kvm_exit(vcpu, KVM_ISA_VMX);
>> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
>> index ebee1049b08b..48cf0a1abfcc 100644
>> --- a/arch/x86/kvm/vmx/tdx.h
>> +++ b/arch/x86/kvm/vmx/tdx.h
>> @@ -54,6 +54,10 @@ struct vcpu_tdx {
>>         u64 vp_enter_ret;
>>         enum vcpu_tdx_state state;
>> +
>> +        bool host_state_need_save;
>> +        bool host_state_need_restore;
>
> This would avoid needing a discrete member for each of those boolean checks.
>
>> +        u64 msr_host_kernel_gs_base;
>> };
>> void tdh_vp_rd_failed(struct vcpu_tdx *tdx, char *uclass, u32 field, u64 err);
>> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
>> index 3d292a677b92..5bd45a720007 100644
>> --- a/arch/x86/kvm/vmx/x86_ops.h
>> +++ b/arch/x86/kvm/vmx/x86_ops.h
>> @@ -130,6 +130,8 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu);
>> void tdx_vcpu_free(struct kvm_vcpu *vcpu);
>> void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
>> fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit);
>> +void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
>> +void tdx_vcpu_put(struct kvm_vcpu *vcpu);
>> int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
>> @@ -161,6 +163,8 @@ static inline fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediat
>> {
>>         return EXIT_FASTPATH_NONE;
>> }
>> +static inline void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) {}
>> +static inline void tdx_vcpu_put(struct kvm_vcpu *vcpu) {}
>> static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
>>
>