[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3d69b44a-8542-4a11-b233-16487e980d54@intel.com>
Date: Thu, 21 Mar 2024 15:54:37 -0700
From: "Zhang, Dongsheng X" <dongsheng.x.zhang@...el.com>
To: Sagi Shahar <sagis@...gle.com>, linux-kselftest@...r.kernel.org,
 Ackerley Tng <ackerleytng@...gle.com>, Ryan Afranji <afranji@...gle.com>,
 Erdem Aktas <erdemaktas@...gle.com>,
 Isaku Yamahata <isaku.yamahata@...el.com>
Cc: Sean Christopherson <seanjc@...gle.com>,
 Paolo Bonzini <pbonzini@...hat.com>, Shuah Khan <shuah@...nel.org>,
 Peter Gonda <pgonda@...gle.com>, Haibo Xu <haibo1.xu@...el.com>,
 Chao Peng <chao.p.peng@...ux.intel.com>,
 Vishal Annapurve <vannapurve@...gle.com>, Roger Wang <runanwang@...gle.com>,
 Vipin Sharma <vipinsh@...gle.com>, jmattson@...gle.com, dmatlack@...gle.com,
 linux-kernel@...r.kernel.org, kvm@...r.kernel.org, linux-mm@...ck.org
Subject: Re: [RFC PATCH v5 05/29] KVM: selftests: Add helper functions to
 create TDX VMs
On 12/12/2023 12:46 PM, Sagi Shahar wrote:
> From: Erdem Aktas <erdemaktas@...gle.com>
> 
> TDX requires additional IOCTLs to initialize VM and vCPUs to add
> private memory and to finalize the VM memory. Also additional utility
> functions are provided to manipulate a TD, similar to those that
> manipulate a VM in the current selftest framework.
> 
> A TD's initial register state cannot be manipulated directly by
> setting the VM's memory, hence boot code is provided at the TD's reset
> vector. This boot code takes boot parameters loaded in the TD's memory
> and sets up the TD for the selftest.
> 
> Signed-off-by: Erdem Aktas <erdemaktas@...gle.com>
> Signed-off-by: Ryan Afranji <afranji@...gle.com>
> Signed-off-by: Sagi Shahar <sagis@...gle.com>
> Co-developed-by: Ackerley Tng <ackerleytng@...gle.com>
> Signed-off-by: Ackerley Tng <ackerleytng@...gle.com>
> ---
>  tools/testing/selftests/kvm/Makefile          |   2 +
>  .../kvm/include/x86_64/tdx/td_boot.h          |  82 ++++
>  .../kvm/include/x86_64/tdx/td_boot_asm.h      |  16 +
>  .../kvm/include/x86_64/tdx/tdx_util.h         |  16 +
>  .../selftests/kvm/lib/x86_64/tdx/td_boot.S    | 101 ++++
>  .../selftests/kvm/lib/x86_64/tdx/tdx_util.c   | 434 ++++++++++++++++++
>  6 files changed, 651 insertions(+)
>  create mode 100644 tools/testing/selftests/kvm/include/x86_64/tdx/td_boot.h
>  create mode 100644 tools/testing/selftests/kvm/include/x86_64/tdx/td_boot_asm.h
>  create mode 100644 tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
>  create mode 100644 tools/testing/selftests/kvm/lib/x86_64/tdx/td_boot.S
>  create mode 100644 tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> 
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index b11ac221aba4..a35150ab855f 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -50,6 +50,8 @@ LIBKVM_x86_64 += lib/x86_64/svm.c
>  LIBKVM_x86_64 += lib/x86_64/ucall.c
>  LIBKVM_x86_64 += lib/x86_64/vmx.c
>  LIBKVM_x86_64 += lib/x86_64/sev.c
> +LIBKVM_x86_64 += lib/x86_64/tdx/tdx_util.c
> +LIBKVM_x86_64 += lib/x86_64/tdx/td_boot.S
>  
>  LIBKVM_aarch64 += lib/aarch64/gic.c
>  LIBKVM_aarch64 += lib/aarch64/gic_v3.c
> diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/td_boot.h b/tools/testing/selftests/kvm/include/x86_64/tdx/td_boot.h
> new file mode 100644
> index 000000000000..148057e569d6
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/td_boot.h
> @@ -0,0 +1,82 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#ifndef SELFTEST_TDX_TD_BOOT_H
> +#define SELFTEST_TDX_TD_BOOT_H
> +
> +#include <stdint.h>
> +#include "tdx/td_boot_asm.h"
> +
> +/*
> + * Layout for boot section (not to scale)
> + *
> + *                                  GPA
> + * ┌─────────────────────────────┬──0x1_0000_0000 (4GB)
> + * │   Boot code trampoline      │
> + * ├─────────────────────────────┼──0x0_ffff_fff0: Reset vector (16B below 4GB)
> + * │   Boot code                 │
> + * ├─────────────────────────────┼──td_boot will be copied here, so that the
> + * │                             │  jmp to td_boot is exactly at the reset vector
> + * │   Empty space               │
> + * │                             │
> + * ├─────────────────────────────┤
> + * │                             │
> + * │                             │
> + * │   Boot parameters           │
> + * │                             │
> + * │                             │
> + * └─────────────────────────────┴──0x0_ffff_0000: TD_BOOT_PARAMETERS_GPA
> + */
> +#define FOUR_GIGABYTES_GPA (4ULL << 30)
> +
> +/**
> + * The exact memory layout for LGDT or LIDT instructions.
> + */
> +struct __packed td_boot_parameters_dtr {
> +	uint16_t limit;
> +	uint32_t base;
> +};
> +
> +/**
> + * The exact layout in memory required for a ljmp, including the selector for
> + * changing code segment.
> + */
> +struct __packed td_boot_parameters_ljmp_target {
> +	uint32_t eip_gva;
> +	uint16_t code64_sel;
> +};
> +
> +/**
> + * Allows each vCPU to be initialized with different eip and esp.
> + */
> +struct __packed td_per_vcpu_parameters {
> +	uint32_t esp_gva;
> +	struct td_boot_parameters_ljmp_target ljmp_target;
> +};
> +
> +/**
> + * Boot parameters for the TD.
> + *
> + * Unlike a regular VM, we can't ask KVM to set registers such as esp, eip, etc
> + * before boot, so to run selftests, these registers' values have to be
> + * initialized by the TD.
> + *
> + * This struct is loaded in TD private memory at TD_BOOT_PARAMETERS_GPA.
> + *
> + * The TD boot code will read off parameters from this struct and set up the
> + * vcpu for executing selftests.
> + */
> +struct __packed td_boot_parameters {
> +	uint32_t cr0;
> +	uint32_t cr3;
> +	uint32_t cr4;
> +	struct td_boot_parameters_dtr gdtr;
> +	struct td_boot_parameters_dtr idtr;
> +	struct td_per_vcpu_parameters per_vcpu[];
> +};
> +
> +extern void td_boot(void);
> +extern void reset_vector(void);
> +extern void td_boot_code_end(void);
> +
> +#define TD_BOOT_CODE_SIZE (td_boot_code_end - td_boot)
> +
> +#endif /* SELFTEST_TDX_TD_BOOT_H */
> diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/td_boot_asm.h b/tools/testing/selftests/kvm/include/x86_64/tdx/td_boot_asm.h
> new file mode 100644
> index 000000000000..0a07104f7deb
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/td_boot_asm.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#ifndef SELFTEST_TDX_TD_BOOT_ASM_H
> +#define SELFTEST_TDX_TD_BOOT_ASM_H
> +
> +/*
> + * GPA where TD boot parameters wil lbe loaded.
Typo: "wil lbe" ==> "will be"
 
> + *
> + * TD_BOOT_PARAMETERS_GPA is arbitrarily chosen to
> + *
> + * + be within the 4GB address space
> + * + provide enough contiguous memory for the struct td_boot_parameters such
> + *   that there is one struct td_per_vcpu_parameters for KVM_MAX_VCPUS
> + */
> +#define TD_BOOT_PARAMETERS_GPA 0xffff0000
> +
> +#endif  // SELFTEST_TDX_TD_BOOT_ASM_H
> diff --git a/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
> new file mode 100644
> index 000000000000..274b245f200b
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/include/x86_64/tdx/tdx_util.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#ifndef SELFTESTS_TDX_KVM_UTIL_H
> +#define SELFTESTS_TDX_KVM_UTIL_H
> +
> +#include <stdint.h>
> +
> +#include "kvm_util_base.h"
> +
> +struct kvm_vcpu *td_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, void *guest_code);
> +
> +struct kvm_vm *td_create(void);
> +void td_initialize(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
> +		   uint64_t attributes);
> +void td_finalize(struct kvm_vm *vm);
> +
> +#endif // SELFTESTS_TDX_KVM_UTIL_H
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/td_boot.S b/tools/testing/selftests/kvm/lib/x86_64/tdx/td_boot.S
> new file mode 100644
> index 000000000000..800e09264d4e
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/td_boot.S
> @@ -0,0 +1,101 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include "tdx/td_boot_asm.h"
> +
> +/* Offsets for reading struct td_boot_parameters */
> +#define TD_BOOT_PARAMETERS_CR0         0
> +#define TD_BOOT_PARAMETERS_CR3         4
> +#define TD_BOOT_PARAMETERS_CR4         8
> +#define TD_BOOT_PARAMETERS_GDT         12
> +#define TD_BOOT_PARAMETERS_IDT         18
> +#define TD_BOOT_PARAMETERS_PER_VCPU    24
> +
> +/* Offsets for reading struct td_per_vcpu_parameters */
> +#define TD_PER_VCPU_PARAMETERS_ESP_GVA     0
> +#define TD_PER_VCPU_PARAMETERS_LJMP_TARGET 4
> +
> +#define SIZEOF_TD_PER_VCPU_PARAMETERS      10
> +
> +.code32
> +
> +.globl td_boot
> +td_boot:
> +	/* In this procedure, edi is used as a temporary register */
> +	cli
> +
> +	/* Paging is off */
> +
> +	movl $TD_BOOT_PARAMETERS_GPA, %ebx
> +
> +	/*
> +	 * Find the address of struct td_per_vcpu_parameters for this
> +	 * vCPU based on esi (TDX spec: initialized with vcpu id). Put
> +	 * struct address into register for indirect addressing
> +	 */
> +	movl $SIZEOF_TD_PER_VCPU_PARAMETERS, %eax
> +	mul %esi
> +	leal TD_BOOT_PARAMETERS_PER_VCPU(%ebx), %edi
> +	addl %edi, %eax
> +
> +	/* Setup stack */
> +	movl TD_PER_VCPU_PARAMETERS_ESP_GVA(%eax), %esp
> +
> +	/* Setup GDT */
> +	leal TD_BOOT_PARAMETERS_GDT(%ebx), %edi
> +	lgdt (%edi)
> +
> +	/* Setup IDT */
> +	leal TD_BOOT_PARAMETERS_IDT(%ebx), %edi
> +	lidt (%edi)
> +
> +	/*
> +	 * Set up control registers (There are no instructions to
> +	 * mov from memory to control registers, hence we need to use ebx
> +	 * as a scratch register)
> +	 */
> +	movl TD_BOOT_PARAMETERS_CR4(%ebx), %edi
> +	movl %edi, %cr4
> +	movl TD_BOOT_PARAMETERS_CR3(%ebx), %edi
> +	movl %edi, %cr3
> +	movl TD_BOOT_PARAMETERS_CR0(%ebx), %edi
> +	movl %edi, %cr0
> +
> +	/* Paging is on after setting the most significant bit on cr0 */
> +
> +	/*
> +	 * Jump to selftest guest code. Far jumps read <segment
> +	 * selector:new eip> from <addr+4:addr>. This location has
> +	 * already been set up in boot parameters, and we can read boot
> +	 * parameters because boot code and boot parameters are loaded so
> +	 * that GVA and GPA are mapped 1:1.
> +	 */
> +	ljmp *TD_PER_VCPU_PARAMETERS_LJMP_TARGET(%eax)
> +
> +.globl reset_vector
> +reset_vector:
> +	jmp td_boot
> +	/*
> +	 * Pad reset_vector to its full size of 16 bytes so that this
> +	 * can be loaded with the end of reset_vector aligned to GPA=4G
> +	 */
> +	int3
> +	int3
> +	int3
> +	int3
> +	int3
> +	int3
> +	int3
> +	int3
> +	int3
> +	int3
> +	int3
> +	int3
> +	int3
> +	int3
> +
> +/* Leave marker so size of td_boot code can be computed */
> +.globl td_boot_code_end
> +td_boot_code_end:
> +
> +/* Disable executable stack */
> +.section .note.GNU-stack,"",%progbits
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> new file mode 100644
> index 000000000000..9b69c733ce01
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/lib/x86_64/tdx/tdx_util.c
> @@ -0,0 +1,434 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#define _GNU_SOURCE
> +#include <asm/kvm.h>
> +#include <asm/kvm_host.h>
> +#include <errno.h>
> +#include <linux/kvm.h>
> +#include <stdint.h>
> +#include <sys/ioctl.h>
> +
> +#include "kvm_util.h"
> +#include "test_util.h"
> +#include "tdx/td_boot.h"
> +#include "kvm_util_base.h"
> +#include "processor.h"
> +
> +/*
> + * TDX ioctls
> + */
> +
> +static char *tdx_cmd_str[] = {
> +	"KVM_TDX_CAPABILITIES",
> +	"KVM_TDX_INIT_VM",
> +	"KVM_TDX_INIT_VCPU",
> +	"KVM_TDX_INIT_MEM_REGION",
> +	"KVM_TDX_FINALIZE_VM"
> +};
> +#define TDX_MAX_CMD_STR (ARRAY_SIZE(tdx_cmd_str))
> +
> +static void tdx_ioctl(int fd, int ioctl_no, uint32_t flags, void *data)
> +{
> +	struct kvm_tdx_cmd tdx_cmd;
> +	int r;
> +
> +	TEST_ASSERT(ioctl_no < TDX_MAX_CMD_STR, "Unknown TDX CMD : %d\n",
> +		    ioctl_no);
> +
> +	memset(&tdx_cmd, 0x0, sizeof(tdx_cmd));
> +	tdx_cmd.id = ioctl_no;
> +	tdx_cmd.flags = flags;
> +	tdx_cmd.data = (uint64_t)data;
> +
> +	r = ioctl(fd, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
> +	TEST_ASSERT(r == 0, "%s failed: %d  %d", tdx_cmd_str[ioctl_no], r,
> +		    errno);
> +}
> +
> +#define XFEATURE_MASK_CET (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL)
> +
> +static void tdx_apply_cpuid_restrictions(struct kvm_cpuid2 *cpuid_data)
> +{
> +	for (int i = 0; i < cpuid_data->nent; i++) {
> +		struct kvm_cpuid_entry2 *e = &cpuid_data->entries[i];
> +
> +		if (e->function == 0xd && e->index == 0) {
> +			/*
> +			 * TDX module requires both XTILE_{CFG, DATA} to be set.
> +			 * Both bits are required for AMX to be functional.
> +			 */
> +			if ((e->eax & XFEATURE_MASK_XTILE) !=
> +			    XFEATURE_MASK_XTILE) {
> +				e->eax &= ~XFEATURE_MASK_XTILE;
> +			}
> +		}
> +		if (e->function == 0xd && e->index == 1) {
> +			/*
> +			 * TDX doesn't support LBR yet.
> +			 * Disable bits from the XCR0 register.
> +			 */
> +			e->ecx &= ~XFEATURE_MASK_LBR;
> +			/*
> +			 * TDX modules requires both CET_{U, S} to be set even
> +			 * if only one is supported.
> +			 */
> +			if (e->ecx & XFEATURE_MASK_CET)
> +				e->ecx |= XFEATURE_MASK_CET;
> +		}
> +	}
> +}
> +
> +static void tdx_td_init(struct kvm_vm *vm, uint64_t attributes)
> +{
> +	const struct kvm_cpuid2 *cpuid;
> +	struct kvm_tdx_init_vm *init_vm;
> +
> +	cpuid = kvm_get_supported_cpuid();
> +
> +	init_vm = malloc(sizeof(*init_vm) +
> +			 sizeof(init_vm->cpuid.entries[0]) * cpuid->nent);
Can add a sanity checking for init_vm here like the following:
TEST_ASSERT(init_vm, "vm allocation failed");
> +
> +	memset(init_vm, 0, sizeof(*init_vm));
Can use calloc instead:
init_vm = calloc(1, sizeof(*init_vm));
    
         
> +	memcpy(&init_vm->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
> +
> +	init_vm->attributes = attributes;
> +
> +	tdx_apply_cpuid_restrictions(&init_vm->cpuid);
> +
> +	tdx_ioctl(vm->fd, KVM_TDX_INIT_VM, 0, init_vm);
> +}
> +
> +static void tdx_td_vcpu_init(struct kvm_vcpu *vcpu)
> +{
> +	const struct kvm_cpuid2 *cpuid = kvm_get_supported_cpuid();
> +
> +	vcpu_init_cpuid(vcpu, cpuid);
> +	tdx_ioctl(vcpu->fd, KVM_TDX_INIT_VCPU, 0, NULL);
> +}
> +
> +static void tdx_init_mem_region(struct kvm_vm *vm, void *source_pages,
> +				uint64_t gpa, uint64_t size)
> +{
> +	struct kvm_tdx_init_mem_region mem_region = {
> +		.source_addr = (uint64_t)source_pages,
> +		.gpa = gpa,
> +		.nr_pages = size / PAGE_SIZE,
> +	};
> +	uint32_t metadata = KVM_TDX_MEASURE_MEMORY_REGION;
> +
> +	TEST_ASSERT((mem_region.nr_pages > 0) &&
> +			    ((mem_region.nr_pages * PAGE_SIZE) == size),
> +		    "Cannot add partial pages to the guest memory.\n");
> +	TEST_ASSERT(((uint64_t)source_pages & (PAGE_SIZE - 1)) == 0,
> +		    "Source memory buffer is not page aligned\n");
> +	tdx_ioctl(vm->fd, KVM_TDX_INIT_MEM_REGION, metadata, &mem_region);
> +}
> +
> +static void tdx_td_finalizemr(struct kvm_vm *vm)
> +{
> +	tdx_ioctl(vm->fd, KVM_TDX_FINALIZE_VM, 0, NULL);
> +}
> +
Nit: tdx_td_finalizemr ==> tdx_td_finalize_mr
> +/*
> + * TD creation/setup/finalization
> + */
> +
> +static void tdx_enable_capabilities(struct kvm_vm *vm)
> +{
> +	int rc;
> +
> +	rc = kvm_check_cap(KVM_CAP_X2APIC_API);
> +	TEST_ASSERT(rc, "TDX: KVM_CAP_X2APIC_API is not supported!");
> +	rc = kvm_check_cap(KVM_CAP_SPLIT_IRQCHIP);
> +	TEST_ASSERT(rc, "TDX: KVM_CAP_SPLIT_IRQCHIP is not supported!");
> +
> +	vm_enable_cap(vm, KVM_CAP_X2APIC_API,
> +		      KVM_X2APIC_API_USE_32BIT_IDS |
> +			      KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK);
> +	vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24);
> +}
> +
> +static void tdx_configure_memory_encryption(struct kvm_vm *vm)
> +{
> +	/* Configure shared/enCrypted bit for this VM according to TDX spec */
> +	vm->arch.s_bit = 1ULL << (vm->pa_bits - 1);
> +	vm->arch.c_bit = 0;
> +	/* Set gpa_protected_mask so that tagging/untagging of GPAs works */
> +	vm->gpa_protected_mask = vm->arch.s_bit;
> +	/* This VM is protected (has memory encryption) */
> +	vm->protected = true;
> +}
> +
> +static void tdx_apply_cr4_restrictions(struct kvm_sregs *sregs)
> +{
> +	/* TDX spec 11.6.2: CR4 bit MCE is fixed to 1 */
> +	sregs->cr4 |= X86_CR4_MCE;
> +
> +	/* Set this because UEFI also sets this up, to handle XMM exceptions */
> +	sregs->cr4 |= X86_CR4_OSXMMEXCPT;
> +
> +	/* TDX spec 11.6.2: CR4 bit VMXE and SMXE are fixed to 0 */
> +	sregs->cr4 &= ~(X86_CR4_VMXE | X86_CR4_SMXE);
> +}
> +
> +static void load_td_boot_code(struct kvm_vm *vm)
> +{
> +	void *boot_code_hva = addr_gpa2hva(vm, FOUR_GIGABYTES_GPA - TD_BOOT_CODE_SIZE);
> +
> +	TEST_ASSERT(td_boot_code_end - reset_vector == 16,
> +		"The reset vector must be 16 bytes in size.");
> +	memcpy(boot_code_hva, td_boot, TD_BOOT_CODE_SIZE);
> +}
> +
> +static void load_td_per_vcpu_parameters(struct td_boot_parameters *params,
> +					struct kvm_sregs *sregs,
> +					struct kvm_vcpu *vcpu,
> +					void *guest_code)
> +{
> +	/* Store vcpu_index to match what the TDX module would store internally */
> +	static uint32_t vcpu_index;
> +
> +	struct td_per_vcpu_parameters *vcpu_params = ¶ms->per_vcpu[vcpu_index];
I think we can use vcpu->id in place of vcpu_index in this function, thus removing vcpu_index
> +
> +	TEST_ASSERT(vcpu->initial_stack_addr != 0,
> +		"initial stack address should not be 0");
> +	TEST_ASSERT(vcpu->initial_stack_addr <= 0xffffffff,
> +		"initial stack address must fit in 32 bits");
> +	TEST_ASSERT((uint64_t)guest_code <= 0xffffffff,
> +		"guest_code must fit in 32 bits");
> +	TEST_ASSERT(sregs->cs.selector != 0, "cs.selector should not be 0");
> +
> +	vcpu_params->esp_gva = (uint32_t)(uint64_t)vcpu->initial_stack_addr;
> +	vcpu_params->ljmp_target.eip_gva = (uint32_t)(uint64_t)guest_code;
> +	vcpu_params->ljmp_target.code64_sel = sregs->cs.selector;
> +
> +	vcpu_index++;
> +}
> +
> +static void load_td_common_parameters(struct td_boot_parameters *params,
> +				struct kvm_sregs *sregs)
> +{
> +	/* Set parameters! */
> +	params->cr0 = sregs->cr0;
> +	params->cr3 = sregs->cr3;
> +	params->cr4 = sregs->cr4;
> +	params->gdtr.limit = sregs->gdt.limit;
> +	params->gdtr.base = sregs->gdt.base;
> +	params->idtr.limit = sregs->idt.limit;
> +	params->idtr.base = sregs->idt.base;
> +
> +	TEST_ASSERT(params->cr0 != 0, "cr0 should not be 0");
> +	TEST_ASSERT(params->cr3 != 0, "cr3 should not be 0");
> +	TEST_ASSERT(params->cr4 != 0, "cr4 should not be 0");
> +	TEST_ASSERT(params->gdtr.base != 0, "gdt base address should not be 0");
> +}
> +
> +static void load_td_boot_parameters(struct td_boot_parameters *params,
> +				struct kvm_vcpu *vcpu, void *guest_code)
> +{
> +	struct kvm_sregs sregs;
> +
> +	/* Assemble parameters in sregs */
> +	memset(&sregs, 0, sizeof(struct kvm_sregs));
> +	vcpu_setup_mode_sregs(vcpu->vm, &sregs);
> +	tdx_apply_cr4_restrictions(&sregs);
> +	kvm_setup_idt(vcpu->vm, &sregs.idt);
> +
> +	if (!params->cr0)
> +		load_td_common_parameters(params, &sregs);
> +
> +	load_td_per_vcpu_parameters(params, &sregs, vcpu, guest_code);
> +}
> +
> +/**
> + * Adds a vCPU to a TD (Trusted Domain) with minimum defaults. It will not set
> + * up any general purpose registers as they will be initialized by the TDX. In
> + * TDX, vCPUs RIP is set to 0xFFFFFFF0. See Intel TDX EAS Section "Initial State
> + * of Guest GPRs" for more information on vCPUs initial register values when
> + * entering the TD first time.
> + *
> + * Input Args:
> + *   vm - Virtual Machine
> + *   vcpuid - The id of the VCPU to add to the VM.
> + */
> +struct kvm_vcpu *td_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, void *guest_code)
> +{
> +	struct kvm_vcpu *vcpu;
> +
> +	/*
> +	 * TD setup will not use the value of rip set in vm_vcpu_add anyway, so
> +	 * NULL can be used for guest_code.
> +	 */
> +	vcpu = vm_vcpu_add(vm, vcpu_id, NULL);
> +
> +	tdx_td_vcpu_init(vcpu);
> +
> +	load_td_boot_parameters(addr_gpa2hva(vm, TD_BOOT_PARAMETERS_GPA),
> +				vcpu, guest_code);
> +
> +	return vcpu;
> +}
> +
> +/**
> + * Iterate over set ranges within sparsebit @s. In each iteration,
> + * @range_begin and @range_end will take the beginning and end of the set range,
> + * which are of type sparsebit_idx_t.
> + *
> + * For example, if the range [3, 7] (inclusive) is set, within the iteration,
> + * @range_begin will take the value 3 and @range_end will take the value 7.
> + *
> + * Ensure that there is at least one bit set before using this macro with
> + * sparsebit_any_set(), because sparsebit_first_set() will abort if none are
> + * set.
> + */
> +#define sparsebit_for_each_set_range(s, range_begin, range_end)		\
> +	for (range_begin = sparsebit_first_set(s),			\
> +		     range_end = sparsebit_next_clear(s, range_begin) - 1; \
> +	     range_begin && range_end;					\
> +	     range_begin = sparsebit_next_set(s, range_end),		\
> +		     range_end = sparsebit_next_clear(s, range_begin) - 1)
> +/*
> + * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the -1
> + * would then cause an underflow back to 2**64 - 1. This is expected and
> + * correct.
> + *
> + * If the last range in the sparsebit is [x, y] and we try to iterate,
> + * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try and
> + * find the first range, but that's correct because the condition expression
> + * would cause us to quit the loop.
> + */
> +
> +static void load_td_memory_region(struct kvm_vm *vm,
> +				  struct userspace_mem_region *region)
> +{
> +	const struct sparsebit *pages = region->protected_phy_pages;
> +	const uint64_t hva_base = region->region.userspace_addr;
> +	const vm_paddr_t gpa_base = region->region.guest_phys_addr;
> +	const sparsebit_idx_t lowest_page_in_region = gpa_base >>
> +						      vm->page_shift;
> +
> +	sparsebit_idx_t i;
> +	sparsebit_idx_t j;
> +
> +	if (!sparsebit_any_set(pages))
> +		return;
> +
> +	sparsebit_for_each_set_range(pages, i, j) {
> +		const uint64_t size_to_load = (j - i + 1) * vm->page_size;
> +		const uint64_t offset =
> +			(i - lowest_page_in_region) * vm->page_size;
> +		const uint64_t hva = hva_base + offset;
> +		const uint64_t gpa = gpa_base + offset;
> +		void *source_addr;
> +
> +		/*
> +		 * KVM_TDX_INIT_MEM_REGION ioctl cannot encrypt memory in place,
> +		 * hence we have to make a copy if there's only one backing
> +		 * memory source
> +		 */
> +		source_addr = mmap(NULL, size_to_load, PROT_READ | PROT_WRITE,
> +				   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +		TEST_ASSERT(
> +			source_addr,
> +			"Could not allocate memory for loading memory region");
> +
> +		memcpy(source_addr, (void *)hva, size_to_load);
> +
> +		tdx_init_mem_region(vm, source_addr, gpa, size_to_load);
> +
> +		munmap(source_addr, size_to_load);
> +	}
> +}
> +
> +static void load_td_private_memory(struct kvm_vm *vm)
> +{
> +	int ctr;
> +	struct userspace_mem_region *region;
> +
> +	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
> +		load_td_memory_region(vm, region);
> +	}
> +}
> +
> +struct kvm_vm *td_create(void)
> +{
> +	struct vm_shape shape;
> +
> +	shape.mode = VM_MODE_DEFAULT;
> +	shape.type = KVM_X86_TDX_VM;
> +	return ____vm_create(shape);
Nit:
init shape to 0s:
struct vm_shape shape = {};
Pass pointer of share to ____vm_create() instead:
____vm_create(&shape)
> +}
> +
> +static void td_setup_boot_code(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type)
> +{
> +	vm_vaddr_t addr;
> +	size_t boot_code_allocation = round_up(TD_BOOT_CODE_SIZE, PAGE_SIZE);
> +	vm_paddr_t boot_code_base_gpa = FOUR_GIGABYTES_GPA - boot_code_allocation;
> +	size_t npages = DIV_ROUND_UP(boot_code_allocation, PAGE_SIZE);
> +
> +	vm_userspace_mem_region_add(vm, src_type, boot_code_base_gpa, 1, npages,
> +				    KVM_MEM_PRIVATE);
> +	addr = vm_vaddr_alloc_1to1(vm, boot_code_allocation, boot_code_base_gpa, 1);
> +	TEST_ASSERT_EQ(addr, boot_code_base_gpa);
> +
> +	load_td_boot_code(vm);
> +}
> +
> +static size_t td_boot_parameters_size(void)
> +{
> +	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
> +	size_t total_per_vcpu_parameters_size =
> +		max_vcpus * sizeof(struct td_per_vcpu_parameters);
> +
> +	return sizeof(struct td_boot_parameters) + total_per_vcpu_parameters_size;
> +}
> +
> +static void td_setup_boot_parameters(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type)
> +{
> +	vm_vaddr_t addr;
> +	size_t boot_params_size = td_boot_parameters_size();
> +	int npages = DIV_ROUND_UP(boot_params_size, PAGE_SIZE);
> +	size_t total_size = npages * PAGE_SIZE;
> +
> +	vm_userspace_mem_region_add(vm, src_type, TD_BOOT_PARAMETERS_GPA, 2,
> +				    npages, KVM_MEM_PRIVATE);
> +	addr = vm_vaddr_alloc_1to1(vm, total_size, TD_BOOT_PARAMETERS_GPA, 2);
> +	TEST_ASSERT_EQ(addr, TD_BOOT_PARAMETERS_GPA);
> +}
> +
> +void td_initialize(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
> +		   uint64_t attributes)
> +{
> +	uint64_t nr_pages_required;
> +
> +	tdx_enable_capabilities(vm);
> +
> +	tdx_configure_memory_encryption(vm);
> +
> +	tdx_td_init(vm, attributes);
> +
> +	nr_pages_required = vm_nr_pages_required(VM_MODE_DEFAULT, 1, 0);
> +
> +	/*
> +	 * Add memory (add 0th memslot) for TD. This will be used to setup the
> +	 * CPU (provide stack space for the CPU) and to load the elf file.
> +	 */
> +	vm_userspace_mem_region_add(vm, src_type, 0, 0, nr_pages_required,
> +				    KVM_MEM_PRIVATE);
> +
> +	kvm_vm_elf_load(vm, program_invocation_name);
> +
> +	vm_init_descriptor_tables(vm);
> +
> +	td_setup_boot_code(vm, src_type);
> +	td_setup_boot_parameters(vm, src_type);
> +}
> +
> +void td_finalize(struct kvm_vm *vm)
> +{
> +	sync_exception_handlers_to_guest(vm);
> +
> +	load_td_private_memory(vm);
> +
> +	tdx_td_finalizemr(vm);
> +}
Powered by blists - more mailing lists
 
