Message-ID: <20251118171113.363528-11-griffoul@gmail.org>
Date: Tue, 18 Nov 2025 17:11:13 +0000
From: griffoul@...il.com
To: kvm@...r.kernel.org
Cc: seanjc@...gle.com,
	pbonzini@...hat.com,
	vkuznets@...hat.com,
	shuah@...nel.org,
	dwmw@...zon.co.uk,
	linux-kselftest@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	Fred Griffoul <fgriffo@...zon.co.uk>
Subject: [PATCH v2 10/10] KVM: selftests: Add L2 vcpu context switch test

From: Fred Griffoul <fgriffo@...zon.co.uk>

Add a selftest to validate nested VMX context switching between multiple
L2 vCPUs running on the same L1 vCPU. The test exercises both the direct
VMX interface (using vmptrld/vmclear operations) and the enlightened
VMCS (eVMCS) interface used in Hyper-V nested scenarios.

The test creates multiple VMCS structures and switches between them to
verify that the nested_context KVM counters match the expected values
for the given number of L2 vCPUs and number of switches.
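
For illustration, assuming KVM_NESTED_OVERSUB_RATIO were 4 (the actual
value comes from the rest of the series): with 3 L2 vCPUs and 4 switches
every context fits, so the expected counters are
reuse = 3 * (4 - 1) = 9 and recycle = 0; with 8 L2 vCPUs in the migrate
variant (vmclear after each run) no context can be reused, so
reuse = 0 and recycle = 8 * 4 - 4 = 28.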

Signed-off-by: Fred Griffoul <fgriffo@...zon.co.uk>
---
 tools/testing/selftests/kvm/Makefile.kvm      |   1 +
 .../selftests/kvm/x86/vmx_l2_switch_test.c    | 416 ++++++++++++++++++
 2 files changed, 417 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86/vmx_l2_switch_test.c

diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 3431568d837e..5d47afa5789b 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -138,6 +138,7 @@ TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
 TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
 TEST_GEN_PROGS_x86 += x86/aperfmperf_test
 TEST_GEN_PROGS_x86 += x86/vmx_apic_update_test
+TEST_GEN_PROGS_x86 += x86/vmx_l2_switch_test
 TEST_GEN_PROGS_x86 += access_tracking_perf_test
 TEST_GEN_PROGS_x86 += coalesced_io_test
 TEST_GEN_PROGS_x86 += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/x86/vmx_l2_switch_test.c b/tools/testing/selftests/kvm/x86/vmx_l2_switch_test.c
new file mode 100644
index 000000000000..5ec0da2f8386
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_l2_switch_test.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test nested VMX context switching between multiple VMCS structures.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define L2_GUEST_STACK_SIZE 64
+#define L2_VCPU_MAX 16
+
+struct l2_vcpu_config {
+	vm_vaddr_t hv_pages_gva;	/* Guest VA for eVMCS */
+	vm_vaddr_t vmx_pages_gva;	/* Guest VA for VMX pages */
+	unsigned long stack[L2_GUEST_STACK_SIZE];
+	uint16_t vpid;
+};
+
+struct l1_test_config {
+	struct l2_vcpu_config l2_vcpus[L2_VCPU_MAX];
+	uint64_t hypercall_gpa;
+	uint32_t nr_l2_vcpus;
+	uint32_t nr_switches;
+	bool enable_vpid;
+	bool use_evmcs;
+	bool sched_only;
+};
+
+static void l2_guest(void)
+{
+	while (1)
+		vmcall();
+}
+
+static void run_l2_guest_evmcs(struct hyperv_test_pages *hv_pages,
+			       struct vmx_pages *vmx,
+			       void *guest_rip,
+			       void *guest_rsp,
+			       uint16_t vpid)
+{
+	GUEST_ASSERT(load_evmcs(hv_pages));
+	prepare_vmcs(vmx, guest_rip, guest_rsp);
+	current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
+	vmwrite(VIRTUAL_PROCESSOR_ID, vpid);
+
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
+	current_evmcs->guest_rip += 3;	/* vmcall */
+
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
+}
+
+static void run_l2_guest_vmx_migrate(struct vmx_pages *vmx,
+				     void *guest_rip,
+				     void *guest_rsp,
+				     uint16_t vpid,
+				     bool start)
+{
+	uint32_t control;
+
+	/*
+	 * Emulate L2 vCPU migration: vmptrld/vmlaunch/vmclear
+	 */
+
+	if (start)
+		GUEST_ASSERT(load_vmcs(vmx));
+	else
+		GUEST_ASSERT(!vmptrld(vmx->vmcs_gpa));
+
+	prepare_vmcs(vmx, guest_rip, guest_rsp);
+
+	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+	control |= CPU_BASED_USE_MSR_BITMAPS;
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+	vmwrite(VIRTUAL_PROCESSOR_ID, vpid);
+
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
+
+	GUEST_ASSERT(vmptrstz() == vmx->vmcs_gpa);
+	GUEST_ASSERT(!vmclear(vmx->vmcs_gpa));
+}
+
+static void run_l2_guest_vmx_sched(struct vmx_pages *vmx,
+				   void *guest_rip,
+				   void *guest_rsp,
+				   uint16_t vpid,
+				   bool start)
+{
+	/*
+	 * Emulate L2 vCPU multiplexing: vmptrld/vmresume
+	 */
+
+	if (start) {
+		uint32_t control;
+
+		GUEST_ASSERT(load_vmcs(vmx));
+		prepare_vmcs(vmx, guest_rip, guest_rsp);
+
+		control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+		control |= CPU_BASED_USE_MSR_BITMAPS;
+		vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+		vmwrite(VIRTUAL_PROCESSOR_ID, vpid);
+
+		GUEST_ASSERT(!vmlaunch());
+	} else {
+		GUEST_ASSERT(!vmptrld(vmx->vmcs_gpa));
+		GUEST_ASSERT(!vmresume());
+	}
+
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
+
+	vmwrite(GUEST_RIP,
+		vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+}
+
+static void l1_guest_evmcs(struct l1_test_config *config)
+{
+	struct hyperv_test_pages *hv_pages;
+	struct vmx_pages *vmx_pages;
+	uint32_t i, j;
+
+	/* Initialize Hyper-V MSRs */
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+	wrmsr(HV_X64_MSR_HYPERCALL, config->hypercall_gpa);
+
+	/* Enable VP assist page */
+	hv_pages = (struct hyperv_test_pages *)config->l2_vcpus[0].hv_pages_gva;
+	enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+
+	/* Enable evmcs */
+	evmcs_enable();
+
+	vmx_pages = (struct vmx_pages *)config->l2_vcpus[0].vmx_pages_gva;
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+
+	for (i = 0; i < config->nr_switches; i++) {
+		for (j = 0; j < config->nr_l2_vcpus; j++) {
+			struct l2_vcpu_config *l2 = &config->l2_vcpus[j];
+
+			hv_pages = (struct hyperv_test_pages *)l2->hv_pages_gva;
+			vmx_pages = (struct vmx_pages *)l2->vmx_pages_gva;
+
+			run_l2_guest_evmcs(hv_pages, vmx_pages, l2_guest,
+					   &l2->stack[L2_GUEST_STACK_SIZE],
+					   l2->vpid);
+		}
+	}
+
+	GUEST_DONE();
+}
+
+static void l1_guest_vmx(struct l1_test_config *config)
+{
+	struct vmx_pages *vmx_pages;
+	uint32_t i, j;
+
+	vmx_pages = (struct vmx_pages *)config->l2_vcpus[0].vmx_pages_gva;
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+
+	for (i = 0; i < config->nr_switches; i++) {
+		for (j = 0; j < config->nr_l2_vcpus; j++) {
+			struct l2_vcpu_config *l2 = &config->l2_vcpus[j];
+
+			vmx_pages = (struct vmx_pages *)l2->vmx_pages_gva;
+
+			if (config->sched_only)
+				run_l2_guest_vmx_sched(vmx_pages, l2_guest,
+						       &l2->stack[L2_GUEST_STACK_SIZE],
+						       l2->vpid, i == 0);
+			else
+				run_l2_guest_vmx_migrate(vmx_pages, l2_guest,
+							 &l2->stack[L2_GUEST_STACK_SIZE],
+							 l2->vpid, i == 0);
+		}
+	}
+
+	if (config->sched_only) {
+		for (j = 0; j < config->nr_l2_vcpus; j++) {
+			struct l2_vcpu_config *l2 = &config->l2_vcpus[j];
+
+			vmx_pages = (struct vmx_pages *)l2->vmx_pages_gva;
+			vmclear(vmx_pages->vmcs_gpa);
+		}
+	}
+
+	GUEST_DONE();
+}
+
+static void vcpu_clone_hyperv_test_pages(struct kvm_vm *vm,
+					 vm_vaddr_t src_gva,
+					 vm_vaddr_t *dst_gva)
+{
+	struct hyperv_test_pages *src, *dst;
+	vm_vaddr_t evmcs_gva;
+
+	*dst_gva = vm_vaddr_alloc_page(vm);
+
+	src = addr_gva2hva(vm, src_gva);
+	dst = addr_gva2hva(vm, *dst_gva);
+	memcpy(dst, src, sizeof(*dst));
+
+	/* Allocate a new evmcs page */
+	evmcs_gva = vm_vaddr_alloc_page(vm);
+	dst->enlightened_vmcs = (void *)evmcs_gva;
+	dst->enlightened_vmcs_hva = addr_gva2hva(vm, evmcs_gva);
+	dst->enlightened_vmcs_gpa = addr_gva2gpa(vm, evmcs_gva);
+}
+
+static void prepare_vcpu(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+			 uint32_t nr_l2_vcpus, uint32_t nr_switches,
+			 bool enable_vpid, bool use_evmcs,
+			 bool sched_only)
+{
+	vm_vaddr_t config_gva;
+	struct l1_test_config *config;
+	vm_vaddr_t hypercall_page_gva = 0;
+	uint32_t i;
+
+	TEST_ASSERT(nr_l2_vcpus <= L2_VCPU_MAX,
+		    "Too many L2 vCPUs: %u (max %u)", nr_l2_vcpus, L2_VCPU_MAX);
+
+	/* Allocate config structure in guest memory */
+	config_gva = vm_vaddr_alloc(vm, sizeof(*config), 0x1000);
+	config = addr_gva2hva(vm, config_gva);
+	memset(config, 0, sizeof(*config));
+
+	if (use_evmcs) {
+		/* Allocate hypercall page */
+		hypercall_page_gva = vm_vaddr_alloc_page(vm);
+		memset(addr_gva2hva(vm, hypercall_page_gva), 0, getpagesize());
+		config->hypercall_gpa = addr_gva2gpa(vm, hypercall_page_gva);
+
+		/* Enable Hyper-V enlightenments */
+		vcpu_set_hv_cpuid(vcpu);
+		vcpu_enable_evmcs(vcpu);
+	}
+
+	/* Allocate resources for each L2 vCPU */
+	for (i = 0; i < nr_l2_vcpus; i++) {
+		vm_vaddr_t vmx_pages_gva;
+
+		/* Allocate VMX pages (needed for both VMX and eVMCS) */
+		vcpu_alloc_vmx(vm, &vmx_pages_gva);
+		config->l2_vcpus[i].vmx_pages_gva = vmx_pages_gva;
+
+		if (use_evmcs) {
+			vm_vaddr_t hv_pages_gva;
+
+			/* Allocate or clone hyperv_test_pages */
+			if (i == 0) {
+				vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+			} else {
+				vm_vaddr_t first_hv_gva =
+				    config->l2_vcpus[0].hv_pages_gva;
+				vcpu_clone_hyperv_test_pages(vm, first_hv_gva,
+							     &hv_pages_gva);
+			}
+			config->l2_vcpus[i].hv_pages_gva = hv_pages_gva;
+		}
+
+		/* Set VPID */
+		config->l2_vcpus[i].vpid = enable_vpid ? (i + 3) : 0;
+	}
+
+	config->nr_l2_vcpus = nr_l2_vcpus;
+	config->nr_switches = nr_switches;
+	config->enable_vpid = enable_vpid;
+	config->use_evmcs = use_evmcs;
+	config->sched_only = use_evmcs ? false : sched_only;
+
+	/* Pass single pointer to config structure */
+	vcpu_args_set(vcpu, 1, config_gva);
+
+	if (use_evmcs)
+		vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+}
+
+static bool opt_enable_vpid = true;
+static const char *progname;
+
+static void check_stats(struct kvm_vm *vm,
+			uint32_t nr_l2_vcpus,
+			uint32_t nr_switches,
+			bool use_evmcs,
+			bool sched_only)
+{
+	uint64_t reuse = 0;
+	uint64_t recycle = 0;
+
+	reuse = vm_get_stat(vm, nested_context_reuse);
+	recycle = vm_get_stat(vm, nested_context_recycle);
+
+	if (nr_l2_vcpus <= KVM_NESTED_OVERSUB_RATIO) {
+		TEST_ASSERT_EQ(reuse, nr_l2_vcpus * (nr_switches - 1));
+		TEST_ASSERT_EQ(recycle, 0);
+	} else {
+		if (sched_only) {
+			/*
+			 * When scheduling only, no L2 vCPU VMCS is cleared,
+			 * so we reuse up to the maximum number of contexts
+			 * but we cannot recycle any of them.
+			 */
+			TEST_ASSERT_EQ(reuse,
+				       KVM_NESTED_OVERSUB_RATIO *
+				       (nr_switches - 1));
+			TEST_ASSERT_EQ(recycle, 0);
+		} else {
+			/*
+			 * When migrating, we cycle in LRU order, so no
+			 * context can be reused; they are all recycled.
+			 */
+			TEST_ASSERT_EQ(reuse, 0);
+			TEST_ASSERT_EQ(recycle,
+				       (nr_l2_vcpus * nr_switches) -
+				       KVM_NESTED_OVERSUB_RATIO);
+		}
+	}
+
+	printf("%s %u switches with %u L2 vCPUS (%s) reuse %" PRIu64
+	       " recycle %" PRIu64 "\n", progname, nr_switches, nr_l2_vcpus,
+	       use_evmcs ? "evmcs" : (sched_only ? "vmx sched" : "vmx migrate"),
+	       reuse, recycle);
+}
+
+static void run_test(uint32_t nr_l2_vcpus, uint32_t nr_switches,
+		     bool use_evmcs, bool sched_only)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct ucall uc;
+
+	vm = vm_create_with_one_vcpu(&vcpu, use_evmcs
+				     ? l1_guest_evmcs : l1_guest_vmx);
+
+	prepare_vcpu(vm, vcpu, nr_l2_vcpus, nr_switches,
+		     opt_enable_vpid, use_evmcs, sched_only);
+
+	for (;;) {
+		vcpu_run(vcpu);
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_DONE:
+			goto done;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+		default:
+			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+		}
+	}
+
+done:
+	check_stats(vm, nr_l2_vcpus, nr_switches, use_evmcs, sched_only);
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	uint32_t opt_nr_l2_vcpus = 0;
+	uint32_t opt_nr_switches = 0;
+	bool opt_sched_only = true;
+	int opt;
+	int i;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+	progname = argv[0];
+
+	while ((opt = getopt(argc, argv, "c:rs:v")) != -1) {
+		switch (opt) {
+		case 'c':
+			opt_nr_l2_vcpus = atoi_paranoid(optarg);
+			break;
+		case 'r':
+			opt_sched_only = false;
+			break;
+		case 's':
+			opt_nr_switches = atoi_paranoid(optarg);
+			break;
+		case 'v':
+			opt_enable_vpid = false;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (opt_nr_l2_vcpus && opt_nr_switches) {
+		run_test(opt_nr_l2_vcpus, opt_nr_switches, false,
+			 opt_sched_only);
+
+		if (kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS))
+			run_test(opt_nr_l2_vcpus, opt_nr_switches,
+				 true, false);
+	} else {
+		/* VMX vmlaunch */
+		for (i = 2; i <= 16; i++)
+			run_test(i, 4, false, false);
+
+		/* VMX vmresume */
+		for (i = 2; i <= 16; i++)
+			run_test(i, 4, false, true);
+
+		/* eVMCS */
+		if (kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+			for (i = 2; i <= 16; i++)
+				run_test(i, 4, true, false);
+		}
+	}
+
+	return 0;
+}
-- 
2.43.0

