lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <c95acb18a40593e398747395cf7e92877a1805b7.1749672978.git.afranji@google.com>
Date: Wed, 11 Jun 2025 21:16:37 +0000
From: Ryan Afranji <afranji@...gle.com>
To: kvm@...r.kernel.org, linux-kernel@...r.kernel.org, x86@...nel.org
Cc: sagis@...gle.com, bp@...en8.de, chao.p.peng@...ux.intel.com, 
	dave.hansen@...ux.intel.com, dmatlack@...gle.com, erdemaktas@...gle.com, 
	isaku.yamahata@...el.com, kai.huang@...el.com, mingo@...hat.com, 
	pbonzini@...hat.com, seanjc@...gle.com, tglx@...utronix.de, 
	zhi.wang.linux@...il.com, ackerleytng@...gle.com, andrew.jones@...ux.dev, 
	david@...hat.com, hpa@...or.com, kirill.shutemov@...ux.intel.com, 
	linux-kselftest@...r.kernel.org, tabba@...gle.com, vannapurve@...gle.com, 
	yan.y.zhao@...el.com, rick.p.edgecombe@...el.com, 
	Ryan Afranji <afranji@...gle.com>
Subject: [RFC PATCH v2 10/10] KVM: selftests: Add irqfd/interrupts test for
 TDX with migration

From: Ackerley Tng <ackerleytng@...gle.com>

Add a selftest to verify that interrupts sent to a TDX VM before migration
are successfully handled by the migrated VM.

Co-developed-by: Ryan Afranji <afranji@...gle.com>
Signed-off-by: Ryan Afranji <afranji@...gle.com>
Signed-off-by: Ackerley Tng <ackerleytng@...gle.com>
---
 tools/testing/selftests/kvm/Makefile.kvm      |   1 +
 .../testing/selftests/kvm/include/kvm_util.h  |   4 +
 .../selftests/kvm/include/x86/tdx/tdx_util.h  |   2 +
 .../selftests/kvm/include/x86/tdx/test_util.h |   5 +
 tools/testing/selftests/kvm/lib/kvm_util.c    |  35 ++-
 .../selftests/kvm/lib/x86/tdx/tdx_util.c      |  20 ++
 .../selftests/kvm/lib/x86/tdx/test_util.c     |  17 ++
 .../kvm/x86/tdx_irqfd_migrate_test.c          | 264 ++++++++++++++++++
 .../selftests/kvm/x86/tdx_migrate_tests.c     |  21 --
 9 files changed, 343 insertions(+), 26 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/x86/tdx_irqfd_migrate_test.c

diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index d4c8cfb5910f..4ae0d105c2a7 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -156,6 +156,7 @@ TEST_GEN_PROGS_x86 += x86/tdx_vm_test
 TEST_GEN_PROGS_x86 += x86/tdx_shared_mem_test
 TEST_GEN_PROGS_x86 += x86/tdx_upm_test
 TEST_GEN_PROGS_x86 += x86/tdx_migrate_tests
+TEST_GEN_PROGS_x86 += x86/tdx_irqfd_migrate_test
 
 # Compiled outputs used by test targets
 TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 8b252a668c78..f93ac2b9b0ff 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -80,6 +80,7 @@ enum kvm_mem_region_type {
 	MEM_REGION_PT,
 	MEM_REGION_TEST_DATA,
 	MEM_REGION_TDX_BOOT_PARAMS,
+	MEM_REGION_TDX_SHARED_DATA,
 	MEM_REGION_UCALL,
 	NR_MEM_REGIONS,
 };
@@ -958,6 +959,9 @@ int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
 struct kvm_irq_routing *kvm_gsi_routing_create(void);
 void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
 		uint32_t gsi, uint32_t pin);
+void kvm_gsi_routing_msi_add(struct kvm_irq_routing *routing, uint32_t gsi,
+			     uint32_t address_lo, uint32_t address_hi,
+			     uint32_t data);
 int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
 void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
 
diff --git a/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h b/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h
index 9b495e621225..4393c8649718 100644
--- a/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h
+++ b/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h
@@ -10,6 +10,8 @@ extern uint64_t tdx_s_bit;
 void tdx_filter_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid_data);
 void __tdx_mask_cpuid_features(struct kvm_cpuid_entry2 *entry);
 void tdx_enable_capabilities(struct kvm_vm *vm);
+int __tdx_migrate_from(int dst_fd, int src_fd);
+void tdx_migrate_from(struct kvm_vm *dst_vm, struct kvm_vm *src_vm);
 
 struct kvm_vcpu *td_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, void *guest_code);
 
diff --git a/tools/testing/selftests/kvm/include/x86/tdx/test_util.h b/tools/testing/selftests/kvm/include/x86/tdx/test_util.h
index 3330d5a54698..0dd859974cb3 100644
--- a/tools/testing/selftests/kvm/include/x86/tdx/test_util.h
+++ b/tools/testing/selftests/kvm/include/x86/tdx/test_util.h
@@ -130,4 +130,9 @@ uint64_t tdx_test_read_64bit(struct kvm_vcpu *vcpu, uint64_t port);
  */
 uint64_t tdx_test_read_64bit_report_from_guest(struct kvm_vcpu *vcpu);
 
+/*
+ * Soft-enables the local APIC for TDX guests; x2APIC mode is already on.
+ */
+void tdx_guest_x2apic_enable(void);
+
 #endif // SELFTEST_TDX_TEST_UTIL_H
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 9dc3c7bf0443..bbb489635064 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1293,10 +1293,12 @@ static void vm_migrate_mem_region(struct kvm_vm *dst_vm, struct kvm_vm *src_vm,
 				  struct userspace_mem_region *src_region)
 {
 	struct userspace_mem_region *dst_region;
-	int dst_guest_memfd;
+	int dst_guest_memfd = -1;
 
-	dst_guest_memfd =
-		vm_link_guest_memfd(dst_vm, src_region->region.guest_memfd, 0);
+	if (src_region->region.guest_memfd != -1)
+		dst_guest_memfd = vm_link_guest_memfd(dst_vm,
+						      src_region->region.guest_memfd,
+						      0);
 
 	dst_region = vm_mem_region_alloc(
 			dst_vm, src_region->region.guest_phys_addr,
@@ -1312,8 +1314,12 @@ static void vm_migrate_mem_region(struct kvm_vm *dst_vm, struct kvm_vm *src_vm,
 	src_region->host_mem = 0;
 
 	dst_region->region.guest_memfd = dst_guest_memfd;
-	dst_region->region.guest_memfd_offset =
-		src_region->region.guest_memfd_offset;
+	if (src_region->region.guest_memfd == -1) {
+		dst_region->fd = src_region->fd;
+	} else {
+		dst_region->region.guest_memfd_offset =
+			src_region->region.guest_memfd_offset;
+	}
 
 	userspace_mem_region_commit(dst_vm, dst_region);
 }
@@ -2057,6 +2063,25 @@ void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
 	routing->nr++;
 }
 
+void kvm_gsi_routing_msi_add(struct kvm_irq_routing *routing, uint32_t gsi,
+			     uint32_t address_lo, uint32_t address_hi,
+			     uint32_t data)
+{
+	struct kvm_irq_routing_entry *entry;
+
+	assert(routing);
+	assert(routing->nr < KVM_MAX_IRQ_ROUTES);
+
+	/* Claim the slot at index routing->nr and fill it as an MSI route. */
+	entry = &routing->entries[routing->nr++];
+	entry->gsi = gsi;
+	entry->type = KVM_IRQ_ROUTING_MSI;
+	entry->flags = 0;
+	entry->u.msi.address_lo = address_lo;
+	entry->u.msi.address_hi = address_hi;
+	entry->u.msi.data = data;
+}
+
 int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
 {
 	int ret;
diff --git a/tools/testing/selftests/kvm/lib/x86/tdx/tdx_util.c b/tools/testing/selftests/kvm/lib/x86/tdx/tdx_util.c
index a3612bf187a0..8216a778474a 100644
--- a/tools/testing/selftests/kvm/lib/x86/tdx/tdx_util.c
+++ b/tools/testing/selftests/kvm/lib/x86/tdx/tdx_util.c
@@ -372,6 +372,26 @@ static void tdx_apply_cr4_restrictions(struct kvm_sregs *sregs)
 	sregs->cr4 &= ~(X86_CR4_VMXE | X86_CR4_SMXE);
 }
 
+int __tdx_migrate_from(int dst_fd, int src_fd)
+{
+	struct kvm_enable_cap cap = { 0 };
+
+	/* Enable the capability on @dst_fd, naming @src_fd as the source VM. */
+	cap.cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM;
+	cap.args[0] = src_fd;
+	return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
+}
+
+void tdx_migrate_from(struct kvm_vm *dst_vm, struct kvm_vm *src_vm)
+{
+	int ret;
+
+	vm_migrate_mem_regions(dst_vm, src_vm);	/* memory first, then context */
+	ret = __tdx_migrate_from(dst_vm->fd, src_vm->fd);
+	TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno);
+	src_vm->enc_migrated = true;	/* record that src_vm was migrated away */
+}
+
 static void load_td_boot_code(struct kvm_vm *vm)
 {
 	void *boot_code_hva = addr_gpa2hva(vm, FOUR_GIGABYTES_GPA - TD_BOOT_CODE_SIZE);
diff --git a/tools/testing/selftests/kvm/lib/x86/tdx/test_util.c b/tools/testing/selftests/kvm/lib/x86/tdx/test_util.c
index f92ddda2d1ac..7b622ccb2433 100644
--- a/tools/testing/selftests/kvm/lib/x86/tdx/test_util.c
+++ b/tools/testing/selftests/kvm/lib/x86/tdx/test_util.c
@@ -6,6 +6,7 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#include "apic.h"
 #include "kvm_util.h"
 #include "tdx/tdcall.h"
 #include "tdx/tdx.h"
@@ -185,3 +186,19 @@ uint64_t tdx_test_read_64bit_report_from_guest(struct kvm_vcpu *vcpu)
 {
 	return tdx_test_read_64bit(vcpu, TDX_TEST_REPORT_PORT);
 }
+
+void tdx_guest_x2apic_enable(void)
+{
+	const uint64_t spiv_msr = APIC_BASE_MSR + (APIC_SPIV >> 4);
+	uint64_t spiv, ret;
+
+	/*
+	 * A TD already runs with x2APIC enabled and has to use it, so there
+	 * is no mode switch to do here; only soft-enable the APIC via SPIV.
+	 */
+	ret = tdg_vp_vmcall_instruction_rdmsr(spiv_msr, &spiv);
+	GUEST_ASSERT_EQ(ret, 0);
+	spiv |= APIC_SPIV_APIC_ENABLED;
+	ret = tdg_vp_vmcall_instruction_wrmsr(spiv_msr, spiv);
+	GUEST_ASSERT_EQ(ret, 0);
+}
diff --git a/tools/testing/selftests/kvm/x86/tdx_irqfd_migrate_test.c b/tools/testing/selftests/kvm/x86/tdx_irqfd_migrate_test.c
new file mode 100644
index 000000000000..d80cc204bd67
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/tdx_irqfd_migrate_test.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/kvm.h>
+#include <string.h>
+#include <sys/eventfd.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "tdx/tdcall.h"
+#include "tdx/tdx.h"
+#include "tdx/tdx_util.h"
+#include "tdx/test_util.h"
+#include "test_util.h"
+#include "ucall_common.h"
+
+#define TEST_IRQ_PIN 24
+
+#define NUM_INTERRUPTS 256
+#define INTERRUPT_COUNT_GPA 0x100000000ULL
+#define INTERRUPT_COUNT_MEMSLOT 5
+
+#define MIGRATION_LOOPS 10
+
+static uint32_t (*interrupt_count_per_vector)[NUM_INTERRUPTS];
+
+static void interrupt_handler_increment_count(struct ex_regs *regs)
+{
+	(*interrupt_count_per_vector)[regs->vector] += 1; /* tally this vector */
+	x2apic_write_reg(APIC_EOI, 0);			  /* write 0 to the EOI register */
+}
+
+static void guest_code(void)
+{
+	uint32_t nr_syncs;
+
+	/* Soft-enable the local APIC before unmasking interrupts. */
+	tdx_guest_x2apic_enable();
+
+	/* Interrupts are disabled by default, so turn them on explicitly. */
+	asm volatile("sti");
+
+	/* Stay runnable forever, reporting an increasing count on each sync. */
+	for (nr_syncs = 1; ; nr_syncs++)
+		GUEST_SYNC(nr_syncs);
+}
+
+/**
+ * gsi_route_add - Append an MSI route for @gsi to a GSI routing table.
+ *
+ * @table: routing table that receives the new entry
+ * @gsi: GSI number the route is registered under
+ * @use_x2apic_format: if true, bits 8-31 of @dest_id are stored in address_hi
+ * @dest_id: destination ID; bits 0-7 always go into address_lo
+ * @vector: interrupt vector, stored in the MSI data word
+ * @msi_redir_hint: see "Message Address Register Format" in the Intel SDM
+ * @dest_mode: same SDM section; use false for DM=0 and true for DM=1
+ * @trig_mode: see "Message Data Register Format" in the Intel SDM; use false
+ *             for edge sensitive and true for level sensitive
+ * @delivery_mode: 3-bit code, see "Message Data Register Format"
+ *
+ * address_hi, address_lo and data follow their use in struct kvm_lapic_irq.
+ */
+static void gsi_route_add(struct kvm_irq_routing *table, uint32_t gsi,
+			  bool use_x2apic_format, uint32_t dest_id,
+			  uint8_t vector, bool msi_redir_hint, bool dest_mode,
+			  bool trig_mode, uint8_t delivery_mode)
+{
+	union {
+		struct {
+			u32 vector : 8, delivery_mode : 3,
+			dest_mode_logical : 1, reserved : 2,
+			active_low : 1, is_level : 1;
+		};
+		uint32_t as_uint32;
+	} data = { 0 };		/* MSI data word */
+	union {
+		struct {
+			u32 reserved_0 : 2, dest_mode_logical : 1,
+			    redirect_hint : 1, reserved_1 : 1,
+			    virt_destid_8_14 : 7, destid_0_7 : 8,
+			    base_address : 12;
+		};
+		uint32_t as_uint32;
+	} address_lo = { 0 };	/* MSI address, low 32 bits */
+	union {
+		struct {
+			u32 reserved : 8, destid_8_31 : 24;
+		};
+		uint32_t as_uint32;
+	} address_hi = { 0 };	/* MSI address, high 32 bits */
+
+	/* Fixed 0xfee (see Intel SDM "Message Address Register Format") */
+	address_lo.base_address = 0xfee;
+
+	address_lo.destid_0_7 = dest_id & 0xff;
+	if (use_x2apic_format)
+		address_hi.destid_8_31 = (dest_id & 0xffffff00) >> 8;
+
+	data.vector = vector;
+	address_lo.dest_mode_logical = dest_mode;
+	data.is_level = trig_mode;
+	data.delivery_mode = delivery_mode & 0b111;	/* keep the low 3 bits */
+	address_lo.redirect_hint = msi_redir_hint;
+
+	kvm_gsi_routing_msi_add(table, gsi, address_lo.as_uint32,
+				address_hi.as_uint32, data.as_uint32);
+}
+
+/**
+ * Assigns or deassigns a KVM irqfd in @vm
+ *
+ * @fd: eventfd handed to KVM_IRQFD
+ * @gsi: irqchip pin toggled by this event
+ * @assign: true to assign the irqfd, false to deassign it
+ */
+static void set_irqfd(struct kvm_vm *vm, int fd, uint32_t gsi, bool assign)
+{
+	struct kvm_irqfd ifd = { 0 };
+
+	ifd.fd = fd;
+	ifd.gsi = gsi;
+	ifd.flags = assign ? 0 : KVM_IRQFD_FLAG_DEASSIGN;
+	vm_ioctl(vm, KVM_IRQFD, &ifd);
+}
+
+static void setup_interrupt_count_per_vector(struct kvm_vm *vm)
+{
+	vm_vaddr_t gva;
+	int npages;
+
+	/* The memslot size is a page count, so convert bytes to pages. */
+	npages = DIV_ROUND_UP(sizeof(*interrupt_count_per_vector), PAGE_SIZE);
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, INTERRUPT_COUNT_GPA,
+				    INTERRUPT_COUNT_MEMSLOT, npages, 0);
+	vm->memslots[MEM_REGION_TDX_SHARED_DATA] = INTERRUPT_COUNT_MEMSLOT;
+
+	gva = vm_vaddr_alloc_shared(vm, sizeof(*interrupt_count_per_vector),
+				    KVM_UTIL_MIN_VADDR,
+				    MEM_REGION_TDX_SHARED_DATA);
+
+	interrupt_count_per_vector = addr_gva2hva(vm, gva);
+	memset(interrupt_count_per_vector, 0,
+	       sizeof(*interrupt_count_per_vector));
+	/* Host keeps the HVA above; the guest's copy of the global gets the GVA. */
+	write_guest_global(vm, interrupt_count_per_vector,
+			   (uint32_t(*)[NUM_INTERRUPTS])gva);
+}
+
+static void handle_vcpu_exit(struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+	uint64_t cmd = get_ucall(vcpu, &uc);
+
+	/* A sync is the only exit this test accepts from the guest. */
+	if (cmd == UCALL_SYNC)
+		return;
+
+	/* Report a guest assertion, if any; everything else is a failure. */
+	if (cmd == UCALL_ABORT)
+		REPORT_GUEST_ASSERT(uc);
+	TEST_FAIL("Unexpected exit: %s", exit_reason_str(vcpu->run->exit_reason));
+}
+
+/* Map GSIs 32+ 1:1 to vectors on @vcpu, with one new irqfd eventfd per GSI. */
+void map_gsis_to_vectors(struct kvm_vm *vm, struct kvm_vcpu *vcpu, int *eventfds)
+{
+	struct kvm_irq_routing *table;
+	uint32_t vector_and_gsi;
+	int efd;
+
+	/* Flush table first. */
+	table = kvm_gsi_routing_create();
+	kvm_gsi_routing_write(vm, table);
+
+	/* Writing frees table, so we have to create another one. */
+	table = kvm_gsi_routing_create();
+
+	/* Map vectors to gsis 1 to 1 */
+	for (vector_and_gsi = 32; vector_and_gsi < NUM_INTERRUPTS;
+	     ++vector_and_gsi) {
+		gsi_route_add(table, vector_and_gsi, /*use_x2apic_format=*/true,
+			      /*dest_id=*/vcpu->id, /*vector=*/vector_and_gsi,
+			      /*msi_redir_hint=*/false, /*dest_mode=*/false,
+			      /*trig_mode=*/false, /*delivery_mode=*/0b000);
+
+		efd = eventfd(0, EFD_NONBLOCK);
+		TEST_ASSERT(efd >= 0, "eventfd() failed for GSI %u", vector_and_gsi);
+		set_irqfd(vm, efd, vector_and_gsi, true);
+
+		/* Close the eventfd left by an earlier call; 0 means "none yet". */
+		if (eventfds[vector_and_gsi] > 0)
+			close(eventfds[vector_and_gsi]);
+		eventfds[vector_and_gsi] = efd;
+	}
+
+	/* Configure KVM. Writing frees table. */
+	kvm_gsi_routing_write(vm, table);
+}
+
+int main(int argc, char *argv[])
+{
+	int eventfds[NUM_INTERRUPTS] = { 0 };
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	int vector, migration;
+
+	TEST_REQUIRE(kvm_check_cap(KVM_CAP_SPLIT_IRQCHIP));
+
+	setbuf(stdout, NULL);
+
+	vm = td_create();
+	td_initialize(vm, VM_MEM_SRC_ANONYMOUS, 0);
+
+	vcpu = td_vcpu_add(vm, 0, guest_code);
+	/* Count every vector, even though only 32 and up get a GSI route. */
+	for (vector = 0; vector < NUM_INTERRUPTS; ++vector) {
+		vm_install_exception_handler(vm, vector,
+					     interrupt_handler_increment_count);
+	}
+
+	setup_interrupt_count_per_vector(vm);
+
+	td_finalize(vm);
+
+	map_gsis_to_vectors(vm, vcpu, eventfds);
+	/* Initial run; any exit other than a guest sync fails the test. */
+	tdx_run(vcpu);
+	handle_vcpu_exit(vcpu);
+
+	for (migration = 0; migration < MIGRATION_LOOPS; ++migration) {
+		struct kvm_vcpu *next_vcpu;
+		struct kvm_vm *next_vm;
+
+		next_vm = td_create();
+		tdx_enable_capabilities(next_vm);
+		next_vcpu = vm_vcpu_recreate(next_vm, 0);
+
+		/* Inject on source VM: signal the eventfd of every routed GSI. */
+		for (vector = 32; vector < NUM_INTERRUPTS; ++vector)
+			TEST_ASSERT_EQ(eventfd_write(eventfds[vector], 1), 0);
+		/* Give the destination VM its own routes and eventfds. */
+		map_gsis_to_vectors(next_vm, next_vcpu, eventfds);
+
+		vcpu = next_vcpu;
+		/* Migrate into next_vm, then free the source VM. */
+		tdx_migrate_from(next_vm, vm);
+		kvm_vm_free(vm);
+		vm = next_vm;
+		/* Run the guest on the destination VM's vCPU. */
+		tdx_run(vcpu);
+		handle_vcpu_exit(vcpu);
+		/* Each routed vector must have fired once per migration so far. */
+		for (vector = 32; vector < NUM_INTERRUPTS; ++vector)
+			TEST_ASSERT_EQ((*interrupt_count_per_vector)[vector],
+				       migration + 1);
+	}
+	/* Tear down the last VM, then close the fds currently in eventfds[]. */
+	kvm_vm_free(vm);
+	for (vector = 32; vector < NUM_INTERRUPTS; ++vector)
+		close(eventfds[vector]);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/tdx_migrate_tests.c b/tools/testing/selftests/kvm/x86/tdx_migrate_tests.c
index e15da2aa0437..498e42f37697 100644
--- a/tools/testing/selftests/kvm/x86/tdx_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86/tdx_migrate_tests.c
@@ -10,27 +10,6 @@
 #define NR_MIGRATE_TEST_VMS 10
 #define TDX_IOEXIT_TEST_PORT 0x50
 
-static int __tdx_migrate_from(int dst_fd, int src_fd)
-{
-	struct kvm_enable_cap cap = {
-		.cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM,
-		.args = { src_fd }
-	};
-
-	return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
-}
-
-
-static void tdx_migrate_from(struct kvm_vm *dst_vm, struct kvm_vm *src_vm)
-{
-	int ret;
-
-	vm_migrate_mem_regions(dst_vm, src_vm);
-	ret = __tdx_migrate_from(dst_vm->fd, src_vm->fd);
-	TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno);
-	src_vm->enc_migrated = true;
-}
-
 void guest_code(void)
 {
 	int ret;
-- 
2.50.0.rc1.591.g9c95f17f64-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ