[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251230230150.4150236-22-seanjc@google.com>
Date: Tue, 30 Dec 2025 15:01:50 -0800
From: Sean Christopherson <seanjc@...gle.com>
To: Paolo Bonzini <pbonzini@...hat.com>, Marc Zyngier <maz@...nel.org>,
Oliver Upton <oupton@...nel.org>, Tianrui Zhao <zhaotianrui@...ngson.cn>,
Bibo Mao <maobibo@...ngson.cn>, Huacai Chen <chenhuacai@...nel.org>,
Anup Patel <anup@...infault.org>, Paul Walmsley <pjw@...nel.org>,
Palmer Dabbelt <palmer@...belt.com>, Albert Ou <aou@...s.berkeley.edu>,
Christian Borntraeger <borntraeger@...ux.ibm.com>, Janosch Frank <frankja@...ux.ibm.com>,
Claudio Imbrenda <imbrenda@...ux.ibm.com>, Sean Christopherson <seanjc@...gle.com>
Cc: kvm@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
kvmarm@...ts.linux.dev, loongarch@...ts.linux.dev,
kvm-riscv@...ts.infradead.org, linux-riscv@...ts.infradead.org,
linux-kernel@...r.kernel.org, Yosry Ahmed <yosry.ahmed@...ux.dev>
Subject: [PATCH v4 21/21] KVM: selftests: Test READ=>WRITE dirty logging
behavior for shadow MMU
Update the nested dirty log test to validate KVM's handling of READ faults
when dirty logging is enabled. Specifically, set the Dirty bit in the
guest PTEs used to map L2 GPAs, so that KVM will create writable SPTEs
when handling L2 read faults. When handling read faults in the shadow MMU,
KVM opportunistically creates a writable SPTE if the mapping can be
writable *and* the gPTE is dirty (or doesn't support the Dirty bit), i.e.
if KVM doesn't need to intercept writes in order to emulate Dirty-bit
updates.
To actually test the L2 READ=>WRITE sequence, i.e. to avoid a false
pass being masked by other test activity, route the READ=>WRITE and WRITE=>WRITE
sequences to separate L1 pages, and differentiate between "marked dirty
due to a WRITE access/fault" and "marked dirty due to creating a writable
SPTE for a READ access/fault". The updated sequence exposes the bug fixed
by KVM commit 1f4e5fc83a42 ("KVM: x86: fix nested guest live migration
with PML") when the guest performs a READ=>WRITE sequence.
Signed-off-by: Sean Christopherson <seanjc@...gle.com>
---
.../selftests/kvm/include/x86/processor.h | 1 +
.../testing/selftests/kvm/lib/x86/processor.c | 7 ++
.../selftests/kvm/x86/nested_dirty_log_test.c | 115 +++++++++++++-----
3 files changed, 90 insertions(+), 33 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index ab29b1c7ed2d..8945c9eea704 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -1483,6 +1483,7 @@ bool kvm_cpu_has_tdp(void);
void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint64_t size);
void tdp_identity_map_default_memslots(struct kvm_vm *vm);
void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size);
+uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa);
/*
* Basic CPU control in CR0
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index ab869a98bbdc..fab18e9be66c 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -390,6 +390,13 @@ static uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm,
return virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K);
}
+uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa)
+{
+ int level = PG_LEVEL_4K;
+
+ return __vm_get_page_table_entry(vm, &vm->stage2_mmu, l2_gpa, &level);
+}
+
uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr)
{
int level = PG_LEVEL_4K;
diff --git a/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c
index 89d2e86a0db9..1e7c1ed917e1 100644
--- a/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c
@@ -17,29 +17,39 @@
/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX 1
-#define TEST_MEM_PAGES 3
+#define TEST_MEM_PAGES 4
/* L1 guest test virtual memory offset */
-#define GUEST_TEST_MEM 0xc0000000
+#define GUEST_TEST_MEM1 0xc0000000
+#define GUEST_TEST_MEM2 0xc0002000
/* L2 guest test virtual memory offset */
#define NESTED_TEST_MEM1 0xc0001000
-#define NESTED_TEST_MEM2 0xc0002000
+#define NESTED_TEST_MEM2 0xc0003000
#define L2_GUEST_STACK_SIZE 64
+#define TEST_SYNC_PAGE_MASK 0xfull
+#define TEST_SYNC_READ_FAULT BIT(4)
+#define TEST_SYNC_WRITE_FAULT BIT(5)
+#define TEST_SYNC_NO_FAULT BIT(6)
+
static void l2_guest_code(u64 *a, u64 *b)
{
READ_ONCE(*a);
+ GUEST_SYNC(0 | TEST_SYNC_READ_FAULT);
WRITE_ONCE(*a, 1);
- GUEST_SYNC(true);
- GUEST_SYNC(false);
+ GUEST_SYNC(0 | TEST_SYNC_WRITE_FAULT);
+ READ_ONCE(*a);
+ GUEST_SYNC(0 | TEST_SYNC_NO_FAULT);
WRITE_ONCE(*b, 1);
- GUEST_SYNC(true);
+ GUEST_SYNC(2 | TEST_SYNC_WRITE_FAULT);
WRITE_ONCE(*b, 1);
- GUEST_SYNC(true);
- GUEST_SYNC(false);
+ GUEST_SYNC(2 | TEST_SYNC_WRITE_FAULT);
+ READ_ONCE(*b);
+ GUEST_SYNC(2 | TEST_SYNC_NO_FAULT);
+ GUEST_SYNC(2 | TEST_SYNC_NO_FAULT);
/* Exit to L1 and never come back. */
vmcall();
@@ -53,7 +63,7 @@ static void l2_guest_code_tdp_enabled(void)
static void l2_guest_code_tdp_disabled(void)
{
/* Access the same L1 GPAs as l2_guest_code_tdp_enabled() */
- l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
+ l2_guest_code((u64 *)GUEST_TEST_MEM1, (u64 *)GUEST_TEST_MEM2);
}
void l1_vmx_code(struct vmx_pages *vmx)
@@ -72,9 +82,11 @@ void l1_vmx_code(struct vmx_pages *vmx)
prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- GUEST_SYNC(false);
+ GUEST_SYNC(0 | TEST_SYNC_NO_FAULT);
+ GUEST_SYNC(2 | TEST_SYNC_NO_FAULT);
GUEST_ASSERT(!vmlaunch());
- GUEST_SYNC(false);
+ GUEST_SYNC(0 | TEST_SYNC_NO_FAULT);
+ GUEST_SYNC(2 | TEST_SYNC_NO_FAULT);
GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
GUEST_DONE();
}
@@ -91,9 +103,11 @@ static void l1_svm_code(struct svm_test_data *svm)
generic_svm_setup(svm, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- GUEST_SYNC(false);
+ GUEST_SYNC(0 | TEST_SYNC_NO_FAULT);
+ GUEST_SYNC(2 | TEST_SYNC_NO_FAULT);
run_guest(svm->vmcb, svm->vmcb_gpa);
- GUEST_SYNC(false);
+ GUEST_SYNC(0 | TEST_SYNC_NO_FAULT);
+ GUEST_SYNC(2 | TEST_SYNC_NO_FAULT);
GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
GUEST_DONE();
}
@@ -106,6 +120,11 @@ static void l1_guest_code(void *data)
l1_svm_code(data);
}
+static uint64_t test_read_host_page(uint64_t *host_test_mem, int page_nr)
+{
+ return host_test_mem[PAGE_SIZE * page_nr / sizeof(*host_test_mem)];
+}
+
static void test_dirty_log(bool nested_tdp)
{
vm_vaddr_t nested_gva = 0;
@@ -133,32 +152,45 @@ static void test_dirty_log(bool nested_tdp)
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- GUEST_TEST_MEM,
+ GUEST_TEST_MEM1,
TEST_MEM_SLOT_INDEX,
TEST_MEM_PAGES,
KVM_MEM_LOG_DIRTY_PAGES);
/*
- * Add an identity map for GVA range [0xc0000000, 0xc0002000). This
+ * Add an identity map for GVA range [0xc0000000, 0xc0004000). This
* affects both L1 and L2. However...
*/
- virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
+ virt_map(vm, GUEST_TEST_MEM1, GUEST_TEST_MEM1, TEST_MEM_PAGES);
/*
- * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
- * 0xc0000000.
+ * ... pages in the L2 GPA ranges [0xc0001000, 0xc0002000) and
+ * [0xc0003000, 0xc0004000) will map to 0xc0000000 and 0xc0001000
+ * respectively.
*
* When TDP is disabled, the L2 guest code will still access the same L1
* GPAs as the TDP enabled case.
+ *
+ * Set the Dirty bit in the PTEs used by L2 so that KVM will create
+ * writable SPTEs when handling read faults (if the Dirty bit isn't
+ * set, KVM must intercept the next write to emulate the Dirty bit
+ * update).
*/
if (nested_tdp) {
tdp_identity_map_default_memslots(vm);
- tdp_map(vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE);
- tdp_map(vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE);
+ tdp_map(vm, NESTED_TEST_MEM1, GUEST_TEST_MEM1, PAGE_SIZE);
+ tdp_map(vm, NESTED_TEST_MEM2, GUEST_TEST_MEM2, PAGE_SIZE);
+
+
+ *tdp_get_pte(vm, NESTED_TEST_MEM1) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
+ *tdp_get_pte(vm, NESTED_TEST_MEM2) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
+ } else {
+ *vm_get_pte(vm, GUEST_TEST_MEM1) |= PTE_DIRTY_MASK(&vm->mmu);
+ *vm_get_pte(vm, GUEST_TEST_MEM2) |= PTE_DIRTY_MASK(&vm->mmu);
}
bmap = bitmap_zalloc(TEST_MEM_PAGES);
- host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
+ host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM1);
while (!done) {
memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE);
@@ -169,25 +201,42 @@ static void test_dirty_log(bool nested_tdp)
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
- case UCALL_SYNC:
+ case UCALL_SYNC: {
+ int page_nr = uc.args[1] & TEST_SYNC_PAGE_MASK;
+ int i;
+
/*
* The nested guest wrote at offset 0x1000 in the memslot, but the
* dirty bitmap must be filled in according to L1 GPA, not L2.
*/
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
- if (uc.args[1]) {
- TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
- TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
- } else {
- TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
+
+ /*
+ * If a fault is expected, the page should be dirty
+ * as the Dirty bit is set in the gPTE. KVM should
+ * create a writable SPTE even on a read fault, *and*
+ * KVM must mark the GFN as dirty when doing so.
+ */
+ TEST_ASSERT(test_bit(page_nr, bmap) == !(uc.args[1] & TEST_SYNC_NO_FAULT),
+ "Page %u incorrectly reported %s on %s fault", page_nr,
+ test_bit(page_nr, bmap) ? "dirty" : "clean",
+ uc.args[1] & TEST_SYNC_NO_FAULT ? "no" :
+ uc.args[1] & TEST_SYNC_READ_FAULT ? "read" : "write");
+
+ for (i = 0; i < TEST_MEM_PAGES; i++) {
+ if (i == page_nr && uc.args[1] & TEST_SYNC_WRITE_FAULT)
+ TEST_ASSERT(test_read_host_page(host_test_mem, i) == 1,
+ "Page %u not written by guest", i);
+ else
+ TEST_ASSERT(test_read_host_page(host_test_mem, i) == 0xaaaaaaaaaaaaaaaaULL,
+ "Page %u written by guest", i);
+
+ if (i != page_nr)
+ TEST_ASSERT(!test_bit(i, bmap),
+ "Page %u incorrectly reported dirty", i);
}
-
- TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
- TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[PAGE_SIZE*2 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
break;
+ }
case UCALL_DONE:
done = true;
break;
--
2.52.0.351.gbe84eed79e-goog
Powered by blists - more mailing lists