lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4owz4js4mvl4dohgkydcyrdhh2j2xblbwbo7zistocb4knjzdo@kvrzl7vmvg67>
Date: Mon, 20 Oct 2025 17:26:36 +0000
From: Yosry Ahmed <yosry.ahmed@...ux.dev>
To: Jim Mattson <jmattson@...gle.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>, Shuah Khan <shuah@...nel.org>, 
	Sean Christopherson <seanjc@...gle.com>, Bibo Mao <maobibo@...ngson.cn>, 
	Huacai Chen <chenhuacai@...nel.org>, Andrew Jones <ajones@...tanamicro.com>, 
	Claudio Imbrenda <imbrenda@...ux.ibm.com>, "Pratik R. Sampat" <prsampat@....com>, 
	Kai Huang <kai.huang@...el.com>, Eric Auger <eric.auger@...hat.com>, linux-kernel@...r.kernel.org, 
	kvm@...r.kernel.org, linux-kselftest@...r.kernel.org
Subject: Re: [PATCH 4/4] KVM: selftests: Add a VMX test for LA57 nested state

On Wed, Sep 17, 2025 at 02:48:40PM -0700, Jim Mattson wrote:
> Add a selftest that verifies KVM's ability to save and restore
> nested state when the L1 guest is using 5-level paging and the L2
> guest is using 4-level paging. Specifically, canonicality tests of
> the VMCS12 host-state fields should accept 57-bit virtual addresses.
> 
> Signed-off-by: Jim Mattson <jmattson@...gle.com>
> ---
>  tools/testing/selftests/kvm/Makefile.kvm      |   1 +
>  .../kvm/x86/vmx_la57_nested_state_test.c      | 137 ++++++++++++++++++
>  2 files changed, 138 insertions(+)
>  create mode 100644 tools/testing/selftests/kvm/x86/vmx_la57_nested_state_test.c
> 
> diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
> index 41b40c676d7f..f1958b88ec59 100644
> --- a/tools/testing/selftests/kvm/Makefile.kvm
> +++ b/tools/testing/selftests/kvm/Makefile.kvm
> @@ -116,6 +116,7 @@ TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
>  TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
>  TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
>  TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
> +TEST_GEN_PROGS_x86 += x86/vmx_la57_nested_state_test
>  TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test
>  TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test
>  TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
> diff --git a/tools/testing/selftests/kvm/x86/vmx_la57_nested_state_test.c b/tools/testing/selftests/kvm/x86/vmx_la57_nested_state_test.c
> new file mode 100644
> index 000000000000..7c3c4c1c17f6
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/x86/vmx_la57_nested_state_test.c
> @@ -0,0 +1,137 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * vmx_la57_nested_state_test
> + *
> + * Copyright (C) 2025, Google LLC.
> + *
> + * Test KVM's ability to save and restore nested state when the L1 guest
> + * is using 5-level paging and the L2 guest is using 4-level paging.
> + *
> + * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86:
> + * model canonical checks more precisely").
> + */
> +#include "test_util.h"
> +#include "kvm_util.h"
> +#include "processor.h"
> +#include "vmx.h"
> +
> +#define LA57_GS_BASE 0xff2bc0311fb00000ull
> +
> +static void l2_guest_code(void)
> +{
> +	/*
> +	 * Sync with L0 to trigger save/restore.  After
> +	 * resuming, execute VMCALL to exit back to L1.
> +	 */
> +	GUEST_SYNC(1);
> +	vmcall();
> +}
> +
> +static void l1_guest_code(struct vmx_pages *vmx_pages)
> +{
> +#define L2_GUEST_STACK_SIZE 64
> +	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
> +	u64 guest_cr4;
> +	vm_paddr_t pml5_pa, pml4_pa;
> +	u64 *pml5;
> +	u64 exit_reason;
> +
> +	/* Set GS_BASE to a value that is only canonical with LA57. */
> +	wrmsr(MSR_GS_BASE, LA57_GS_BASE);
> +	GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE);
> +
> +	GUEST_ASSERT(vmx_pages->vmcs_gpa);
> +	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
> +	GUEST_ASSERT(load_vmcs(vmx_pages));
> +
> +	prepare_vmcs(vmx_pages, l2_guest_code,
> +		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
> +
> +	/*
> +	 * Set up L2 with a 4-level page table by pointing its CR3 to L1's
> +	 * PML4 table and clearing CR4.LA57. This creates the CR4.LA57
> +	 * mismatch that exercises the bug.
> +	 */
> +	pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK;
> +	pml5 = (u64 *)pml5_pa;
> +	pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK;
> +	vmwrite(GUEST_CR3, pml4_pa);

Clever :)

> +
> +	guest_cr4 = vmreadz(GUEST_CR4);
> +	guest_cr4 &= ~X86_CR4_LA57;
> +	vmwrite(GUEST_CR4, guest_cr4);
> +
> +	GUEST_ASSERT(!vmlaunch());
> +
> +	exit_reason = vmreadz(VM_EXIT_REASON);
> +	GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL);
> +}
> +
> +void guest_code(struct vmx_pages *vmx_pages)
> +{
> +	if (vmx_pages)
> +		l1_guest_code(vmx_pages);

I don't think any of the other tests do this NULL check, and it seems the test
would actually pass if vmx_pages were NULL, silently skipping the interesting
part. I think it's better to let L1 crash if we mess up the setup.

> +
> +	GUEST_DONE();
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	vm_vaddr_t vmx_pages_gva = 0;
> +	struct kvm_vm *vm;
> +	struct kvm_vcpu *vcpu;
> +	struct kvm_x86_state *state;
> +	struct ucall uc;
> +	int stage;
> +
> +	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
> +	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57));
> +	TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
> +
> +	vm = vm_create_shape_with_one_vcpu(VM_SHAPE(VM_MODE_PXXV57_4K), &vcpu,
> +					   guest_code);
> +
> +	/*
> +	 * L1 needs to read its own PML5 table to set up L2. Identity map
> +	 * the PML5 table to facilitate this.
> +	 */
> +	virt_map(vm, vm->pgd, vm->pgd, 1);
> +
> +	vcpu_alloc_vmx(vm, &vmx_pages_gva);
> +	vcpu_args_set(vcpu, 1, vmx_pages_gva);
> +
> +	for (stage = 1;; stage++) {
> +		vcpu_run(vcpu);
> +		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
> +
> +		switch (get_ucall(vcpu, &uc)) {
> +		case UCALL_ABORT:
> +			REPORT_GUEST_ASSERT(uc);
> +			/* NOT REACHED */
> +		case UCALL_SYNC:
> +			break;
> +		case UCALL_DONE:
> +			goto done;
> +		default:
> +			TEST_FAIL("Unknown ucall %lu", uc.cmd);
> +		}
> +
> +		TEST_ASSERT(uc.args[1] == stage,
> +			    "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]);
> +		if (stage == 1) {
> +			pr_info("L2 is active; performing save/restore.\n");
> +			state = vcpu_save_state(vcpu);
> +
> +			kvm_vm_release(vm);
> +
> +			/* Restore state in a new VM. */
> +			vcpu = vm_recreate_with_one_vcpu(vm);
> +			vcpu_load_state(vcpu, state);
> +			kvm_x86_state_cleanup(state);

It seems like we only load the vCPU state but never actually run the vCPU
after restoring the nested state. Should we add another stage and run L2
again after the restore? Also, what is the current failure mode without
commit 9245fd6b8531?

> +		}
> +	}
> +
> +done:
> +	kvm_vm_free(vm);
> +	return 0;
> +}
> -- 
> 2.51.0.470.ga7dc726c21-goog
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ