lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250922202452.45810-1-prl@amazon.com>
Date: Mon, 22 Sep 2025 13:24:52 -0700
From: Priscilla Lam <prl@...zon.com>
To: <maz@...nel.org>, <oliver.upton@...ux.dev>, <joey.gouly@....com>,
	<suzuki.poulose@....com>, <yuzenghui@...wei.com>
CC: <dwmw@...zon.co.uk>, <gurugubs@...zon.com>, <christoffer.dall@....com>,
	<graf@...zon.com>, <linux-arm-kernel@...ts.infradead.org>,
	<kvmarm@...ts.linux.dev>, <linux-kernel@...r.kernel.org>
Subject: [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64

x86 implements the KVM_TRANSLATE ioctl, which translates a GVA
(guest virtual address) to a GPA (guest physical address) using the
guest's stage-1 tables; arm64 does not yet implement it.

Implement KVM_TRANSLATE on arm64 for both VHE and non-VHE
configurations. The VHE path issues the AT instruction directly,
while the non-VHE implementation wraps the AT call in a hypercall
so that it executes at EL2. Add a selftest that exercises the
ioctl in both configurations.

Signed-off-by: Priscilla Lam <prl@...zon.com>
---
 arch/arm64/include/asm/kvm_asm.h              |   2 +
 arch/arm64/kvm/guest.c                        |  89 ++++++++++++++-
 arch/arm64/kvm/hyp/nvhe/Makefile              |   3 +-
 arch/arm64/kvm/hyp/nvhe/hyp-main.c            |  10 ++
 arch/arm64/kvm/hyp/nvhe/translate.c           |  84 ++++++++++++++
 tools/testing/selftests/kvm/Makefile.kvm      |   1 +
 tools/testing/selftests/kvm/arm64/translate.c | 107 ++++++++++++++++++
 7 files changed, 292 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm64/kvm/hyp/nvhe/translate.c
 create mode 100644 tools/testing/selftests/kvm/arm64/translate.c

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index bec227f9500a..56ecf4691650 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -87,6 +87,7 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
 	__KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
+	__KVM_HOST_SMCCC_FUNC___kvm_hyp_translate,
 };
 
 #define DECLARE_KVM_VHE_SYM(sym)	extern char sym[]
@@ -289,6 +290,7 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void);
 asmlinkage void kvm_unexpected_el2_exception(void);
 struct kvm_cpu_context;
 void handle_trap(struct kvm_cpu_context *host_ctxt);
+extern u64 __kvm_hyp_translate(struct kvm_vcpu *vcpu, u64 gva);
 asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on);
 void __noreturn __pkvm_init_finalise(void);
 void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 16ba5e9ac86c..180ea1df66cc 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -23,6 +23,7 @@
 #include <linux/uaccess.h>
 #include <asm/fpsimd.h>
 #include <asm/kvm.h>
+#include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_nested.h>
 #include <asm/sigcontext.h>
@@ -932,10 +933,92 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	return -EINVAL;
 }
 
-int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
-				  struct kvm_translation *tr)
+/* Combine the output address from PAR_EL1 (bits 51:12) with the page offset of @va. */
+static inline u64 par_to_ipa(u64 par, u64 va)
 {
-	return -EINVAL;
+	u64 offset = va & ((1ULL << PAGE_SHIFT) - 1);
+
+	return (par & GENMASK_ULL(51, 12)) | offset;
+}
+
+/*
+ * Translate @tr->linear_address through the guest's EL1 stage-1 regime with
+ * the AT instruction. On VHE the host already runs at EL2 with HCR_EL2.E2H
+ * set; HCR_EL2.VM is enabled so PAR_EL1 reports the IPA rather than a PA.
+ */
+static int kvm_translate_vhe(struct kvm_vcpu *vcpu, struct kvm_translation *tr)
+{
+	unsigned long flags;
+	u64 hcr_old, hcr_new, par;
+	const u64 gva = tr->linear_address;
+
+	/*
+	 * The struct is copied in from userspace, so the output fields start
+	 * out holding caller-supplied garbage. Clear them so a failed walk
+	 * reports invalid/non-writeable instead of stale user data.
+	 */
+	tr->valid = 0;
+	tr->writeable = 0;
+	tr->usermode = 0;
+
+	preempt_disable();
+	local_irq_save(flags);
+
+	/* Ensure we're in the expected VHE regime and enable S2 so PAR returns IPA. */
+	hcr_old = read_sysreg(hcr_el2);
+	hcr_new = hcr_old | HCR_E2H | HCR_VM;
+	hcr_new &= ~HCR_TGE;
+	write_sysreg(hcr_new, hcr_el2);
+	isb();
+
+	/* Load guest EL1 S1 context into *_EL12 (do not write into _EL1). */
+	write_sysreg_s(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0_EL12);
+	write_sysreg_s(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1_EL12);
+	write_sysreg_s(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR_EL12);
+	write_sysreg_s(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR_EL12);
+	write_sysreg_s(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR_EL12);
+
+	/* Check address read; PAR_EL1 bit 0 set means the walk faulted. */
+	asm volatile("at s1e1r, %0" :: "r"(gva));
+	isb();
+
+	par = read_sysreg(par_el1);
+	if (!(par & 1)) {
+		tr->valid = true;
+		tr->physical_address = par_to_ipa(par, gva);
+	}
+
+	/* Check address write */
+	asm volatile("at s1e1w, %0" :: "r"(gva));
+	isb();
+
+	par = read_sysreg(par_el1);
+
+	if (!(par & 1)) {
+		tr->valid = true;
+		tr->writeable = true;
+		tr->physical_address = par_to_ipa(par, gva);
+	}
+
+	/* Restore HCR_EL2 and exit */
+	write_sysreg(hcr_old, hcr_el2);
+	isb();
+	local_irq_restore(flags);
+	preempt_enable();
+
+	return 0;
+}
+
+/*
+ * Non-VHE: the AT-based walk must run at EL2, so forward the request to the
+ * hypervisor and decode the packed reply (IPA in bits 63:8, valid in bit 4,
+ * writeable in bit 0).
+ */
+static int kvm_translate_nvhe(struct kvm_vcpu *vcpu, struct kvm_translation *tr)
+{
+	u64 packed;
+
+	preempt_disable();
+	local_irq_disable();
+	packed = kvm_call_hyp_nvhe(__kvm_hyp_translate, vcpu, tr->linear_address);
+	local_irq_enable();
+	preempt_enable();
+
+	tr->usermode = 0;
+	tr->valid = !!(packed & (1ULL << 4));
+	tr->writeable = !!(packed & 1ULL);
+	tr->physical_address = packed >> 8;
+
+	return 0;
+}
+
+/* Dispatch KVM_TRANSLATE to the implementation matching the host's EL2 mode. */
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr)
+{
+	return has_vhe() ? kvm_translate_vhe(vcpu, tr)
+			 : kvm_translate_nvhe(vcpu, tr);
 }
 
 /**
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 0b0a68b663d4..bcbd4e5125b1 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -24,7 +24,8 @@ CFLAGS_switch.nvhe.o += -Wno-override-init
 
 hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
 	 hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
-	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
+	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o \
+	 translate.o
 hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
 hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 3206b2c07f82..a52cf002822c 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -573,6 +573,15 @@ static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
 	cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
 }
 
+/* Host-hcall shim: unpack the arguments and run the EL2 translation walk. */
+static void handle___kvm_hyp_translate(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1);
+	DECLARE_REG(u64, gva, host_ctxt, 2);
+
+	/* The host passed a kernel VA; convert it to a hyp VA before use. */
+	host_vcpu = kern_hyp_va(host_vcpu);
+	cpu_reg(host_ctxt, 1) = __kvm_hyp_translate(host_vcpu, gva);
+}
+
 typedef void (*hcall_t)(struct kvm_cpu_context *);
 
 #define HANDLE_FUNC(x)	[__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
@@ -612,6 +621,7 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__pkvm_vcpu_load),
 	HANDLE_FUNC(__pkvm_vcpu_put),
 	HANDLE_FUNC(__pkvm_tlb_flush_vmid),
+	HANDLE_FUNC(__kvm_hyp_translate),
 };
 
 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/translate.c b/arch/arm64/kvm/hyp/nvhe/translate.c
new file mode 100644
index 000000000000..239a095a015d
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/translate.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2025 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Author: Priscilla Lam <prl@...zon.com>
+ */
+
+#include <asm/sysreg.h>
+#include <hyp/sysreg-sr.h>
+#include <nvhe/mem_protect.h>
+
+/* Merge the output address from PAR_EL1 with the in-page offset of @va. */
+static __always_inline u64 par_to_ipa(u64 par, u64 va)
+{
+	u64 page_off = va & GENMASK_ULL(PAGE_SHIFT - 1, 0);
+
+	return (par & GENMASK_ULL(51, 12)) | page_off;
+}
+
+/**
+ * __kvm_hyp_translate - hypercall that translates a GVA to GPA when VHE is not enabled or available
+ * @vcpu: the vCPU pointer (already converted to a hyp VA by the hcall handler)
+ * @gva: the guest virtual address
+ *
+ * This returns the result in a packed integer. The GPA if successful will be in bits 63:8, the
+ * validity in bit 4, and if the address is writeable in bit 0.
+ */
+u64 __kvm_hyp_translate(struct kvm_vcpu *vcpu, u64 gva)
+{
+	struct kvm_cpu_context *host_ctxt;
+	struct kvm_cpu_context *guest_ctxt;
+	struct kvm_s2_mmu *mmu;
+
+	u64 hcr_old = read_sysreg(hcr_el2);
+	u64 par = 0;
+	u64 gpa = 0;
+	bool valid = false;
+	bool writeable = false;
+
+	host_ctxt = host_data_ptr(host_ctxt);
+	host_ctxt->__hyp_running_vcpu = vcpu;
+	guest_ctxt = &vcpu->arch.ctxt;
+
+	/* Swap in the guest's EL1 context so AT walks the guest's tables. */
+	__sysreg_save_state_nvhe(host_ctxt);
+	__debug_save_host_buffers_nvhe(vcpu);
+
+	dsb(nsh);
+
+	__sysreg_restore_state_nvhe(guest_ctxt);
+
+	mmu = kern_hyp_va(vcpu->arch.hw_mmu);
+	__load_stage2(mmu, kern_hyp_va(mmu->arch));
+
+	/*
+	 * Enable stage 2 (VM) so PAR_EL1 reports the IPA, and clear TGE so
+	 * AT S1E1* uses the EL1&0 regime. Do NOT set E2H here: this path
+	 * runs in the non-VHE EL2 regime, and flipping E2H on FEAT_VHE
+	 * hardware would reinterpret the EL2 system registers (TCR_EL2
+	 * layout et al.) underneath the running hypervisor.
+	 */
+	write_sysreg((hcr_old | HCR_VM) & ~HCR_TGE, hcr_el2);
+	isb();
+
+	/* Stage-1 read walk; PAR_EL1 bit 0 set means the walk faulted. */
+	asm volatile("at s1e1r, %0" :: "r"(gva));
+	isb();
+
+	par = read_sysreg(par_el1);
+
+	if (!(par & 1)) {
+		gpa = par_to_ipa(par, gva);
+		valid = true;
+	}
+
+	/* Only probe writeability for an address that translates at all. */
+	if (valid) {
+		asm volatile("at s1e1w, %0" :: "r"(gva));
+		isb();
+
+		par = read_sysreg(par_el1);
+		if (!(par & 1))
+			writeable = true;
+	}
+
+	/* Restore the host's EL2 configuration and EL1 state before returning. */
+	write_sysreg(hcr_old, hcr_el2);
+	isb();
+
+	__load_host_stage2();
+	__sysreg_restore_state_nvhe(host_ctxt);
+	__debug_restore_host_buffers_nvhe(vcpu);
+	host_ctxt->__hyp_running_vcpu = NULL;
+
+	// Pack result: GPA in bits 63:8, valid in bit 4, writeable in bit 0
+	return (gpa << 8) | (valid ? (1ULL << 4) : 0) | (writeable ? 1ULL : 0);
+}
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 41b40c676d7f..894b1b888ce4 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -163,6 +163,7 @@ TEST_GEN_PROGS_arm64 += arm64/page_fault_test
 TEST_GEN_PROGS_arm64 += arm64/psci_test
 TEST_GEN_PROGS_arm64 += arm64/set_id_regs
 TEST_GEN_PROGS_arm64 += arm64/smccc_filter
+TEST_GEN_PROGS_arm64 += arm64/translate
 TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
 TEST_GEN_PROGS_arm64 += arm64/vgic_init
 TEST_GEN_PROGS_arm64 += arm64/vgic_irq
diff --git a/tools/testing/selftests/kvm/arm64/translate.c b/tools/testing/selftests/kvm/arm64/translate.c
new file mode 100644
index 000000000000..5cdc975ae52a
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/translate.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * translate: Test the KVM_TRANSLATE ioctl on AArch64 by setting up
+ * guest page table mappings and verifying that the ioctl correctly
+ * translates guest virtual addresses to guest physical addresses.
+ */
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define GUEST_TEST_GVA1		0x400000
+#define GUEST_TEST_GVA2		0x500000
+#define GUEST_UNMAPPED_GVA	0x600000
+
+/* AArch64 page table entry flags */
+#define PTE_RDONLY		(1ULL << 7)	/* AP[2] - Read-only */
+
+/*
+ * The guest does no work of its own; it only needs to run once so that the
+ * EL1 system registers used by the translation are initialized (see main()).
+ */
+static void guest_code(void)
+{
+	GUEST_DONE();
+}
+
+/*
+ * Create a read-only page mapping by first creating a normal mapping
+ * and then modifying the PTE to add the read-only flag.
+ */
+static void virt_pg_map_readonly(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+	uint64_t *ptep;
+
+	/* First create a normal read-write mapping */
+	virt_pg_map(vm, vaddr, paddr);
+
+	/* Now find the PTE and modify it to be read-only */
+	ptep = virt_get_pte_hva(vm, vaddr);
+	TEST_ASSERT(ptep, "Failed to get PTE for GVA 0x%lx", vaddr);
+
+	/* Set the read-only bit in the PTE */
+	*ptep |= PTE_RDONLY;
+}
+
+int main(void)
+{
+	struct kvm_translation tr;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	vm_vaddr_t gva1, gva2;
+	vm_paddr_t gpa1, gpa2;
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	/* Set up two different GVA to GPA mappings with different permissions. */
+	gva1 = GUEST_TEST_GVA1;
+	/*
+	 * NOTE(review): the second argument of vm_phy_page_alloc() is the
+	 * minimum physical address, not an allocation size — passing
+	 * vm->page_size works but reads oddly; confirm the intent.
+	 */
+	gpa1 = vm_phy_page_alloc(vm, vm->page_size, vm->memslots[MEM_REGION_TEST_DATA]);
+	printf("Allocated GPA1: 0x%lx for GVA1: 0x%lx\n", (unsigned long)gpa1, (unsigned long)gva1);
+	virt_pg_map(vm, gva1, gpa1);  /* Read-write mapping */
+
+	gva2 = GUEST_TEST_GVA2;
+	gpa2 = vm_phy_page_alloc(vm, vm->page_size, vm->memslots[MEM_REGION_TEST_DATA]);
+	printf("Allocated GPA2: 0x%lx for GVA2: 0x%lx\n", (unsigned long)gpa2, (unsigned long)gva2);
+	virt_pg_map_readonly(vm, gva2, gpa2);  /* Read-only mapping */
+
+	/*
+	 * The vCPU must be run at least once to initialize the system
+	 * registers needed for guest address translation.
+	 */
+	vcpu_run(vcpu);
+	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+	/* Verify the first mapping (read-write) translates correctly. */
+	memset(&tr, 0, sizeof(tr));
+	tr.linear_address = gva1;
+	vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+
+	printf("RW mapping: GVA=0x%lx -> GPA=0x%llx, valid=%d, writeable=%d\n",
+	       (unsigned long)gva1, (unsigned long long)tr.physical_address,
+	       tr.valid, tr.writeable);
+	TEST_ASSERT(tr.valid, "Translation should succeed for mapped GVA");
+	TEST_ASSERT_EQ(tr.physical_address, gpa1);
+	TEST_ASSERT(tr.writeable, "Read-write GVA should be writeable");
+
+	/* Verify the second mapping (read-only) translates correctly. */
+	memset(&tr, 0, sizeof(tr));
+	tr.linear_address = gva2;
+	vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+
+	printf("RO mapping: GVA=0x%lx -> GPA=0x%llx, valid=%d, writeable=%d\n",
+	       (unsigned long)gva2, (unsigned long long)tr.physical_address,
+	       tr.valid, tr.writeable);
+	TEST_ASSERT(tr.valid, "Translation should succeed for mapped GVA");
+	TEST_ASSERT_EQ(tr.physical_address, gpa2);
+	TEST_ASSERT(!tr.writeable, "Read-only GVA should not be writeable");
+
+	/* Verify that an unmapped GVA is reported as invalid. */
+	memset(&tr, 0, sizeof(tr));
+	tr.linear_address = GUEST_UNMAPPED_GVA;
+	vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+
+	printf("Unmapped: GVA=0x%lx -> GPA=0x%llx, valid=%d, writeable=%d\n",
+	       (unsigned long)GUEST_UNMAPPED_GVA, (unsigned long long)tr.physical_address,
+	       tr.valid, tr.writeable);
+	TEST_ASSERT(!tr.valid, "Translation should fail for unmapped GVA");
+
+	kvm_vm_free(vm);
+	return 0;
+}
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ