Message-ID: <20250728135216.48084-34-aneesh.kumar@kernel.org>
Date: Mon, 28 Jul 2025 19:22:10 +0530
From: "Aneesh Kumar K.V (Arm)" <aneesh.kumar@...nel.org>
To: linux-coco@...ts.linux.dev,
	kvmarm@...ts.linux.dev
Cc: linux-pci@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	aik@....com,
	lukas@...ner.de,
	Samuel Ortiz <sameo@...osinc.com>,
	Xu Yilun <yilun.xu@...ux.intel.com>,
	Jason Gunthorpe <jgg@...pe.ca>,
	Suzuki K Poulose <Suzuki.Poulose@....com>,
	Steven Price <steven.price@....com>,
	Catalin Marinas <catalin.marinas@....com>,
	Marc Zyngier <maz@...nel.org>,
	Will Deacon <will@...nel.org>,
	Oliver Upton <oliver.upton@...ux.dev>,
	"Aneesh Kumar K.V (Arm)" <aneesh.kumar@...nel.org>
Subject: [RFC PATCH v1 33/38] KVM: arm64: CCA: handle dev mem map/unmap

Handle the REC exit with reason RMI_EXIT_DEV_MEM_MAP. On the exit, an
optional coco driver hook may veto the request; otherwise, on the next
REC entry the device memory is delegated granule by granule, mapped
with RMI_DEV_MEM_MAP (folding to block mappings where possible), and
the request is completed with RMI_RTT_DEV_MEM_VALIDATE. Teardown of
granules whose RIPAS is RMI_DEV goes through RMI_DEV_MEM_UNMAP instead
of RMI_DATA_DESTROY; the PA it returns is device memory, so it is not
freed as a normal page.
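
For reference, a minimal sketch (not part of this patch) of how a coco
driver might hook this exit through the realm_exit_dev_mem_map_handler
pointer exported in the rme-exit.c hunk below; the my_tsm_* names are
hypothetical:

	static int my_tsm_dev_mem_map_hook(struct realm_rec *rec)
	{
		unsigned long base = rec->run->exit.dev_mem_base;
		unsigned long top = rec->run->exit.dev_mem_top;

		/*
		 * Deny the request unless the range is backed by a
		 * locked TDI (my_tsm_range_is_locked() is a
		 * hypothetical helper).
		 */
		if (!my_tsm_range_is_locked(base, top))
			return -EPERM;

		/* 0 lets KVM map and validate on the next REC entry */
		return 0;
	}

	/* At driver init: */
	realm_exit_dev_mem_map_handler = my_tsm_dev_mem_map_hook;

A non-zero return is propagated by rec_exit_dev_mem_map() and aborts
completion of the DEV_MEM_MAP request.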

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@...nel.org>
---
 arch/arm64/include/asm/rmi_cmds.h |  40 +++++++
 arch/arm64/include/asm/rmi_smc.h  |   5 +
 arch/arm64/kvm/rme-exit.c         |  39 +++++-
 arch/arm64/kvm/rme.c              | 190 ++++++++++++++++++++++++++++--
 drivers/vfio/pci/vfio_pci_core.c  |   1 +
 5 files changed, 262 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/rmi_cmds.h b/arch/arm64/include/asm/rmi_cmds.h
index fcf6b319e953..900e35dae740 100644
--- a/arch/arm64/include/asm/rmi_cmds.h
+++ b/arch/arm64/include/asm/rmi_cmds.h
@@ -638,4 +638,44 @@ static inline unsigned long rmi_vdev_complete(unsigned long rec_phys, unsigned l
 	return res.a0;
 }
 
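+/**
+ * rmi_rtt_dev_mem_validate() - Validate device memory mappings
+ * @rd: PA of the RD
+ * @rec: PA of the calling REC
+ * @base: Base IPA of the range to validate
+ * @top: Top IPA of the range to validate
+ * @out_top: Pointer to write the top IPA of the validated range
+ *
+ * Return: RMI return code
+ */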
+static inline int rmi_rtt_dev_mem_validate(unsigned long rd, unsigned long rec,
+					   unsigned long base, unsigned long top,
+					   unsigned long *out_top)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_1_1_invoke(SMC_RMI_RTT_DEV_MEM_VALIDATE, rd, rec, base, top, &res);
+
+	if (out_top)
+		*out_top = res.a1;
+
+	return res.a0;
+}
+
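+/**
+ * rmi_dev_mem_map() - Map device memory into a realm
+ * @rd: PA of the RD
+ * @ipa: IPA at which the granule will be mapped
+ * @level: RTT level of the mapping
+ * @pa: PA of the delegated device memory granule
+ *
+ * Return: RMI return code
+ */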
+static inline int rmi_dev_mem_map(unsigned long rd, unsigned long ipa,
+				  unsigned long level, unsigned long pa)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_1_1_invoke(SMC_RMI_DEV_MEM_MAP, rd, ipa, level, pa, &res);
+
+	return res.a0;
+}
+
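+/**
+ * rmi_dev_mem_unmap() - Unmap device memory from a realm
+ * @rd: PA of the RD
+ * @ipa: IPA of the mapping to remove
+ * @level: RTT level of the mapping
+ * @out_pa: Pointer to write the PA that was unmapped
+ * @out_ipa: Pointer to write the next IPA after the unmapped region
+ *
+ * Return: RMI return code
+ */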
+static inline int rmi_dev_mem_unmap(unsigned long rd, unsigned long ipa,
+				    unsigned long level, unsigned long *out_pa,
+				    unsigned long *out_ipa)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_1_1_invoke(SMC_RMI_DEV_MEM_UNMAP, rd, ipa, level, &res);
+
+	if (out_pa)
+		*out_pa = res.a1;
+	if (out_ipa)
+		*out_ipa = res.a2;
+
+	return res.a0;
+}
+
 #endif /* __ASM_RMI_CMDS_H */
diff --git a/arch/arm64/include/asm/rmi_smc.h b/arch/arm64/include/asm/rmi_smc.h
index 7073eccaec5f..ab169b375198 100644
--- a/arch/arm64/include/asm/rmi_smc.h
+++ b/arch/arm64/include/asm/rmi_smc.h
@@ -39,6 +39,7 @@
 
 #define SMC_RMI_RTT_READ_ENTRY		SMC_RMI_CALL(0x0161)
 #define SMC_RMI_RTT_UNMAP_UNPROTECTED	SMC_RMI_CALL(0x0162)
+#define SMC_RMI_RTT_DEV_MEM_VALIDATE	SMC_RMI_CALL(0x0163)
 
 #define SMC_RMI_PSCI_COMPLETE		SMC_RMI_CALL(0x0164)
 #define SMC_RMI_FEATURES		SMC_RMI_CALL(0x0165)
@@ -47,6 +48,9 @@
 #define SMC_RMI_RTT_INIT_RIPAS		SMC_RMI_CALL(0x0168)
 #define SMC_RMI_RTT_SET_RIPAS		SMC_RMI_CALL(0x0169)
 
+#define SMC_RMI_DEV_MEM_MAP		SMC_RMI_CALL(0x0172)
+#define SMC_RMI_DEV_MEM_UNMAP		SMC_RMI_CALL(0x0173)
+
 #define SMC_RMI_PDEV_COMMUNICATE        SMC_RMI_CALL(0x0175)
 #define SMC_RMI_PDEV_CREATE             SMC_RMI_CALL(0x0176)
 #define SMC_RMI_PDEV_DESTROY		SMC_RMI_CALL(0x0177)
@@ -84,6 +88,7 @@ enum rmi_ripas {
 	RMI_EMPTY = 0,
 	RMI_RAM = 1,
 	RMI_DESTROYED = 2,
+	RMI_DEV = 3,
 };
 
 #define RMI_NO_MEASURE_CONTENT	0
diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c
index 25948207fc5b..77829491805b 100644
--- a/arch/arm64/kvm/rme-exit.c
+++ b/arch/arm64/kvm/rme-exit.c
@@ -170,17 +170,44 @@ EXPORT_SYMBOL_GPL(realm_exit_dev_mem_map_handler);
 static int rec_exit_dev_mem_map(struct kvm_vcpu *vcpu)
 {
 	int ret;
+	struct kvm *kvm = vcpu->kvm;
+	struct realm *realm = &kvm->arch.realm;
 	struct realm_rec *rec = &vcpu->arch.rec;
+	unsigned long base = rec->run->exit.dev_mem_base;
+	unsigned long top = rec->run->exit.dev_mem_top;
+
+	if (!kvm_realm_is_private_address(realm, base) ||
+	    !kvm_realm_is_private_address(realm, top - 1)) {
+		vcpu_err(vcpu, "Invalid DEV_MEM_VALIDATE for %#lx - %#lx\n", base, top);
+		return -EINVAL;
+	}
 
+	/* See if the coco driver wants to look at the dev mem_map request */
 	if (realm_exit_dev_mem_map_handler) {
 		ret = (*realm_exit_dev_mem_map_handler)(rec);
-	} else {
-		kvm_pr_unimpl("Unsupported exit reason: %u\n",
-			      rec->run->exit.exit_reason);
-		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-		ret = 0;
+		if (ret)
+			return ret;
 	}
-	return ret;
+
+#if 0
+	/*
+	 * We don't need a memory fault exit for device mappings:
+	 * 1. On REC entry we map the device memory using dev_mem_map.
+	 * 2. There is no fallocate, and we are not tracking this via
+	 *    memory attributes.
+	 * If we do need a fault exit, the VMM must be able to tell it
+	 * apart so that it doesn't map the private memory via the tsm
+	 * map ioctl.
+	 */
+	/*
+	 * Exit to the VMM so that it can deny the validation; the
+	 * actual validation response is done on the next entry.
+	 */
+	kvm_prepare_memory_fault_exit(vcpu, base, top - base, false, false,
+				      true);
+
+	/* Exit to the VMM */
+	return -EFAULT;
+#else
+	return 1;
+#endif
 }
 
 static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
index d1c147aba2ed..11c8d47e3e9b 100644
--- a/arch/arm64/kvm/rme.c
+++ b/arch/arm64/kvm/rme.c
@@ -445,18 +445,27 @@ void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits)
 	WARN_ON(realm_tear_down_rtt_range(realm, 0, (1UL << ia_bits)));
 }
 
-static int realm_destroy_private_granule(struct realm *realm,
-					 unsigned long ipa,
+static int realm_destroy_private_granule(struct realm *realm, unsigned long ipa,
 					 unsigned long *next_addr,
-					 phys_addr_t *out_rtt)
+					 phys_addr_t *out_rtt,
+					 int *ripas)
 {
 	unsigned long rd = virt_to_phys(realm->rd);
 	unsigned long rtt_addr;
+	struct rtt_entry rtt_entry;
 	phys_addr_t rtt;
 	int ret;
 
+	ret = rmi_rtt_read_entry(rd, ipa, RMM_RTT_MAX_LEVEL, &rtt_entry);
+	if (ret != RMI_SUCCESS)
+		return -ENXIO;
+
 retry:
-	ret = rmi_data_destroy(rd, ipa, &rtt_addr, next_addr);
+	if (rtt_entry.ripas == RMI_DEV)
+		ret = rmi_dev_mem_unmap(rd, ipa, RMM_RTT_MAX_LEVEL, &rtt_addr, next_addr);
+	else
+		ret = rmi_data_destroy(rd, ipa, &rtt_addr, next_addr);
+
 	if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) {
 		if (*next_addr > ipa)
 			return 0; /* UNASSIGNED */
@@ -484,6 +493,7 @@ static int realm_destroy_private_granule(struct realm *realm,
 		return -ENXIO;
 
 	*out_rtt = rtt_addr;
+	*ripas = rtt_entry.ripas;
 
 	return 0;
 }
@@ -495,16 +505,16 @@ static int realm_unmap_private_page(struct realm *realm,
 	unsigned long end = ALIGN(ipa + 1, PAGE_SIZE);
 	unsigned long addr;
 	phys_addr_t out_rtt = PHYS_ADDR_MAX;
-	int ret;
+	int ret, ripas;
 
 	for (addr = ipa; addr < end; addr = *next_addr) {
 		ret = realm_destroy_private_granule(realm, addr, next_addr,
-						    &out_rtt);
+						    &out_rtt, &ripas);
 		if (ret)
 			return ret;
 	}
 
-	if (out_rtt != PHYS_ADDR_MAX) {
+	if (out_rtt != PHYS_ADDR_MAX && ripas != RMI_DEV) {
 		out_rtt = ALIGN_DOWN(out_rtt, PAGE_SIZE);
 		free_page((unsigned long)phys_to_virt(out_rtt));
 	}
@@ -1222,8 +1232,17 @@ static int realm_set_ipa_state(struct kvm_vcpu *vcpu,
 	struct kvm *kvm = vcpu->kvm;
 	int ret = ripas_change(kvm, vcpu, start, end, RIPAS_SET, top_ipa);
 
+#if 0
+	/*
+	 * We don't need to do this because a RIPAS change takes a memory
+	 * fault exit, which results in a stage-2 invalidate that takes
+	 * care of unmapping both the private and the shared IPA. If we
+	 * do need this, it should happen before the RIPAS change, since
+	 * we look at the RIPAS when unmapping the private range.
+	 */
 	if (ripas == RMI_EMPTY && *top_ipa != start)
 		realm_unmap_private_range(kvm, start, *top_ipa, false);
+#endif
 
 	return ret;
 }
@@ -1492,6 +1511,159 @@ static void kvm_complete_ripas_change(struct kvm_vcpu *vcpu)
 	rec->run->exit.ripas_base = base;
 }
 
+/*
+ * Even though we could map a larger block, each granule still has to be
+ * delegated individually, so we map at granule size and fold afterwards.
+ */
+static int realm_dev_mem_map(struct kvm_vcpu *vcpu, unsigned long start_ipa,
+			     unsigned long end_ipa, phys_addr_t phys)
+{
+	int ret = 0;
+	unsigned long ipa;
+	struct kvm *kvm = vcpu->kvm;
+	struct realm *realm = &kvm->arch.realm;
+	phys_addr_t rd = virt_to_phys(realm->rd);
+	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+
+	for (ipa = start_ipa; ipa < end_ipa; ipa += PAGE_SIZE) {
+		if (rmi_granule_delegate(phys))
+			return -EINVAL;
+
+		ret = rmi_dev_mem_map(rd, ipa, RMM_RTT_MAX_LEVEL, phys);
+
+		if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) {
+			/* Create missing RTTs and retry */
+			int level = RMI_RETURN_INDEX(ret);
+
+			ret = realm_create_rtt_levels(realm, ipa, level,
+						      RMM_RTT_MAX_LEVEL,
+						      memcache);
+			WARN_ON(ret);
+			if (ret)
+				goto err_undelegate;
+
+			ret = rmi_dev_mem_map(rd, ipa, RMM_RTT_MAX_LEVEL, phys);
+		}
+		WARN_ON(ret);
+
+		if (ret)
+			goto err_undelegate;
+
+		phys += PAGE_SIZE;
+	}
+
+	/* TODO: should this be ipa = ALIGN(start_ipa, RMM_L2_BLOCK_SIZE) to be safer? */
+	for (ipa = start_ipa; ((ipa + RMM_L2_BLOCK_SIZE) < end_ipa); ipa += RMM_L2_BLOCK_SIZE)
+		fold_rtt(realm, ipa, RMM_RTT_BLOCK_LEVEL);
+
+	return 0;
+
+err_undelegate:
+	WARN_ON(rmi_granule_undelegate(phys));
+
+	while (ipa > start_ipa) {
+		unsigned long out_pa;
+
+		phys -= PAGE_SIZE;
+		ipa -= PAGE_SIZE;
+
+		WARN_ON(rmi_dev_mem_unmap(rd, ipa, RMM_RTT_MAX_LEVEL, &out_pa, NULL));
+
+		WARN_ON(phys != out_pa);
+		WARN_ON(rmi_granule_undelegate(out_pa));
+	}
+	return -ENXIO;
+}
+
+static int realm_dev_mem_validate(struct kvm_vcpu *vcpu,
+				  unsigned long start, unsigned long end,
+				  unsigned long *top_ipa)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct realm *realm = &kvm->arch.realm;
+	struct realm_rec *rec = &vcpu->arch.rec;
+	phys_addr_t rd_phys = virt_to_phys(realm->rd);
+	phys_addr_t rec_phys = virt_to_phys(rec->rec_page);
+	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+	unsigned long ipa = start;
+	int ret = 0;
+
+	while (ipa < end) {
+		unsigned long next;
+
+		ret = rmi_rtt_dev_mem_validate(rd_phys, rec_phys, ipa, end, &next);
+
+		if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) {
+			/*
+			 * FIXME!! We shouldn't hit an RTT error here,
+			 * because everything was already set up by
+			 * dev_mem_map before this point.
+			 */
+			int walk_level = RMI_RETURN_INDEX(ret);
+			int level = find_map_level(realm, ipa, end);
+
+			/*
+			 * If the RMM walk ended early then more tables are
+			 * needed to reach the required depth to set the RIPAS.
+			 */
+			if (walk_level < level) {
+				ret = realm_create_rtt_levels(realm, ipa,
+							      walk_level,
+							      level,
+							      memcache);
+				/* Retry with RTTs created */
+				if (!ret)
+					continue;
+			} else {
+				ret = -EINVAL;
+			}
+
+			break;
+		} else if (RMI_RETURN_STATUS(ret) != RMI_SUCCESS) {
+			WARN(1, "Unexpected error in %s: %#x\n", __func__,
+			     ret);
+			ret = -EINVAL;
+			break;
+		}
+		ipa = next;
+	}
+
+	*top_ipa = ipa;
+
+	return ret;
+}
+
+static void kvm_complete_dev_mem_change(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct realm_rec *rec = &vcpu->arch.rec;
+	unsigned long base = rec->run->exit.dev_mem_base;
+	unsigned long top = rec->run->exit.dev_mem_top;
+	unsigned long pa = rec->run->exit.dev_mem_pa;
+	unsigned long top_ipa;
+	int ret;
+
+	do {
+		kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache,
+					   kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
+		write_lock(&kvm->mmu_lock);
+		/*
+		 * FIXME!! We need to validate these values. Also, the PA
+		 * needs to be tied to the life cycle of the vfio device /
+		 * file descriptor.
+		 */
+		ret = realm_dev_mem_map(vcpu, base, top, pa);
+		if (!ret)
+			ret = realm_dev_mem_validate(vcpu, base, top, &top_ipa);
+		write_unlock(&kvm->mmu_lock);
+		if (ret)
+			break;
+
+		base = top_ipa;
+	} while (top_ipa < top);
+
+	WARN(ret, "Unable to satisfy DEV_MEM_CHANGE for %#lx - %#lx\n", base, top);
+}
+
 /*
  * kvm_rec_pre_enter - Complete operations before entering a REC
  *
@@ -1520,6 +1692,10 @@ int kvm_rec_pre_enter(struct kvm_vcpu *vcpu)
 	case RMI_EXIT_RIPAS_CHANGE:
 		kvm_complete_ripas_change(vcpu);
 		break;
+	case RMI_EXIT_DEV_MEM_MAP:
+		kvm_complete_dev_mem_change(vcpu);
+		break;
 	}
 
 	return 1;
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index afdb39c6aefd..264ee84d7ecd 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1718,6 +1718,7 @@ static const struct vm_operations_struct vfio_pci_mmap_ops = {
 #endif
 };
 
+/* FIXME!! Don't allow mmap once the device is TDISP-locked and dev mem_map has been done. */
 int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
 {
 	struct vfio_pci_core_device *vdev =
-- 
2.43.0

