Message-ID: <20251117184815.1027271-11-smostafa@google.com>
Date: Mon, 17 Nov 2025 18:47:57 +0000
From: Mostafa Saleh <smostafa@...gle.com>
To: linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org,
kvmarm@...ts.linux.dev, iommu@...ts.linux.dev
Cc: catalin.marinas@....com, will@...nel.org, maz@...nel.org,
oliver.upton@...ux.dev, joey.gouly@....com, suzuki.poulose@....com,
yuzenghui@...wei.com, joro@...tes.org, jean-philippe@...aro.org, jgg@...pe.ca,
praan@...gle.com, danielmentz@...gle.com, mark.rutland@....com,
qperret@...gle.com, tabba@...gle.com, Mostafa Saleh <smostafa@...gle.com>
Subject: [PATCH v5 10/27] KVM: arm64: iommu: Add memory pool
IOMMU drivers need to allocate memory for their shadow page
tables. Similar to the host stage-2 CPU page table, this memory
is allocated early from the carveout and added to a pool from
which the IOMMU driver can allocate and reclaim at run time.
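As a sketch of the hyp-side interface added here (the driver
function below is hypothetical, for illustration only):

	/*
	 * Illustration only ("my_driver" is a hypothetical hyp-side
	 * driver): take an order-0 page from the pool for a page table
	 * and hand it back once it is no longer needed.
	 */
	static int my_driver_alloc_pgtable(void)
	{
		void *pgd = kvm_iommu_donate_pages(0);

		if (!pgd)
			return -ENOMEM;
		/* ... populate and install the page table ... */
		kvm_iommu_reclaim_pages(pgd);
		return 0;
	}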
As this happens too early for drivers to use initcalls, a default
value can be set in the kernel config through IOMMU_POOL_PAGES,
which can then be overridden from the kernel command line:
"kvm-arm.hyp_iommu_pages".
Later, when the driver registers, it passes how many pages it
needs; if that is more than what was allocated, it will fail
to register.
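For instance, a driver would register roughly as follows
(my_hyp_iommu_ops and MY_DRIVER_PGTABLE_PAGES are hypothetical):

	/*
	 * Illustration only: registration fails with -ENOMEM if fewer
	 * pages than requested were reserved for the pool.
	 */
	ret = kvm_iommu_register_driver(&my_hyp_iommu_ops, MY_DRIVER_PGTABLE_PAGES);
	if (ret)
		return ret;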
Signed-off-by: Mostafa Saleh <smostafa@...gle.com>
---
.../admin-guide/kernel-parameters.txt | 4 +++
arch/arm64/include/asm/kvm_host.h | 3 +-
arch/arm64/kvm/Kconfig | 7 +++++
arch/arm64/kvm/hyp/include/nvhe/iommu.h | 5 ++-
arch/arm64/kvm/hyp/nvhe/iommu/iommu.c | 20 +++++++++++-
arch/arm64/kvm/hyp/nvhe/setup.c | 16 +++++++++-
arch/arm64/kvm/iommu.c | 31 ++++++++++++++++++-
arch/arm64/kvm/pkvm.c | 1 +
8 files changed, 82 insertions(+), 5 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6c42061ca20e..f843d10a3dfc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3059,6 +3059,10 @@
trap: set WFI instruction trap
notrap: clear WFI instruction trap
+ kvm-arm.hyp_iommu_pages=
+ [KVM, ARM, EARLY]
+ Number of pages allocated for the IOMMU pool from the
+ KVM carveout when running in protected mode.
kvm_cma_resv_ratio=n [PPC,EARLY]
Reserves given percentage from system memory area for
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index fb2551ba8798..5496c52d0163 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1654,7 +1654,8 @@ static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg
#ifndef __KVM_NVHE_HYPERVISOR__
struct kvm_iommu_ops;
-int kvm_iommu_register_driver(struct kvm_iommu_ops *hyp_ops);
+int kvm_iommu_register_driver(struct kvm_iommu_ops *hyp_ops, size_t pool_pages);
+size_t kvm_iommu_pages(void);
#endif
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 4f803fd1c99a..6a1bd82a0d07 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -83,4 +83,11 @@ config PTDUMP_STAGE2_DEBUGFS
If in doubt, say N.
+config IOMMU_POOL_PAGES
+ hex "Number of pages reserved for IOMMU pool"
+ depends on KVM && IOMMU_SUPPORT
+ default 0x0
+ help
+ The IOMMU pool is used in protected mode to allocate IOMMU driver page tables.
+
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/hyp/include/nvhe/iommu.h b/arch/arm64/kvm/hyp/include/nvhe/iommu.h
index 219363045b1c..9f4906c6dcc9 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/iommu.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/iommu.h
@@ -10,8 +10,11 @@ struct kvm_iommu_ops {
void (*host_stage2_idmap)(phys_addr_t start, phys_addr_t end, int prot);
};
-int kvm_iommu_init(void);
+int kvm_iommu_init(void *pool_base, size_t nr_pages);
void kvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
enum kvm_pgtable_prot prot);
+void *kvm_iommu_donate_pages(u8 order);
+void kvm_iommu_reclaim_pages(void *ptr);
+
#endif /* __ARM64_KVM_NVHE_IOMMU_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
index 414bd4c97690..a0df34ecf6b0 100644
--- a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
+++ b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
@@ -15,6 +15,7 @@ struct kvm_iommu_ops *kvm_iommu_ops;
/* Protected by host_mmu.lock */
static bool kvm_idmap_initialized;
+static struct hyp_pool iommu_pages_pool;
static inline int pkvm_to_iommu_prot(enum kvm_pgtable_prot prot)
{
@@ -72,7 +73,7 @@ static int kvm_iommu_snapshot_host_stage2(void)
return ret;
}
-int kvm_iommu_init(void)
+int kvm_iommu_init(void *pool_base, size_t nr_pages)
{
int ret;
@@ -80,6 +81,13 @@ int kvm_iommu_init(void)
!kvm_iommu_ops->host_stage2_idmap)
return -ENODEV;
+ if (nr_pages) {
+ ret = hyp_pool_init(&iommu_pages_pool, hyp_virt_to_pfn(pool_base),
+ nr_pages, 0);
+ if (ret)
+ return ret;
+ }
+
ret = kvm_iommu_ops->init();
if (ret)
return ret;
@@ -95,3 +103,13 @@ void kvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
return;
kvm_iommu_ops->host_stage2_idmap(start, end, pkvm_to_iommu_prot(prot));
}
+
+void *kvm_iommu_donate_pages(u8 order)
+{
+ return hyp_alloc_pages(&iommu_pages_pool, order);
+}
+
+void kvm_iommu_reclaim_pages(void *ptr)
+{
+ hyp_put_page(&iommu_pages_pool, ptr);
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index de79803e7439..c245ea88c480 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -22,6 +22,13 @@
unsigned long hyp_nr_cpus;
+/* See kvm_iommu_pages() */
+#ifdef CONFIG_IOMMU_POOL_PAGES
+size_t hyp_kvm_iommu_pages = CONFIG_IOMMU_POOL_PAGES;
+#else
+size_t hyp_kvm_iommu_pages;
+#endif
+
#define hyp_percpu_size ((unsigned long)__per_cpu_end - \
(unsigned long)__per_cpu_start)
@@ -33,6 +40,7 @@ static void *selftest_base;
static void *ffa_proxy_pages;
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
static struct hyp_pool hpool;
+static void *iommu_base;
static int divide_memory_pool(void *virt, unsigned long size)
{
@@ -70,6 +78,12 @@ static int divide_memory_pool(void *virt, unsigned long size)
if (!ffa_proxy_pages)
return -ENOMEM;
+ if (hyp_kvm_iommu_pages) {
+ iommu_base = hyp_early_alloc_contig(hyp_kvm_iommu_pages);
+ if (!iommu_base)
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -329,7 +343,7 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
- ret = kvm_iommu_init();
+ ret = kvm_iommu_init(iommu_base, hyp_kvm_iommu_pages);
if (ret)
goto out;
diff --git a/arch/arm64/kvm/iommu.c b/arch/arm64/kvm/iommu.c
index c9041dcb6c57..6143fd3e1de3 100644
--- a/arch/arm64/kvm/iommu.c
+++ b/arch/arm64/kvm/iommu.c
@@ -7,9 +7,38 @@
#include <linux/kvm_host.h>
extern struct kvm_iommu_ops *kvm_nvhe_sym(kvm_iommu_ops);
+extern size_t kvm_nvhe_sym(hyp_kvm_iommu_pages);
-int kvm_iommu_register_driver(struct kvm_iommu_ops *hyp_ops)
+int kvm_iommu_register_driver(struct kvm_iommu_ops *hyp_ops, size_t pool_pages)
{
+ /* See kvm_iommu_pages() */
+ if (pool_pages > kvm_nvhe_sym(hyp_kvm_iommu_pages)) {
+ kvm_err("Missing memory for the IOMMU pool, need 0x%zx pages, check kvm-arm.hyp_iommu_pages",
+ pool_pages);
+ return -ENOMEM;
+ }
+
kvm_nvhe_sym(kvm_iommu_ops) = hyp_ops;
return 0;
}
+
+size_t kvm_iommu_pages(void)
+{
+ /*
+ * This is called very early during setup_arch(), before initcalls
+ * run, so querying each IOMMU driver for its requirements would
+ * need driver-specific calls. Instead, a config option sets the
+ * default value for the IOMMU pool, which can be overridden by a
+ * command line option.
+ * When the driver registers, it passes the number of pages needed
+ * for its page tables; if that is more than what the system has
+ * already allocated, registration fails.
+ */
+ return kvm_nvhe_sym(hyp_kvm_iommu_pages);
+}
+
+/* Number of pages to reserve for the IOMMU pool */
+static int __init early_hyp_iommu_pages(char *arg)
+{
+ return kstrtoul(arg, 10, &kvm_nvhe_sym(hyp_kvm_iommu_pages));
+}
+early_param("kvm-arm.hyp_iommu_pages", early_hyp_iommu_pages);
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 24f0f8a8c943..b9d212b48c04 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -63,6 +63,7 @@ void __init kvm_hyp_reserve(void)
hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
hyp_mem_pages += pkvm_selftest_pages();
hyp_mem_pages += hyp_ffa_proxy_pages();
+ hyp_mem_pages += kvm_iommu_pages();
/*
* Try to allocate a PMD-aligned region to reduce TLB pressure once
--
2.52.0.rc1.455.g30608eb744-goog