[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251112182506.7165-12-suravee.suthikulpanit@amd.com>
Date: Wed, 12 Nov 2025 18:25:03 +0000
From: Suravee Suthikulpanit <suravee.suthikulpanit@....com>
To: <jgg@...dia.com>, <nicolinc@...dia.com>
CC: <linux-kernel@...r.kernel.org>, <robin.murphy@....com>, <will@...nel.org>,
<joro@...tes.org>, <kevin.tian@...el.com>, <jsnitsel@...hat.com>,
<vasant.hegde@....com>, <iommu@...ts.linux.dev>, <santosh.shukla@....com>,
<sairaj.arunkodilkar@....com>, <jon.grimm@....com>,
<prashanthpra@...gle.com>, <wvw@...gle.com>, <wnliu@...gle.com>,
<gptran@...gle.com>, <kpsingh@...gle.com>, <joao.m.martins@...cle.com>,
<alejandro.j.jimenez@...cle.com>, Suravee Suthikulpanit
<suravee.suthikulpanit@....com>
Subject: [PATCH v5 11/14] iommu/amd: Introduce gDomID-to-hDomID Mapping and handle parent domain invalidation
Each nested domain is assigned a guest domain ID (gDomID), which the guest OS
programs into the guest Device Table Entry (gDTE). For each gDomID, the driver
assigns a corresponding host domain ID (hDomID), which will be programmed
into the host Device Table Entry (hDTE).
The hDomID is allocated during amd_iommu_alloc_domain_nested(),
and freed during nested_domain_free(). The gDomID-to-hDomID mapping info
(struct guest_domain_mapping_info) is stored in a per-viommu xarray
(struct amd_iommu_viommu.gdomid_array), which is indexed by gDomID.
Note also that a parent domain can be shared among multiple struct iommufd_viommu
instances. Therefore, when the hypervisor invalidates the nest parent domain, the AMD
IOMMU command INVALIDATE_IOMMU_PAGES must be issued for each hDomID in
each gdomid_array. This is handled by iommu_flush_pages_v1_hdom_ids(),
which iterates through struct protection_domain.viommu_list.
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@....com>
---
drivers/iommu/amd/amd_iommu_types.h | 23 +++++++++
drivers/iommu/amd/iommu.c | 35 +++++++++++++
drivers/iommu/amd/iommufd.c | 34 ++++++++++++
drivers/iommu/amd/nested.c | 80 +++++++++++++++++++++++++++++
4 files changed, 172 insertions(+)
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index e46f346fd6c5..734f6a753b3a 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -498,6 +498,22 @@ struct pdom_iommu_info {
struct amd_iommu_viommu {
struct iommufd_viommu core;
struct protection_domain *parent; /* nest parent domain for this viommu */
+ struct list_head pdom_list; /* Node linked into parent->viommu_list */
+
+ /*
+ * Per-vIOMMU guest domain ID to host domain ID mapping.
+ * Indexed by guest domain ID; each entry points to a
+ * struct guest_domain_mapping_info.
+ */
+ struct xarray gdomid_array;
+};
+
+/*
+ * Guest domain ID (gDomID) to host domain ID (hDomID) mapping info.
+ * One instance is stored per gDomID in the struct xarray gdomid_array
+ * and is shared by every nested domain allocated with that gDomID.
+ */
+struct guest_domain_mapping_info {
+ refcount_t users; /* Number of nested domains referencing this mapping */
+ u32 hdom_id; /* Host domain ID */
};
/*
@@ -506,6 +522,7 @@ struct amd_iommu_viommu {
struct nested_domain {
struct iommu_domain domain; /* generic domain handle used by iommu core code */
u16 gdom_id; /* domain ID from gDTE */
+ struct guest_domain_mapping_info *gdom_info; /* gDomID-to-hDomID mapping this domain holds a reference on */
struct iommu_hwpt_amd_guest gdte; /* Guest vIOMMU DTE */
struct amd_iommu_viommu *viommu; /* AMD hw-viommu this nested domain belong to */
};
@@ -530,6 +547,12 @@ struct protection_domain {
struct mmu_notifier mn; /* mmu notifier for the SVA domain */
struct list_head dev_data_list; /* List of pdom_dev_data */
+
+ /*
+ * Store reference to list of vIOMMUs, which use this protection domain.
+ * This will be used to look up host domain ID when flushing this domain.
+ */
+ struct list_head viommu_list;
};
PT_IOMMU_CHECK_DOMAIN(struct protection_domain, iommu, domain);
PT_IOMMU_CHECK_DOMAIN(struct protection_domain, amdv1.iommu, domain);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 724b8723b836..6a26e7a28141 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1513,6 +1513,29 @@ static void amd_iommu_flush_tlb_domid(struct amd_iommu *iommu, u32 dom_id)
iommu_completion_wait(iommu);
}
+/*
+ * Queue an INVALIDATE_IOMMU_PAGES command for every host domain ID that is
+ * recorded in the gdomid_array of any vIOMMU linked on @pdom->viommu_list.
+ *
+ * @pdom:    nest parent protection domain being flushed
+ * @address: start address of the range to invalidate
+ * @size:    size of the range to invalidate
+ *
+ * Commands are only queued here; this function does not wait for their
+ * completion. Returns the bitwise OR of all iommu_queue_command() results,
+ * i.e. zero only if every command was queued successfully.
+ */
+static int iommu_flush_pages_v1_hdom_ids(struct protection_domain *pdom, u64 address, size_t size)
+{
+	struct amd_iommu_viommu *aviommu;
+	int ret = 0;
+
+	list_for_each_entry(aviommu, &pdom->viommu_list, pdom_list) {
+		/* The IOMMU instance backing this vIOMMU receives the commands */
+		struct amd_iommu *iommu = container_of(aviommu->core.iommu_dev,
+						       struct amd_iommu, iommu);
+		struct guest_domain_mapping_info *info;
+		unsigned long gdom_id;
+
+		/* One invalidation per gDomID-to-hDomID mapping of this vIOMMU */
+		xa_for_each(&aviommu->gdomid_array, gdom_id, info) {
+			struct iommu_cmd cmd;
+
+			pr_debug("%s: iommu=%#x, hdom_id=%#x\n", __func__,
+				 iommu->devid, info->hdom_id);
+			build_inv_iommu_pages(&cmd, address, size, info->hdom_id,
+					      IOMMU_NO_PASID, false);
+			ret |= iommu_queue_command(iommu, &cmd);
+		}
+	}
+
+	return ret;
+}
+
static void amd_iommu_flush_all(struct amd_iommu *iommu)
{
struct iommu_cmd cmd;
@@ -1661,6 +1684,17 @@ static int domain_flush_pages_v1(struct protection_domain *pdom,
ret |= iommu_queue_command(pdom_iommu_info->iommu, &cmd);
}
+ /*
+ * A domain w/ v1 table can be a nest parent, which can have
+ * multiple nested domains. Each nested domain has a 1:1 mapping
+ * between gDomID and hDomID. Therefore, flush every hDomID
+ * associated with this nest parent domain.
+ *
+ * See drivers/iommu/amd/nested.c: amd_iommu_alloc_domain_nested()
+ */
+ if (!list_empty(&pdom->viommu_list))
+ ret |= iommu_flush_pages_v1_hdom_ids(pdom, address, size);
+
return ret;
}
@@ -2469,6 +2503,7 @@ static void protection_domain_init(struct protection_domain *domain)
spin_lock_init(&domain->lock);
INIT_LIST_HEAD(&domain->dev_list);
INIT_LIST_HEAD(&domain->dev_data_list);
+ INIT_LIST_HEAD(&domain->viommu_list);
xa_init(&domain->iommu_array);
}
diff --git a/drivers/iommu/amd/iommufd.c b/drivers/iommu/amd/iommufd.c
index eb6119bdcf12..bb53475f9171 100644
--- a/drivers/iommu/amd/iommufd.c
+++ b/drivers/iommu/amd/iommufd.c
@@ -9,6 +9,8 @@
#include "amd_iommu.h"
#include "amd_iommu_types.h"
+static const struct iommufd_viommu_ops amd_viommu_ops;
+
void *amd_iommufd_hw_info(struct device *dev, u32 *length, u32 *type)
{
struct iommu_hw_info_amd *hwinfo;
@@ -38,10 +40,42 @@ size_t amd_iommufd_get_viommu_size(struct device *dev, enum iommu_viommu_type vi
int amd_iommufd_viommu_init(struct iommufd_viommu *viommu, struct iommu_domain *parent,
const struct iommu_user_data *user_data)
{
+ unsigned long flags;
struct protection_domain *pdom = to_pdomain(parent);
struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);
+ xa_init(&aviommu->gdomid_array); /* starts empty; entries are added when nested domains are allocated */
aviommu->parent = pdom;
+ viommu->ops = &amd_viommu_ops; /* provides the .destroy callback that unlinks this vIOMMU */
+
+ spin_lock_irqsave(&pdom->lock, flags);
+ list_add(&aviommu->pdom_list, &pdom->viommu_list); /* lets flushes of the parent domain find this vIOMMU's hDomIDs */
+ spin_unlock_irqrestore(&pdom->lock, flags);
+
return 0;
}
+
+/*
+ * Tear down the AMD-specific state of a vIOMMU.
+ *
+ * Unlinks the vIOMMU from its nest parent's viommu_list, under the same
+ * pdom->lock that amd_iommufd_viommu_init() takes to link it, so that
+ * iommu_flush_pages_v1_hdom_ids() no longer visits it. Then releases the
+ * gDomID-to-hDomID xarray.
+ */
+static void amd_iommufd_viommu_destroy(struct iommufd_viommu *viommu)
+{
+	unsigned long flags;
+	struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);
+	struct protection_domain *pdom = aviommu->parent;
+
+	/*
+	 * The vIOMMU was linked by amd_iommufd_viommu_init(), so it can be
+	 * unlinked directly; searching the whole list for it with IRQs
+	 * disabled is unnecessary.
+	 */
+	spin_lock_irqsave(&pdom->lock, flags);
+	list_del(&aviommu->pdom_list);
+	spin_unlock_irqrestore(&pdom->lock, flags);
+
+	/*
+	 * Release the xarray's internal storage. xa_destroy() does not free
+	 * the entries themselves.
+	 * NOTE(review): all nested domains are presumably freed before the
+	 * vIOMMU is destroyed, leaving the array empty - confirm against the
+	 * iommufd object lifetime rules.
+	 */
+	xa_destroy(&aviommu->gdomid_array);
+}
+
+/*
+ * vIOMMU operations for AMD vIOMMU objects, installed on each vIOMMU by
+ * amd_iommufd_viommu_init(). Only the destroy callback is provided.
+ *
+ * See include/linux/iommufd.h
+ * struct iommufd_viommu_ops - vIOMMU specific operations
+ */
+static const struct iommufd_viommu_ops amd_viommu_ops = {
+ .destroy = amd_iommufd_viommu_destroy,
+};
diff --git a/drivers/iommu/amd/nested.c b/drivers/iommu/amd/nested.c
index dd3e53dd16ea..1bbcb16abecc 100644
--- a/drivers/iommu/amd/nested.c
+++ b/drivers/iommu/amd/nested.c
@@ -6,6 +6,7 @@
#define dev_fmt(fmt) "AMD-Vi: " fmt
#include <linux/iommu.h>
+#include <linux/refcount.h>
#include <uapi/linux/iommufd.h>
#include "amd_iommu.h"
@@ -68,6 +69,7 @@ amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
{
int ret;
struct nested_domain *ndom;
+ struct guest_domain_mapping_info *gdom_info, *curr;
struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);
if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
@@ -92,7 +94,60 @@ amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
ndom->domain.type = IOMMU_DOMAIN_NESTED;
ndom->viommu = aviommu;
+ gdom_info = kzalloc(sizeof(*gdom_info), GFP_KERNEL);
+ if (!gdom_info)
+ goto out_err;
+
+ /*
+ * Normally, when a guest has multiple pass-through devices,
+ * the IOMMU driver sets up DTEs with the same stage-2 table and
+ * uses the same host domain ID (hDomID). In the case of nested translation,
+ * if the guest sets up different stage-1 tables with the same PASID,
+ * the IOMMU would use the same TLB tag. This would result in a TLB
+ * aliasing issue.
+ *
+ * The guest assigns gDomIDs based on its own algorithm for managing
+ * cache tags of (DomID, PASID). Within a single viommu, the nest parent domain
+ * (w/ S2 table) is used by all DTEs. But we need to consistently map the gDomID
+ * to a single hDomID. This is done using an xarray in the vIOMMU to
+ * keep track of the gDomID mapping. When the S2 is changed, the INVALIDATE_IOMMU_PAGES
+ * command must be issued for each hDomID in the xarray.
+ */
+ curr = xa_cmpxchg(&aviommu->gdomid_array,
+ ndom->gdom_id, NULL, gdom_info, GFP_ATOMIC);
+ if (curr) {
+ if (xa_err(curr)) {
+ ret = -EINVAL;
+ goto out_err_gdom_info;
+ } else {
+ /* The gDomID already exist */
+ pr_debug("%s: Found gdom_id=%#x, hdom_id=%#x\n",
+ __func__, ndom->gdom_id, curr->hdom_id);
+ refcount_inc(&curr->users);
+ ndom->gdom_info = curr;
+ kfree(gdom_info);
+ return &ndom->domain;
+ }
+ }
+
+ /* The gDomID does not exist. We allocate new hdom_id */
+ gdom_info->hdom_id = amd_iommu_pdom_id_alloc();
+ if (gdom_info->hdom_id <= 0) {
+ xa_cmpxchg(&aviommu->gdomid_array,
+ ndom->gdom_id, gdom_info, NULL, GFP_ATOMIC);
+ ret = -ENOSPC;
+ goto out_err_gdom_info;
+ }
+
+ refcount_set(&gdom_info->users, 1);
+ ndom->gdom_info = gdom_info;
+ pr_debug("%s: Allocate gdom_id=%#x, hdom_id=%#x\n",
+ __func__, ndom->gdom_id, gdom_info->hdom_id);
+
return &ndom->domain;
+
+out_err_gdom_info:
+ kfree(gdom_info);
out_err:
kfree(ndom);
return ERR_PTR(ret);
@@ -100,8 +155,33 @@ amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
static void nested_domain_free(struct iommu_domain *dom)
{
+	struct guest_domain_mapping_info *gdom_info, *curr;
	struct nested_domain *ndom = to_ndomain(dom);
+	struct amd_iommu_viommu *aviommu = ndom->viommu;
+
+	/*
+	 * Free a nested domain and drop its reference on the gDomID-to-hDomID
+	 * mapping. The last user also removes the mapping from the vIOMMU's
+	 * gdomid_array and releases the host domain ID. The nested domain
+	 * itself is freed on every path.
+	 */
+	gdom_info = ndom->gdom_info;
+
+	/*
+	 * Other nested domains still share this mapping: leave it in place,
+	 * but still release this domain below.
+	 */
+	if (!refcount_dec_and_test(&gdom_info->users))
+		goto out_free_ndom;
+
+	/*
+	 * The refcount for the gdom_id to hdom_id mapping is zero.
+	 * It is now safe to remove the mapping.
+	 *
+	 * NOTE(review): amd_iommu_alloc_domain_nested() looks up the entry and
+	 * increments the refcount without a lock shared with this path, so a
+	 * concurrent allocation could still find the entry before it is
+	 * removed here. Closing that window needs changes on both sides.
+	 */
+	curr = xa_cmpxchg(&aviommu->gdomid_array, ndom->gdom_id,
+			  gdom_info, NULL, GFP_ATOMIC);
+	if (xa_err(curr)) {
+		/*
+		 * The entry may still be published in the xarray, so the
+		 * mapping and its hDomID must stay valid. Leak them rather
+		 * than risk a use-after-free from the flush path.
+		 */
+		pr_err("%s: Failed to free nested domain gdom_id=%#x\n",
+		       __func__, ndom->gdom_id);
+		goto out_free_ndom;
+	}
+
+	/*
+	 * Free only the mapping this domain owns. xa_cmpxchg() returns the
+	 * previous entry, which must not be freed blindly: if it is not
+	 * gdom_info it belongs to someone else.
+	 */
+	pr_debug("%s: Free gdom_id=%#x, hdom_id=%#x\n",
+		 __func__, ndom->gdom_id, gdom_info->hdom_id);
+
+	/* Release the hDomID before freeing the structure that stores it */
+	amd_iommu_pdom_id_free(gdom_info->hdom_id);
+	kfree(gdom_info);
+
+out_free_ndom:
	kfree(ndom);
}
--
2.34.1
Powered by blists - more mailing lists