[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250820113009.5233-9-suravee.suthikulpanit@amd.com>
Date: Wed, 20 Aug 2025 11:30:09 +0000
From: Suravee Suthikulpanit <suravee.suthikulpanit@....com>
To: <jgg@...dia.com>, <nicolinc@...dia.com>
CC: <linux-kernel@...r.kernel.org>, <robin.murphy@....com>, <will@...nel.org>,
<joro@...tes.org>, <kevin.tian@...el.com>, <jsnitsel@...hat.com>,
<vasant.hegde@....com>, <iommu@...ts.linux.dev>, <santosh.shukla@....com>,
<sairaj.arunkodilkar@....com>, <jon.grimm@....com>,
<prashanthpra@...gle.com>, <wvw@...gle.com>, <wnliu@...gle.com>,
<gptran@...gle.com>, <kpsingh@...gle.com>, Suravee Suthikulpanit
<suravee.suthikulpanit@....com>
Subject: [PATCH 8/8] iommu/amd: Add support for nested domain attach/detach
Program the guest translation settings into the host DTE when attaching
a nested domain to a device.
Also, enable the GCR3TRPMode feature when supported.
Note that nested translation is only supported with the GCR3TRP mode.
When it is enabled, the AMD IOMMU driver programs the GCR3 Table Root
Pointer field of the device table entry with the GPA provided by the guest.
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@....com>
---
drivers/iommu/amd/amd_iommu_types.h | 5 ++
drivers/iommu/amd/init.c | 3 ++
drivers/iommu/amd/iommu.c | 49 +++++++++++++++---
drivers/iommu/amd/nested.c | 80 +++++++++++++++++++++++++++--
4 files changed, 127 insertions(+), 10 deletions(-)
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 94f51a09b364..f8c392aadeb1 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -191,6 +191,7 @@
#define CONTROL_EPH_EN 45
#define CONTROL_XT_EN 50
#define CONTROL_INTCAPXT_EN 51
+#define CONTROL_GCR3TRPMODE 58
#define CONTROL_IRTCACHEDIS 59
#define CONTROL_SNPAVIC_EN 61
@@ -420,6 +421,8 @@
#define DTE_FLAG_V BIT_ULL(0)
#define DTE_FLAG_TV BIT_ULL(1)
#define DTE_FLAG_HAD (3ULL << 7)
+#define DTE_FLAG_PPR BIT_ULL(52)
+#define DTE_FLAG_GLX BIT_ULL(53)
#define DTE_FLAG_GIOV BIT_ULL(54)
#define DTE_FLAG_GV BIT_ULL(55)
#define DTE_GLX GENMASK_ULL(57, 56)
@@ -555,6 +558,7 @@ struct amd_irte_ops;
struct gcr3_tbl_info {
u64 *gcr3_tbl; /* Guest CR3 table */
+ u64 trp_gpa; /* Guest CR3 TRP GPA for nested domain */
int glx; /* Number of levels for GCR3 table */
u32 pasid_cnt; /* Track attached PASIDs */
u16 domid; /* Per device domain ID */
@@ -610,6 +614,7 @@ struct protection_domain {
struct protection_domain *parent; /* Nested parent domain */
struct iommu_hwpt_amd_v2 guest_hwpt;
+ u16 guest_paging_mode; /* Guest paging mode */
};
/*
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 8de689b2c5ed..b340afd6901f 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -971,6 +971,9 @@ static void iommu_enable_gt(struct amd_iommu *iommu)
return;
iommu_feature_enable(iommu, CONTROL_GT_EN);
+
+ if (check_feature2(FEATURE_GCR3TRPMODE))
+ iommu_feature_enable(iommu, CONTROL_GCR3TRPMODE);
}
/* sets a specific bit in the device table entry. */
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index ea790a8997ee..935eaffb6814 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1856,6 +1856,18 @@ static void free_gcr3_tbl_level2(u64 *tbl)
}
}
+static inline bool amd_iommu_domain_is_nested(struct protection_domain *pdom)
+{
+ return (pdom && (pdom->domain.type == IOMMU_DOMAIN_NESTED));
+}
+
+static inline bool has_gcr3_table(struct gcr3_tbl_info *gcr3_info)
+{
+ if (!gcr3_info || (!gcr3_info->gcr3_tbl && !gcr3_info->trp_gpa))
+ return false;
+ return true;
+}
+
static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info)
{
if (gcr3_info->glx == 2)
@@ -1901,7 +1913,7 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info,
if (levels > amd_iommu_max_glx_val)
return -EINVAL;
- if (gcr3_info->gcr3_tbl)
+ if (has_gcr3_table(gcr3_info))
return -EBUSY;
/* Allocate per device domain ID */
@@ -2023,17 +2035,31 @@ static void set_dte_gcr3_table(struct amd_iommu *iommu,
struct dev_table_entry *target)
{
struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
+ struct protection_domain *pdom = dev_data->domain;
u64 gcr3;
- if (!gcr3_info->gcr3_tbl)
+ if (!has_gcr3_table(gcr3_info))
return;
- pr_debug("%s: devid=%#x, glx=%#x, giov=%#x, gcr3_tbl=%#llx\n",
+ /* We need to check host capability before setting the mode. */
+ if ((pdom->guest_paging_mode == PAGE_MODE_5_LEVEL) &&
+ (amd_iommu_gpt_level < PAGE_MODE_5_LEVEL)) {
+ pr_err("Cannot support Guest paging mode=%#x (dom_id=%#x).\n",
+ pdom->guest_paging_mode, pdom->id);
+ return;
+ }
+
+ pr_debug("%s: devid=%#x, glx=%#x, giov=%#x, gcr3_tbl=%#llx, trp_gpa=%#llx, type=%#x\n",
__func__, dev_data->devid, gcr3_info->glx, gcr3_info->giov,
- (unsigned long long)gcr3_info->gcr3_tbl);
+ (unsigned long long)gcr3_info->gcr3_tbl, gcr3_info->trp_gpa,
+ pdom->domain.type);
gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
+ /* For nested domain, use GCR3 GPA provided */
+ if (gcr3_info->trp_gpa)
+ gcr3 = gcr3_info->trp_gpa;
+
target->data[0] |= DTE_FLAG_GV |
FIELD_PREP(DTE_GLX, gcr3_info->glx) |
FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12);
@@ -2044,7 +2070,7 @@ static void set_dte_gcr3_table(struct amd_iommu *iommu,
FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);
/* Guest page table can only support 4 and 5 levels */
- if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL)
+ if (pdom->guest_paging_mode == PAGE_MODE_5_LEVEL)
target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_5_LEVEL);
else
target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL);
@@ -2061,7 +2087,14 @@ static void set_dte_entry(struct amd_iommu *iommu,
struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
- if (gcr3_info && gcr3_info->gcr3_tbl)
+ /*
+ * For nested domain, use parent domain to setup v1 table
+ * information and domain id.
+ */
+ if (amd_iommu_domain_is_nested(domain))
+ domain = domain->parent;
+
+ if (has_gcr3_table(gcr3_info))
domid = dev_data->gcr3_info.domid;
else
domid = domain->id;
@@ -2293,7 +2326,8 @@ int __amd_iommu_attach_device(struct device *dev, struct protection_domain *doma
goto out;
/* Setup GCR3 table */
- if (pdom_is_sva_capable(domain)) {
+ if (!amd_iommu_domain_is_nested(domain) && pdom_is_sva_capable(domain)) {
+ pr_warn("%s: Allocating guest page table\n", __func__);
ret = init_gcr3_table(dev_data, domain);
if (ret) {
pdom_detach_iommu(iommu, domain);
@@ -2519,6 +2553,7 @@ static int pdom_setup_pgtable(struct protection_domain *domain,
fmt = AMD_IOMMU_V1;
break;
case PD_MODE_V2:
+ domain->guest_paging_mode = amd_iommu_gpt_level;
fmt = AMD_IOMMU_V2;
break;
case PD_MODE_NONE:
diff --git a/drivers/iommu/amd/nested.c b/drivers/iommu/amd/nested.c
index 09f2a455af33..c9bf44e6298d 100644
--- a/drivers/iommu/amd/nested.c
+++ b/drivers/iommu/amd/nested.c
@@ -12,9 +12,7 @@
#include "amd_iommu.h"
#include "amd_iommu_types.h"
-const struct iommu_domain_ops nested_domain_ops = {
- .free = amd_iommu_domain_free,
-};
+const struct iommu_domain_ops nested_domain_ops;
static int udata_to_iommu_hwpt_amd_v2(const struct iommu_user_data *user_data,
struct iommu_hwpt_amd_v2 *hwpt)
@@ -77,3 +75,79 @@ amd_iommu_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
kfree(pdom);
return ERR_PTR(-EINVAL);
}
+
+static inline u64 hwpt_to_gcr3_trp(u64 *dte)
+{
+ u64 gcr3;
+
+ gcr3 = (FIELD_GET(DTE_GCR3_14_12, dte[0]) << 12);
+ gcr3 |= (FIELD_GET(DTE_GCR3_30_15, dte[1]) << 15);
+ gcr3 |= (FIELD_GET(DTE_GCR3_51_31, dte[1]) << 31);
+ return gcr3;
+}
+
+static int nested_gcr3_update(struct protection_domain *pdom, struct device *dev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+ struct iommu_hwpt_amd_v2 *hwpt = &pdom->guest_hwpt;
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (!pdev || !hwpt)
+ return -EINVAL;
+
+ /* Note: Currently only support GCR3TRPMode with nested translation */
+ if (!check_feature2(FEATURE_GCR3TRPMODE))
+ return -EOPNOTSUPP;
+
+ if (FIELD_GET(DTE_GPT_LEVEL_MASK, hwpt->dte[2]) == GUEST_PGTABLE_5_LEVEL)
+ pdom->guest_paging_mode = PAGE_MODE_5_LEVEL;
+ else
+ pdom->guest_paging_mode = PAGE_MODE_4_LEVEL;
+
+ dev_data->ppr = FIELD_GET(DTE_FLAG_PPR, hwpt->dte[0]);
+ dev_data->gcr3_info.glx = FIELD_GET(DTE_FLAG_GLX, hwpt->dte[0]);
+ dev_data->gcr3_info.giov = FIELD_GET(DTE_FLAG_GIOV, hwpt->dte[0]);
+ dev_data->gcr3_info.trp_gpa = hwpt_to_gcr3_trp(hwpt->dte);
+ /* Due to possible aliasing issue use nested domain ID */
+ dev_data->gcr3_info.domid = pdom->id;
+ pr_debug("%s: devid=%#x, domid=%#x, trp_gpa=%#llx, glx=%#x\n", __func__,
+ pci_dev_id(pdev),
+ dev_data->gcr3_info.domid,
+ dev_data->gcr3_info.trp_gpa,
+ dev_data->gcr3_info.glx);
+
+ return 0;
+}
+
+static int amd_iommu_nested_attach_device(struct iommu_domain *dom, struct device *dev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+ struct protection_domain *pdom = to_pdomain(dom);
+ struct pci_dev *pdev;
+ int ret;
+
+ if (dev_data->domain == pdom)
+ return 0;
+
+ ret = nested_gcr3_update(pdom, dev);
+ if (ret)
+ return ret;
+
+ if (dev_data->domain)
+ amd_iommu_detach_device(dev);
+
+ ret = __amd_iommu_attach_device(dev, pdom);
+ if (ret)
+ return ret;
+
+ pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL;
+ if (pdev)
+ amd_iommu_pdev_enable_cap_ats(pdev);
+
+ return ret;
+}
+
+const struct iommu_domain_ops nested_domain_ops = {
+ .attach_dev = amd_iommu_nested_attach_device,
+ .free = amd_iommu_domain_free,
+};
--
2.34.1
Powered by blists - more mailing lists