Message-ID: <20251006145900.GT3360665@nvidia.com>
Date: Mon, 6 Oct 2025 11:59:00 -0300
From: Jason Gunthorpe <jgg@...dia.com>
To: Suravee Suthikulpanit <suravee.suthikulpanit@....com>
Cc: nicolinc@...dia.com, linux-kernel@...r.kernel.org, robin.murphy@....com,
will@...nel.org, joro@...tes.org, kevin.tian@...el.com,
jsnitsel@...hat.com, vasant.hegde@....com, iommu@...ts.linux.dev,
santosh.shukla@....com, sairaj.arunkodilkar@....com,
jon.grimm@....com, prashanthpra@...gle.com, wvw@...gle.com,
wnliu@...gle.com, gptran@...gle.com, kpsingh@...gle.com,
joao.m.martins@...cle.com, alejandro.j.jimenez@...cle.com
Subject: Re: [PATCH v2 11/12] iommu/amd: Add support for nested domain
attach/detach
On Wed, Oct 01, 2025 at 06:09:53AM +0000, Suravee Suthikulpanit wrote:
> +static void set_dte_nested(struct amd_iommu *iommu,
> + struct dev_table_entry *gdte,
> + struct nested_domain *ndom,
> + struct iommu_dev_data *dev_data)
> +{
> + struct dev_table_entry *initial_dte;
> + struct dev_table_entry new = {0};
> + struct protection_domain *pdom = dev_data->parent;
No, this is ndom->parent.
The parent is NOT required to be attached to the device already.
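Something like this (untested; assuming the nested_domain from the
earlier patch records its nest parent in ndom->parent):

	/* Use the parent recorded at nested domain allocation time, not
	 * whatever happens to be attached to the device right now. */
	struct protection_domain *pdom = ndom->parent;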
> + if (WARN_ON(!ndom || !pdom || (pdom->iop.mode == PAGE_MODE_NONE)))
> + return;
> +
> + amd_iommu_make_clear_dte(dev_data, &new);
> +
> + new.data[0] |= iommu_virt_to_phys(pdom->iop.root);
> + new.data[0] |= FIELD_PREP(DTE_MODE_MASK, pdom->iop.mode);
> + new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_TV;
> + new.data[0] |= (DTE_FLAG_PPR & gdte->data[0]);
> + if (pdom->dirty_tracking)
> + new.data[0] |= DTE_FLAG_HAD;
> +
> + if (dev_data->ats_enabled)
> + new.data[1] |= DTE_FLAG_IOTLB;
This sequence should be in some set_dte_gcr3() ??
> + /*
> + * Restore cached persistent DTE bits, which can be set by information
> + * in IVRS table. See set_dev_entry_from_acpi().
> + */
> + initial_dte = amd_iommu_get_ivhd_dte_flags(iommu->pci_seg->id, dev_data->devid);
> + if (initial_dte) {
> + new.data128[0] |= initial_dte->data128[0];
> + new.data128[1] |= initial_dte->data128[1];
> + }
This should go into amd_iommu_make_clear_dte(), I think, and be
refactored out of iommu_update_dte256()?
Every created DTE needs these bits set, right?
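Roughly, as an untested sketch (keeping the helper names already used in
this series; the body of amd_iommu_make_clear_dte() otherwise stays as it
is today):

	void amd_iommu_make_clear_dte(struct iommu_dev_data *dev_data,
				      struct dev_table_entry *new)
	{
		struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
		struct dev_table_entry *initial_dte;

		/* ... existing clearing and default bits ... */

		/*
		 * Fold in the persistent bits cached from the IVRS table so
		 * every freshly built DTE gets them, rather than open coding
		 * this here and in iommu_update_dte256().
		 */
		initial_dte = amd_iommu_get_ivhd_dte_flags(iommu->pci_seg->id,
							   dev_data->devid);
		if (initial_dte) {
			new->data128[0] |= initial_dte->data128[0];
			new->data128[1] |= initial_dte->data128[1];
		}
	}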
> +
> + /* Guest translation stuff */
> + new.data[0] |= (gdte->data[0] &
> + (DTE_GLX | DTE_FLAG_GV | DTE_FLAG_GIOV));
> +
> + /* GCR3 table */
> + new.data[0] |= (gdte->data[0] & DTE_GCR3_14_12);
> + new.data[1] |= (gdte->data[1] & (DTE_GCR3_30_15 | DTE_GCR3_51_31));
> +
> + /* Guest paging mode */
> + new.data[2] |= (gdte->data[2] & DTE_GPT_LEVEL_MASK);
I didn't see anything in the prior patch validating that gdte has only
permitted bits set?
If this is going to decode the array item by item, then why not use
struct iommu_hwpt_amd_guest in the nested_domain?
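ie something like this at allocation time (untested, and the mask list
is only illustrative of the idea, not a vetted set of legal guest bits):

	static int nested_validate_gdte(const struct dev_table_entry *gdte)
	{
		/* Only the guest-translation/GCR3 related bits may be set
		 * by the VMM; reject anything else. */
		if (gdte->data[0] & ~(DTE_FLAG_GV | DTE_GLX | DTE_FLAG_GIOV |
				      DTE_FLAG_PPR | DTE_GCR3_14_12))
			return -EINVAL;
		if (gdte->data[1] & ~(DTE_GCR3_30_15 | DTE_GCR3_51_31))
			return -EINVAL;
		if (gdte->data[2] & ~DTE_GPT_LEVEL_MASK)
			return -EINVAL;
		if (gdte->data[3])
			return -EINVAL;
		return 0;
	}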
> +static int nested_attach_device(struct iommu_domain *dom, struct device *dev)
> +{
> + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
> + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
> + struct nested_domain *ndom = to_ndomain(dom);
> + struct dev_table_entry *gdte = &ndom->guest_dte;
> + int ret = 0;
> +
> + if (dev_data->ndom == ndom)
> + return ret;
> +
> + if (!dev_is_pci(dev))
> + return -EINVAL;
Why?
> + /* Currently only support GCR3TRPMode with nested translation */
> + if (!check_feature2(FEATURE_GCR3TRPMODE))
> + return -EOPNOTSUPP;
This is impossible, since without it we can't allocate a nest parent in
the first place. If you want to make a redundant check then call
is_nest_parent_supported()
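ie (assuming is_nest_parent_supported() from the earlier patch takes no
argument; adjust to whatever its actual signature is):

	if (!is_nest_parent_supported())
		return -EOPNOTSUPP;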
> + /* We need to check host capability before setting the mode */
> + if ((FIELD_GET(DTE_GPT_LEVEL_MASK, gdte->data[2]) == GUEST_PGTABLE_5_LEVEL) &&
> + (amd_iommu_gpt_level < PAGE_MODE_5_LEVEL))
> + return -EOPNOTSUPP;
I wonder if this should be done during alloc
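ie move it into the nested domain allocation path, roughly (untested):

	/* A 5-level guest page table needs a host that supports 5 levels */
	if (FIELD_GET(DTE_GPT_LEVEL_MASK, gdte->data[2]) == GUEST_PGTABLE_5_LEVEL &&
	    amd_iommu_gpt_level < PAGE_MODE_5_LEVEL)
		return ERR_PTR(-EOPNOTSUPP);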
> + WARN_ON(dev_data->ndom);
> +
> + dev_data->ndom = ndom;
Useless?
> + mutex_lock(&dev_data->mutex);
> +
> + /* Update device table entry */
> + set_dte_nested(iommu, gdte, ndom, dev_data);
> + amd_iommu_device_flush_dte(dev_data);
> + amd_iommu_completion_wait(iommu);
Hurray
Jason