[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <596ed125-c995-4e2b-824c-b7929c852754@oracle.com>
Date: Thu, 24 Apr 2025 13:19:43 +0100
From: Joao Martins <joao.m.martins@...cle.com>
To: Ankit Soni <Ankit.Soni@....com>, iommu@...ts.linux.dev
Cc: suravee.suthikulpanit@....com, joro@...tes.org, will@...nel.org,
robin.murphy@....com, linux-kernel@...r.kernel.org,
Alejandro Jimenez <alejandro.j.jimenez@...cle.com>,
David Woodhouse <dwmw2@...radead.org>
Subject: Re: [PATCH 1/2] iommu/amd: Add HATDis feature support
On 23/04/2025 07:50, Ankit Soni wrote:
> Current AMD IOMMU assumes Host Address Translation (HAT) is always
> supported, and Linux kernel enables this capability by default. However,
> in case of emulated and virtualized IOMMU, this might not be the case.
+Alejandro as he is filling that gap
> For example, current QEMU-emulated AMD vIOMMU does not support host
> translation for VFIO pass-through device, but the interrupt remapping
> support is required for x2APIC (i.e. kvm-msi-ext-dest-id is also not
> supported by the guest OS). This would require the guest kernel to boot
> with guest kernel option iommu=pt to by-pass the initialization of
> host (v1) table.
>
> The AMD I/O Virtualization Technology (IOMMU) Specification Rev 3.10 [1]
> introduces a new flag 'HATDis' in the IVHD 11h IOMMU attributes to indicate
> that HAT is not supported on a particular IOMMU instance.
>
> Therefore, modify the AMD IOMMU driver to detect the new HATDis
> attributes, and disable host translation and switch to use guest
> translation if it is available. Otherwise, the driver will disable DMA
> translation.
>
> [1] https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_IOMMU.pdf
>
> Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@....com>
> Signed-off-by: Ankit Soni <Ankit.Soni@....com>
> ---
> drivers/iommu/amd/amd_iommu.h | 1 +
> drivers/iommu/amd/amd_iommu_types.h | 6 +++++-
> drivers/iommu/amd/init.c | 23 +++++++++++++++++++++--
> drivers/iommu/amd/iommu.c | 13 +++++++++++++
> 4 files changed, 40 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
> index 220c598b7e14..bb14c4800dd0 100644
> --- a/drivers/iommu/amd/amd_iommu.h
> +++ b/drivers/iommu/amd/amd_iommu.h
> @@ -43,6 +43,7 @@ extern int amd_iommu_guest_ir;
> extern enum protection_domain_mode amd_iommu_pgtable;
> extern int amd_iommu_gpt_level;
> extern unsigned long amd_iommu_pgsize_bitmap;
> +extern bool amd_iommu_hatdis;
>
> /* Protection domain ops */
> void amd_iommu_init_identity_domain(void);
> diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
> index 5089b58e528a..284ff4309660 100644
> --- a/drivers/iommu/amd/amd_iommu_types.h
> +++ b/drivers/iommu/amd/amd_iommu_types.h
> @@ -460,6 +460,9 @@
> /* IOMMU Feature Reporting Field (for IVHD type 10h */
> #define IOMMU_FEAT_GASUP_SHIFT 6
>
> +/* IOMMU HATDIS for IVHD type 11h and 40h */
> +#define IOMMU_IVHD_ATTR_HATDIS_SHIFT 0
> +
> /* IOMMU Extended Feature Register (EFR) */
> #define IOMMU_EFR_XTSUP_SHIFT 2
> #define IOMMU_EFR_GASUP_SHIFT 7
> @@ -558,7 +561,8 @@ struct amd_io_pgtable {
> };
>
> enum protection_domain_mode {
> - PD_MODE_V1 = 1,
> + PD_MODE_NONE,
> + PD_MODE_V1,
> PD_MODE_V2,
> };
>
> diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
> index dd9e26b7b718..f71b236c2af2 100644
> --- a/drivers/iommu/amd/init.c
> +++ b/drivers/iommu/amd/init.c
> @@ -151,7 +151,7 @@ struct ivmd_header {
> bool amd_iommu_dump;
> bool amd_iommu_irq_remap __read_mostly;
>
> -enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1;
> +enum protection_domain_mode amd_iommu_pgtable = PD_MODE_NONE;
> /* Guest page table level */
> int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;
>
> @@ -168,6 +168,9 @@ static int amd_iommu_target_ivhd_type;
> u64 amd_iommu_efr;
> u64 amd_iommu_efr2;
>
> +/* dma translation not supported*/
> +bool amd_iommu_hatdis;
> +
> /* SNP is enabled on the system? */
> bool amd_iommu_snp_en;
> EXPORT_SYMBOL(amd_iommu_snp_en);
> @@ -1798,6 +1801,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
> if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
> amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
>
> + if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) {
> + pr_warn_once("Host Address Translation is not supported.\n");
> + amd_iommu_hatdis = true;
> + }
> +
> early_iommu_features_init(iommu, h);
>
> break;
> @@ -2582,7 +2590,7 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
> u32 devid;
> struct dev_table_entry *dev_table = pci_seg->dev_table;
>
> - if (dev_table == NULL)
> + if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE)
> return;
>
> for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
> @@ -3095,6 +3103,17 @@ static int __init early_amd_iommu_init(void)
> }
> }
>
> + if (amd_iommu_hatdis) {
> + if (amd_iommu_v2_pgtbl_supported())
> + amd_iommu_pgtable = PD_MODE_V2;
> + } else if (amd_iommu_pgtable == PD_MODE_NONE)
> + /*
> + * If v1 page table is supported (i.e., amd_iommu_hatdis == 0)
> + * and page table type is not specified in command line, then
> + * use v1 page table.
> + */
> + amd_iommu_pgtable = PD_MODE_V1;
> +
> /* Disable any previously enabled IOMMUs */
> if (!is_kdump_kernel() || amd_iommu_disabled)
> disable_iommus();
> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index be8761bbef0f..0ebc264726da 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -2393,6 +2393,13 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
> pci_max_pasids(to_pci_dev(dev)));
> }
>
> + if (amd_iommu_pgtable == PD_MODE_NONE) {
> + pr_warn_once("%s: DMA translation not supported by iommu.\n",
> + __func__);
> + iommu_dev = ERR_PTR(-ENODEV);
> + goto out_err;
> + }
> +
> out_err:
>
> iommu_completion_wait(iommu);
> @@ -2480,6 +2487,9 @@ static int pdom_setup_pgtable(struct protection_domain *domain,
> case PD_MODE_V2:
> fmt = AMD_IOMMU_V2;
> break;
> + case PD_MODE_NONE:
> + WARN_ON_ONCE(1);
> + return -EPERM;
> }
>
> domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev);
> @@ -2501,6 +2511,9 @@ static inline u64 dma_max_address(enum protection_domain_mode pgtable)
>
> static bool amd_iommu_hd_support(struct amd_iommu *iommu)
> {
> + if (amd_iommu_hatdis)
> + return false;
> +
> return iommu && (iommu->features & FEATURE_HDSUP);
> }
>
It's strange we seem to somehow have host translation disabled, while it
advertises other translation-related features like the normal case.
In any case we should probably follow Intel's example (which does a similar thing
to HATDis) where we only invoke IOMMU groups
iommu_device_register()/iommu_device_sysfs_add() with DMA translation enabled?
That should simplify most of the patch as those codepaths are not reachable via
kernel/userspace? Unless I am missing something ofc
See also commit c40aaaac10 ("iommu/vt-d: Gracefully handle DMAR units with no
supported address widths"). I am not sure what else is the closest example here
besides intel-iommu equivalent.
Powered by blists - more mailing lists