Message-ID: <20241016142237.GP3559746@nvidia.com>
Date: Wed, 16 Oct 2024 11:22:37 -0300
From: Jason Gunthorpe <jgg@...dia.com>
To: Suravee Suthikulpanit <suravee.suthikulpanit@....com>
Cc: linux-kernel@...r.kernel.org, iommu@...ts.linux.dev, joro@...tes.org,
robin.murphy@....com, vasant.hegde@....com, kevin.tian@...el.com,
jon.grimm@....com, santosh.shukla@....com, pandoh@...gle.com,
kumaranand@...gle.com
Subject: Re: [PATCH v6 0/9] iommu/amd: Use 128-bit cmpxchg operation to
update DTE
On Wed, Oct 16, 2024 at 05:17:47AM +0000, Suravee Suthikulpanit wrote:
> This series modifies the current implementation to use a 128-bit cmpxchg to
> update the DTE when needed, as specified in the AMD I/O Virtualization
> Technology (IOMMU) Specification.
>
> Please note that I have verified with the hardware designer, and they have
> confirmed that the IOMMU hardware has always been implemented with a 256-bit
> read. The next revision of the IOMMU spec will be updated to describe this
> part correctly. Therefore, I have updated the implementation to avoid
> unnecessary flushing.
>
> Changes in v6:
>
> * Patch 2, 4, 7: Newly added
>
> * Patch 3, 5, 6, 7, 9: Add READ_ONCE() per Uros.
>
> * Patch 3:
> - Modify write_dte_[higher|lower]128() to avoid copying old DTE in the loop.
>
> * Patch 5:
> - Use dev_data->dte_cache to restore persistent DTE bits in set_dte_entry().
> - Simplify make_clear_dte():
> - Remove bit preservation logic.
> - Remove non-SNP check for setting TV since it should not be needed.
>
> * Patch 6:
> - Use find_dev_data(..., alias) since the dev_data might not have been allocated.
> - Move dev_iommu_priv_set() to before setup_aliases().

I wanted to see how far this was from being split up neatly like ARM is,
and I came up with this, which seems pretty good to me. This would
probably be the next step to get to; then you'd lift the individual
set functions higher up the call chain into their respective attach
functions.

static void set_dte_identity(struct amd_iommu *iommu,
                             struct iommu_dev_data *dev_data,
                             struct dev_table_entry *target)
{
        /*
         * SNP does not support TV=1/Mode=1 in any case, and can't do IDENTITY
         */
        if (WARN_ON(amd_iommu_snp_en))
                return;

        /* mode is zero */
        target->data[0] |= DTE_FLAG_TV | DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V;
        if (dev_data->ats_enabled)
                target->data[1] |= DTE_FLAG_IOTLB;
        /* ppr is not allowed for identity */
        target->data128[0] |= dev_data->dte_cache.data128[0];
        target->data128[1] |= dev_data->dte_cache.data128[1];
}

static void set_dte_gcr3_table(struct amd_iommu *iommu,
                               struct iommu_dev_data *dev_data,
                               struct dev_table_entry *target)
{
        struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
        u64 gcr3;

        if (!gcr3_info->gcr3_tbl)
                return;

        pr_debug("%s: devid=%#x, glx=%#x, gcr3_tbl=%#llx\n",
                 __func__, dev_data->devid, gcr3_info->glx,
                 (unsigned long long)gcr3_info->gcr3_tbl);

        gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);

        target->data[0] |= DTE_FLAG_GV | DTE_FLAG_TV | DTE_FLAG_IR |
                           DTE_FLAG_IW | DTE_FLAG_V |
                           FIELD_PREP(DTE_GLX, gcr3_info->glx) |
                           FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12);
        if (pdom_is_v2_pgtbl_mode(dev_data->domain))
                target->data[0] |= DTE_FLAG_GIOV;

        target->data[1] |= FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) |
                           FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);

        /* Guest page table can only support 4 and 5 levels */
        target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK,
                                      amd_iommu_gpt_level == PAGE_MODE_5_LEVEL ?
                                              GUEST_PGTABLE_5_LEVEL :
                                              GUEST_PGTABLE_4_LEVEL);

        target->data[1] |= dev_data->gcr3_info.domid;
        if (dev_data->ppr)
                target->data[0] |= 1ULL << DEV_ENTRY_PPR;
        if (dev_data->ats_enabled)
                target->data[1] |= DTE_FLAG_IOTLB;
        target->data128[0] |= dev_data->dte_cache.data128[0];
        target->data128[1] |= dev_data->dte_cache.data128[1];
}

static void set_dte_paging(struct amd_iommu *iommu,
                           struct iommu_dev_data *dev_data,
                           struct dev_table_entry *target)
{
        struct protection_domain *domain = dev_data->domain;

        target->data[0] |= DTE_FLAG_TV | DTE_FLAG_IR | DTE_FLAG_IW |
                           iommu_virt_to_phys(domain->iop.root) |
                           ((domain->iop.mode & DEV_ENTRY_MODE_MASK)
                            << DEV_ENTRY_MODE_SHIFT) |
                           DTE_FLAG_V;
        if (dev_data->ppr)
                target->data[0] |= 1ULL << DEV_ENTRY_PPR;
        if (domain->dirty_tracking)
                target->data[0] |= DTE_FLAG_HAD;
        target->data[1] |= domain->id;
        if (dev_data->ats_enabled)
                target->data[1] |= DTE_FLAG_IOTLB;
        target->data128[0] |= dev_data->dte_cache.data128[0];
        target->data128[1] |= dev_data->dte_cache.data128[1];
}

static void set_dte_entry(struct amd_iommu *iommu,
                          struct iommu_dev_data *dev_data)
{
        u32 old_domid;
        struct dev_table_entry new = {};
        struct protection_domain *domain = dev_data->domain;
        struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
        struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];

        make_clear_dte(dev_data, dte, &new);

        if (gcr3_info && gcr3_info->gcr3_tbl)
                set_dte_gcr3_table(iommu, dev_data, &new);
        else if (domain->iop.mode == PAGE_MODE_NONE)
                set_dte_identity(iommu, dev_data, &new);
        else
                set_dte_paging(iommu, dev_data, &new);

        old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
        update_dte256(iommu, dev_data, &new);

        /*
         * A kdump kernel might be replacing a domain ID that was copied from
         * the previous kernel--if so, it needs to flush the translation cache
         * entries for the old domain ID that is being overwritten
         */
        if (old_domid)
                amd_iommu_flush_tlb_domid(iommu, old_domid);
}
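
To make that last step concrete, the eventual paging attach path could end
up looking something like the below. This is only a sketch: the
amd_iommu_attach_paging_domain() name is made up for illustration, and the
real thing would sit wherever the paging attach currently lives, but the
point is that the dispatcher in set_dte_entry() disappears:

/*
 * Hypothetical next step: the paging attach path builds and installs
 * its own DTE directly instead of going through the set_dte_entry()
 * dispatcher. The function name here is invented for the sketch.
 */
static void amd_iommu_attach_paging_domain(struct amd_iommu *iommu,
                                           struct iommu_dev_data *dev_data)
{
        struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
        struct dev_table_entry new = {};
        u32 old_domid;

        make_clear_dte(dev_data, dte, &new);
        set_dte_paging(iommu, dev_data, &new);

        old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
        update_dte256(iommu, dev_data, &new);

        /* kdump: flush cache entries for the domain ID being replaced */
        if (old_domid)
                amd_iommu_flush_tlb_domid(iommu, old_domid);
}

The kdump old-domid flush then stays with exactly the paths that install a
new domain ID.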
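
And since the whole point of the series is the 128-bit cmpxchg, here is a
rough sketch of what the update_dte256() call above boils down to. The
helper name and the exact ordering are my reading of the series, not the
real implementation; the real one also serializes on dev_data->dte_lock,
preserves the interrupt remapping bits, and flushes the DTE afterwards:

/*
 * Rough sketch of the 256-bit DTE update: write the entry as two
 * 128-bit halves with cmpxchg128(), ordered around the V bit in
 * data[0] so the IOMMU (which always reads the full 256 bits) never
 * observes a valid but half-written entry. Plain reads of the old
 * halves are fine here because the caller is assumed to hold
 * dev_data->dte_lock.
 */
static void update_dte256_sketch(struct dev_table_entry *ptr,
                                 struct dev_table_entry *new)
{
        u128 old_lo = ptr->data128[0];
        u128 old_hi = ptr->data128[1];

        if (new->data[0] & DTE_FLAG_V) {
                /* Making valid: the half carrying V is written last */
                cmpxchg128(&ptr->data128[1], old_hi, new->data128[1]);
                cmpxchg128(&ptr->data128[0], old_lo, new->data128[0]);
        } else {
                /* Making invalid: clear the half carrying V first */
                cmpxchg128(&ptr->data128[0], old_lo, new->data128[0]);
                cmpxchg128(&ptr->data128[1], old_hi, new->data128[1]);
        }
}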