Message-ID: <aBKdMaFLPFJYegIS@google.com>
Date: Wed, 30 Apr 2025 21:59:13 +0000
From: Pranjal Shrivastava <praan@...gle.com>
To: Nicolin Chen <nicolinc@...dia.com>
Cc: jgg@...dia.com, kevin.tian@...el.com, corbet@....net, will@...nel.org,
bagasdotme@...il.com, robin.murphy@....com, joro@...tes.org,
thierry.reding@...il.com, vdumpa@...dia.com, jonathanh@...dia.com,
shuah@...nel.org, jsnitsel@...hat.com, nathan@...nel.org,
peterz@...radead.org, yi.l.liu@...el.com, mshavit@...gle.com,
zhangzekun11@...wei.com, iommu@...ts.linux.dev,
linux-doc@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org, linux-tegra@...r.kernel.org,
linux-kselftest@...r.kernel.org, patches@...ts.linux.dev,
mochs@...dia.com, alok.a.tiwari@...cle.com, vasant.hegde@....com
Subject: Re: [PATCH v2 21/22] iommu/tegra241-cmdqv: Add user-space use support
On Fri, Apr 25, 2025 at 10:58:16PM -0700, Nicolin Chen wrote:
> The CMDQV HW supports a user-space use for virtualization cases. It allows
> the VM to issue guest-level TLBI or ATC_INV commands directly to the queue
> and executes them without a VMEXIT, as HW will replace the VMID field in a
> TLBI command and the SID field in an ATC_INV command with the preset VMID
> and SID.
>
> This is built upon the vIOMMU infrastructure by allowing VMM to allocate a
> VINTF (as a vIOMMU object) and assign VCMDQs (vCMDQ objects) to the VINTF.
>
> So firstly, replace the standard vSMMU model with the VINTF implementation
> but reuse the standard cache_invalidate op (for unsupported commands) and
> the standard alloc_domain_nested op (for standard nested STE).
>
> Each VINTF has two 64KB MMIO pages (128B per logical vCMDQ):
> - Page0 (directly accessed by guest) has all the control and status bits.
> - Page1 (trapped by VMM) has guest-owned queue memory location/size info.
>
> VMM should trap the emulated VINTF0's page1 of the guest VM for the guest-
> level VCMDQ location/size info and forward that to the kernel to translate
> to a physical memory location to program the VCMDQ HW during an allocation
> call. Then, it should mmap the assigned VINTF's page0 to the VINTF0 page0
> of the guest VM. This allows the guest OS to read and write the guest-owned
> VINTF's page0 for direct control of the VCMDQ HW.
>
> For ATC invalidation commands that hold an SID, it requires all devices to
> register their virtual SIDs to the SID_MATCH registers and their physical
> SIDs to the pairing SID_REPLACE registers, so that HW can use those as a
> lookup table to replace those virtual SIDs with the correct physical SIDs.
> Thus, implement the driver-allocated vDEVICE op with a tegra241_vintf_sid
> structure to allocate SID_REPLACE and to program the SIDs accordingly.
>
> This enables the HW accelerated feature for NVIDIA Grace CPU. Compared to
> the standard SMMUv3 operating in the nested translation mode trapping CMDQ
> for TLBI and ATC_INV commands, this gives a huge performance improvement:
> 70% to 90% reductions of invalidation time were measured by various DMA
> unmap tests running in a guest OS.
>
The write-up is super helpful for understanding how the HW works at a
high level. Thanks for explaining this well! :)

I'm curious which DMA unmap tests were run for the perf numbers?
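(To be concrete about what I'm asking: I'd guess a guest-side loop timing
dma_unmap_single() calls? A rough sketch of the kind of test I have in
mind, purely illustrative and not from this series:)

	/* Hypothetical micro-benchmark; needs <linux/dma-mapping.h>, <linux/ktime.h> */
	static void unmap_bench(struct device *dev, dma_addr_t *handles, int n)
	{
		ktime_t t0 = ktime_get();
		int i;

		/* Each unmap issues TLBI/ATC_INV; with CMDQV the guest skips the VMEXIT */
		for (i = 0; i < n; i++)
			dma_unmap_single(dev, handles[i], PAGE_SIZE, DMA_TO_DEVICE);

		dev_info(dev, "unmapped %d pages in %lld ns\n", n,
			 ktime_to_ns(ktime_sub(ktime_get(), t0)));
	}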
> Signed-off-by: Nicolin Chen <nicolinc@...dia.com>
> ---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 15 +
> include/uapi/linux/iommufd.h | 49 ++-
> .../arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 6 +-
> .../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 374 +++++++++++++++++-
> 4 files changed, 435 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index bab7a9ce1283..d3f18a286447 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -1000,6 +1000,14 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
> struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
> bool sync);
>
> +static inline phys_addr_t
> +arm_smmu_domain_ipa_to_pa(struct arm_smmu_domain *smmu_domain, u64 ipa)
> +{
> + if (WARN_ON_ONCE(smmu_domain->stage != ARM_SMMU_DOMAIN_S2))
> + return 0;
> + return iommu_iova_to_phys(&smmu_domain->domain, ipa);
> +}
> +
> #ifdef CONFIG_ARM_SMMU_V3_SVA
> bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
> bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
> @@ -1076,9 +1084,16 @@ int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
> void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state);
> void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master);
> int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt);
> +struct iommu_domain *
> +arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
> + const struct iommu_user_data *user_data);
> +int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
> + struct iommu_user_data_array *array);
> #else
> #define arm_smmu_hw_info NULL
> #define arm_vsmmu_alloc NULL
> +#define arm_vsmmu_alloc_domain_nested NULL
> +#define arm_vsmmu_cache_invalidate NULL
>
> static inline int
> arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> index b2614f0f1547..d69e7c1d39ea 100644
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -549,12 +549,25 @@ struct iommu_hw_info_vtd {
> __aligned_u64 ecap_reg;
> };
>
> +/**
> + * enum iommu_hw_info_arm_smmuv3_flags - Flags for ARM SMMUv3 hw_info
> + * @IOMMU_HW_INFO_ARM_SMMUV3_HAS_TEGRA241_CMDQV: Tegra241 implementation with
> + * CMDQV support; @impl is valid
> + */
> +enum iommu_hw_info_arm_smmuv3_flags {
> + IOMMU_HW_INFO_ARM_SMMUV3_HAS_TEGRA241_CMDQV = 1 << 0,
> +};
> +
> /**
> * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
> * (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
> *
> - * @flags: Must be set to 0
> - * @impl: Must be 0
> + * @flags: Combination of enum iommu_hw_info_arm_smmuv3_flags
> + * @impl: Implementation-defined bits when the following flags are set:
> + * - IOMMU_HW_INFO_ARM_SMMUV3_HAS_TEGRA241_CMDQV
> + * Bits[15:12] - Log2 of the total number of SID replacements
> + * Bits[07:04] - Log2 of the total number of vCMDQs per vIOMMU
> + * Bits[03:00] - Version number for the CMDQ-V HW
Nit: It seems that we deliberately chose not to reveal `NUM_VINTF_LOG2`
to user-space. If so, maybe we should mark those bitfields as unused or
reserved for clarity, i.e. Bits[11:08], and arguably Bits[31:16] too.
See the suggested layout below.
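(My suggested kernel-doc layout for @impl, keeping everything else
as-is:)

 * @impl: Implementation-defined bits when the following flags are set:
 *        - IOMMU_HW_INFO_ARM_SMMUV3_HAS_TEGRA241_CMDQV
 *          Bits[31:16] - Reserved / Unused
 *          Bits[15:12] - Log2 of the total number of SID replacements
 *          Bits[11:08] - Reserved / Unused (NUM_VINTF_LOG2, not exposed)
 *          Bits[07:04] - Log2 of the total number of vCMDQs per vIOMMU
 *          Bits[03:00] - Version number for the CMDQ-V HW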
> * @idr: Implemented features for ARM SMMU Non-secure programming interface
> * @iidr: Information about the implementation and implementer of ARM SMMU,
> * and architecture version supported
> @@ -952,10 +965,28 @@ struct iommu_fault_alloc {
> * enum iommu_viommu_type - Virtual IOMMU Type
> * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use
> * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type
> + * @IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV Extension for SMMUv3
> */
> enum iommu_viommu_type {
> IOMMU_VIOMMU_TYPE_DEFAULT = 0,
> IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
> + IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV = 2,
> +};
This is a little confusing.. I understand that we need a new viommu type
to copy the new struct iommu_viommu_tegra241_cmdqv between user-space and
the kernel. But in a previous patch ("Add vsmmu_alloc impl op"), we added
a check to fall back to the standard SMMUv3 type if
impl_ops->vsmmu_alloc returns -EOPNOTSUPP:
	if (master->smmu->impl_ops && master->smmu->impl_ops->vsmmu_alloc)
		vsmmu = master->smmu->impl_ops->vsmmu_alloc(
			master->smmu, s2_parent, ictx, viommu_type, user_data);
	if (PTR_ERR(vsmmu) == -EOPNOTSUPP) {
		if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
			return ERR_PTR(-EOPNOTSUPP);
		/* Fallback to standard SMMUv3 type if viommu_type matches */
		vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core,
					     &arm_vsmmu_ops);
Now, if we ALWAYS try to allocate an impl-specific vsmmu first, even
when viommu_type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3, impl_ops->vsmmu_alloc
is just going to hand -EOPNOTSUPP back to us. Then we check the retval
for -EOPNOTSUPP and re-check the requested viommu_type.. which seems a
little counterintuitive.

Instead, I'd suggest only calling impl_ops->vsmmu_alloc when viommu_type
!= IOMMU_VIOMMU_TYPE_ARM_SMMUV3; if the type isn't supported,
impl_ops->vsmmu_alloc will return an error anyway.
Thus, the snippet from arm_vsmmu_alloc could look like:
	if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3) {
		if (!master->smmu->impl_ops ||
		    !master->smmu->impl_ops->vsmmu_alloc)
			return ERR_PTR(-EOPNOTSUPP);
		vsmmu = master->smmu->impl_ops->vsmmu_alloc(
			master->smmu, s2_parent, ictx, viommu_type, user_data);
		if (IS_ERR(vsmmu))
			return ERR_CAST(vsmmu);
	} else {
		/* Standard SMMUv3 type */
		vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core,
					     &arm_vsmmu_ops);
	}

(This also sidesteps checking PTR_ERR() on a potentially uninitialized
vsmmu when impl_ops is absent.)
> +
> +/**
> + * struct iommu_viommu_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Virtual Interface
> + * (IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV)
> + * @out_vintf_page0_pgoff: Offset of the VINTF page0 for mmap syscall
> + * @out_vintf_page0_pgsz: Size of the VINTF page0 for mmap syscall
> + *
> + * Both @out_vintf_page0_pgoff and @out_vintf_page0_pgsz are given by the kernel
> + * for user space to mmap the VINTF page0 from the host physical address space
> + * to the guest physical address space so that a guest kernel can directly R/W
> + * access to the VINTF page0 in order to control its virtual command queues.
> + */
> +struct iommu_viommu_tegra241_cmdqv {
> + __aligned_u64 out_vintf_page0_pgoff;
> + __aligned_u64 out_vintf_page0_pgsz;
> };
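(Drive-by: a tiny VMM-side sketch of how these two outputs get consumed
might help readers. My assumption below is that the pgoff/pgsz pair is
passed straight to mmap() on the iommufd, and the names are
illustrative:)

	/* Hypothetical VMM-side usage; needs <sys/mman.h> and <err.h> */
	struct iommu_viommu_tegra241_cmdqv data = {};

	/* ... IOMMU_VIOMMU_ALLOC ioctl fills data ... */

	void *page0 = mmap(NULL, data.out_vintf_page0_pgsz,
			   PROT_READ | PROT_WRITE, MAP_SHARED,
			   iommufd, data.out_vintf_page0_pgoff);
	if (page0 == MAP_FAILED)
		err(1, "mmap VINTF page0");
	/* Expose page0 to the guest as the emulated VINTF0 page0 */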
>
> /**
> @@ -1152,9 +1183,23 @@ struct iommu_veventq_alloc {
> /**
> * enum iommu_vcmdq_type - Virtual Command Queue Type
> * @IOMMU_VCMDQ_TYPE_DEFAULT: Reserved for future use
> + * @IOMMU_VCMDQ_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV Extension for SMMUv3
> */
> enum iommu_vcmdq_type {
> IOMMU_VCMDQ_TYPE_DEFAULT = 0,
> + /*
> + * TEGRA241_CMDQV requirements (otherwise it will fail)
> + * - alloc starts from the lowest @index=0 in ascending order
> + * - destroy starts from the last allocated @index in descending order
> + * - @addr must be aligned to @length in bytes and be mmapped in IOAS
> + * - @length must be a power of 2, with a minimum 32 bytes and a maximum
> + * 1 ^ idr[1].CMDQS x 16 bytes (do GET_HW_INFO call to read idr[1] in
> + * struct iommu_hw_info_arm_smmuv3)
> + * - suggest to back the queue memory with contiguous physical pages or
> + * a single huge page with alignment of the queue size, limit vSMMU's
> + * IDR1.CMDQS to the huge page size divided by 16 bytes
> + */
> + IOMMU_VCMDQ_TYPE_TEGRA241_CMDQV = 1,
> };
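(Btw, the @length rules above boil down to a simple VMM-side sanity
check; a sketch assuming CMDQ_ENT_SZ_SHIFT == 4, i.e. 16-byte commands:)

	/* Hypothetical check of the TEGRA241 vCMDQ length requirements */
	static bool vcmdq_len_ok(size_t length, unsigned int cmdqs /* IDR1.CMDQS */)
	{
		size_t max = (size_t)1 << (cmdqs + 4); /* 2^CMDQS entries * 16B */

		return length >= 32 && length <= max &&
		       !(length & (length - 1)); /* power of 2 */
	}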
>
> /**
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> index 63861c60b615..40246cd04656 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> @@ -226,7 +226,7 @@ static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg,
> return 0;
> }
>
> -static struct iommu_domain *
> +struct iommu_domain *
> arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
> const struct iommu_user_data *user_data)
> {
> @@ -337,8 +337,8 @@ static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
> return 0;
> }
>
> -static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
> - struct iommu_user_data_array *array)
> +int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
> + struct iommu_user_data_array *array)
> {
> struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
> struct arm_smmu_device *smmu = vsmmu->smmu;
> diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
> index 869c90b660c1..88e2b6506b3a 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
> @@ -8,7 +8,9 @@
> #include <linux/dma-mapping.h>
> #include <linux/interrupt.h>
> #include <linux/iommu.h>
> +#include <linux/iommufd.h>
> #include <linux/iopoll.h>
> +#include <uapi/linux/iommufd.h>
>
> #include <acpi/acpixf.h>
>
> @@ -26,8 +28,10 @@
> #define CMDQV_EN BIT(0)
>
> #define TEGRA241_CMDQV_PARAM 0x0004
> +#define CMDQV_NUM_SID_PER_VM_LOG2 GENMASK(15, 12)
> #define CMDQV_NUM_VINTF_LOG2 GENMASK(11, 8)
> #define CMDQV_NUM_VCMDQ_LOG2 GENMASK(7, 4)
> +#define CMDQV_VER GENMASK(3, 0)
>
> #define TEGRA241_CMDQV_STATUS 0x0008
> #define CMDQV_ENABLED BIT(0)
> @@ -53,6 +57,9 @@
> #define VINTF_STATUS GENMASK(3, 1)
> #define VINTF_ENABLED BIT(0)
>
> +#define TEGRA241_VINTF_SID_MATCH(s) (0x0040 + 0x4*(s))
> +#define TEGRA241_VINTF_SID_REPLACE(s) (0x0080 + 0x4*(s))
> +
> #define TEGRA241_VINTF_LVCMDQ_ERR_MAP_64(m) \
> (0x00C0 + 0x8*(m))
> #define LVCMDQ_ERR_MAP_NUM_64 2
> @@ -114,16 +121,20 @@ MODULE_PARM_DESC(bypass_vcmdq,
>
> /**
> * struct tegra241_vcmdq - Virtual Command Queue
> + * @core: Embedded iommufd_vcmdq structure
> * @idx: Global index in the CMDQV
> * @lidx: Local index in the VINTF
> * @enabled: Enable status
> * @cmdqv: Parent CMDQV pointer
> * @vintf: Parent VINTF pointer
> + * @prev: Previous LVCMDQ to depend on
> * @cmdq: Command Queue struct
> * @page0: MMIO Page0 base address
> * @page1: MMIO Page1 base address
> */
> struct tegra241_vcmdq {
> + struct iommufd_vcmdq core;
> +
> u16 idx;
> u16 lidx;
>
> @@ -131,22 +142,29 @@ struct tegra241_vcmdq {
>
> struct tegra241_cmdqv *cmdqv;
> struct tegra241_vintf *vintf;
> + struct tegra241_vcmdq *prev;
> struct arm_smmu_cmdq cmdq;
>
> void __iomem *page0;
> void __iomem *page1;
> };
> +#define core_to_vcmdq(v) container_of(v, struct tegra241_vcmdq, core)
>
> /**
> * struct tegra241_vintf - Virtual Interface
> + * @vsmmu: Embedded arm_vsmmu structure
> * @idx: Global index in the CMDQV
> * @enabled: Enable status
> * @hyp_own: Owned by hypervisor (in-kernel)
> * @cmdqv: Parent CMDQV pointer
> * @lvcmdqs: List of logical VCMDQ pointers
> * @base: MMIO base address
> + * @immap_id: Allocated immap_id ID for mmap() call
> + * @sids: Stream ID replacement resources
> */
> struct tegra241_vintf {
> + struct arm_vsmmu vsmmu;
> +
> u16 idx;
>
> bool enabled;
> @@ -156,6 +174,24 @@ struct tegra241_vintf {
> struct tegra241_vcmdq **lvcmdqs;
>
> void __iomem *base;
> + unsigned long immap_id;
> +
> + struct ida sids;
> +};
> +#define viommu_to_vintf(v) container_of(v, struct tegra241_vintf, vsmmu.core)
> +
> +/**
> + * struct tegra241_vintf_sid - Virtual Interface Stream ID Replacement
> + * @core: Embedded iommufd_vdevice structure, holding virtual Stream ID
> + * @vintf: Parent VINTF pointer
> + * @sid: Physical Stream ID
> + * @id: Slot index in the VINTF
> + */
> +struct tegra241_vintf_sid {
> + struct iommufd_vdevice core;
> + struct tegra241_vintf *vintf;
> + u32 sid;
> + u8 idx;
> };
>
> /**
> @@ -163,10 +199,12 @@ struct tegra241_vintf {
> * @smmu: SMMUv3 device
> * @dev: CMDQV device
> * @base: MMIO base address
> + * @base_phys: MMIO physical base address, for mmap
> * @irq: IRQ number
> * @num_vintfs: Total number of VINTFs
> * @num_vcmdqs: Total number of VCMDQs
> * @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF
> + * @num_sids_per_vintf: Total number of SID replacements per VINTF
> * @vintf_ids: VINTF id allocator
> * @vintfs: List of VINTFs
> */
> @@ -175,12 +213,14 @@ struct tegra241_cmdqv {
> struct device *dev;
>
> void __iomem *base;
> + phys_addr_t base_phys;
> int irq;
>
> /* CMDQV Hardware Params */
> u16 num_vintfs;
> u16 num_vcmdqs;
> u16 num_lvcmdqs_per_vintf;
> + u16 num_sids_per_vintf;
>
> struct ida vintf_ids;
>
> @@ -380,6 +420,12 @@ static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq)
> dev_dbg(vcmdq->cmdqv->dev, "%sdeinited\n", h);
> }
>
> +/* This function is for LVCMDQ, so @vcmdq must be mapped prior */
> +static void _tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq)
> +{
> + writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE));
> +}
> +
> /* This function is for LVCMDQ, so @vcmdq must be mapped prior */
> static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq)
> {
> @@ -390,7 +436,7 @@ static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq)
> tegra241_vcmdq_hw_deinit(vcmdq);
>
> /* Configure and enable VCMDQ */
> - writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE));
> + _tegra241_vcmdq_hw_init(vcmdq);
>
> ret = vcmdq_write_config(vcmdq, VCMDQ_EN);
> if (ret) {
> @@ -420,6 +466,7 @@ static void tegra241_vcmdq_unmap_lvcmdq(struct tegra241_vcmdq *vcmdq)
> static void tegra241_vintf_hw_deinit(struct tegra241_vintf *vintf)
> {
> u16 lidx = vintf->cmdqv->num_lvcmdqs_per_vintf;
> + int sidx;
>
> /* HW requires to unmap LVCMDQs in descending order */
> while (lidx--) {
> @@ -429,6 +476,10 @@ static void tegra241_vintf_hw_deinit(struct tegra241_vintf *vintf)
> }
> }
> vintf_write_config(vintf, 0);
> + for (sidx = 0; sidx < vintf->cmdqv->num_sids_per_vintf; sidx++) {
> + writel_relaxed(0, REG_VINTF(vintf, SID_REPLACE(sidx)));
> + writel_relaxed(0, REG_VINTF(vintf, SID_MATCH(sidx)));
> + }
> }
>
> /* Map a global VCMDQ to the pre-assigned LVCMDQ */
> @@ -457,7 +508,8 @@ static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own)
> * whether enabling it here or not, as !HYP_OWN cmdq HWs only support a
> * restricted set of supported commands.
> */
> - regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own);
> + regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own) |
> + FIELD_PREP(VINTF_VMID, vintf->vsmmu.vmid);
> writel(regval, REG_VINTF(vintf, CONFIG));
>
> ret = vintf_write_config(vintf, regval | VINTF_EN);
> @@ -584,7 +636,9 @@ static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
>
> dev_dbg(vintf->cmdqv->dev,
> "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 64));
> - kfree(vcmdq);
> + /* Guest-owned VCMDQ is freed with vcmdq by iommufd core */
> + if (vcmdq->vintf->hyp_own)
> + kfree(vcmdq);
> }
>
> static struct tegra241_vcmdq *
> @@ -623,6 +677,9 @@ tegra241_vintf_alloc_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
>
> static void tegra241_cmdqv_deinit_vintf(struct tegra241_cmdqv *cmdqv, u16 idx)
> {
> + if (cmdqv->vintfs[idx]->immap_id)
> + iommufd_ctx_free_mmap(cmdqv->vintfs[idx]->vsmmu.core.ictx,
> + cmdqv->vintfs[idx]->immap_id);
> kfree(cmdqv->vintfs[idx]->lvcmdqs);
> ida_free(&cmdqv->vintf_ids, idx);
> cmdqv->vintfs[idx] = NULL;
> @@ -671,7 +728,11 @@ static void tegra241_cmdqv_remove_vintf(struct tegra241_cmdqv *cmdqv, u16 idx)
>
> dev_dbg(cmdqv->dev, "VINTF%u: deallocated\n", vintf->idx);
> tegra241_cmdqv_deinit_vintf(cmdqv, idx);
> - kfree(vintf);
> + if (!vintf->hyp_own)
> + ida_destroy(&vintf->sids);
> + /* Guest-owned VINTF is freed with viommu by iommufd core */
> + if (vintf->hyp_own)
> + kfree(vintf);
> }
>
> static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu)
> @@ -699,10 +760,32 @@ static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu)
> put_device(cmdqv->dev); /* smmu->impl_dev */
> }
>
> +static struct arm_vsmmu *
> +tegra241_cmdqv_vsmmu_alloc(struct arm_smmu_device *smmu,
> + struct arm_smmu_domain *smmu_domain,
> + struct iommufd_ctx *ictx, unsigned int viommu_type,
> + const struct iommu_user_data *user_data);
> +
> +static u32 tegra241_cmdqv_hw_info(struct arm_smmu_device *smmu, u32 *impl)
> +{
> + struct tegra241_cmdqv *cmdqv =
> + container_of(smmu, struct tegra241_cmdqv, smmu);
> + u32 regval = readl_relaxed(REG_CMDQV(cmdqv, PARAM));
> +
> + *impl = FIELD_GET(CMDQV_VER, regval);
> + *impl |= FIELD_PREP(CMDQV_NUM_VCMDQ_LOG2,
> + ilog2(cmdqv->num_lvcmdqs_per_vintf));
> + *impl |= FIELD_PREP(CMDQV_NUM_SID_PER_VM_LOG2,
> + ilog2(cmdqv->num_sids_per_vintf));
> + return IOMMU_HW_INFO_ARM_SMMUV3_HAS_TEGRA241_CMDQV;
> +}
> +
> static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = {
> .get_secondary_cmdq = tegra241_cmdqv_get_cmdq,
> .device_reset = tegra241_cmdqv_hw_reset,
> .device_remove = tegra241_cmdqv_remove,
> + .vsmmu_alloc = tegra241_cmdqv_vsmmu_alloc,
> + .hw_info = tegra241_cmdqv_hw_info,
> };
>
> /* Probe Functions */
> @@ -844,6 +927,7 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res,
> cmdqv->irq = irq;
> cmdqv->base = base;
> cmdqv->dev = smmu->impl_dev;
> + cmdqv->base_phys = res->start;
>
> if (cmdqv->irq > 0) {
> ret = request_threaded_irq(irq, NULL, tegra241_cmdqv_isr,
> @@ -860,6 +944,8 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res,
> cmdqv->num_vintfs = 1 << FIELD_GET(CMDQV_NUM_VINTF_LOG2, regval);
> cmdqv->num_vcmdqs = 1 << FIELD_GET(CMDQV_NUM_VCMDQ_LOG2, regval);
> cmdqv->num_lvcmdqs_per_vintf = cmdqv->num_vcmdqs / cmdqv->num_vintfs;
> + cmdqv->num_sids_per_vintf =
> + 1 << FIELD_GET(CMDQV_NUM_SID_PER_VM_LOG2, regval);
>
> cmdqv->vintfs =
> kcalloc(cmdqv->num_vintfs, sizeof(*cmdqv->vintfs), GFP_KERNEL);
> @@ -913,3 +999,283 @@ struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu)
> put_device(smmu->impl_dev);
> return ERR_PTR(-ENODEV);
> }
> +
> +/* User-space vIOMMU and vCMDQ Functions */
> +
> +static int tegra241_vcmdq_hw_init_user(struct tegra241_vcmdq *vcmdq)
> +{
> + char header[64];
> +
> + /* Configure the vcmdq only; User space does the enabling */
> + _tegra241_vcmdq_hw_init(vcmdq);
> +
> + dev_dbg(vcmdq->cmdqv->dev, "%sinited at host PA 0x%llx size 0x%lx\n",
> + lvcmdq_error_header(vcmdq, header, 64),
> + vcmdq->cmdq.q.q_base & VCMDQ_ADDR,
> + 1UL << (vcmdq->cmdq.q.q_base & VCMDQ_LOG2SIZE));
> + return 0;
> +}
> +
> +static struct iommufd_vcmdq *
> +tegra241_vintf_alloc_lvcmdq_user(struct iommufd_viommu *viommu,
> + unsigned int type, u32 index, dma_addr_t addr,
> + size_t length)
> +{
> + struct tegra241_vintf *vintf = viommu_to_vintf(viommu);
> + struct tegra241_cmdqv *cmdqv = vintf->cmdqv;
> + struct arm_smmu_device *smmu = &cmdqv->smmu;
> + struct tegra241_vcmdq *vcmdq, *prev = NULL;
> + u32 log2size, max_n_shift;
> + phys_addr_t q_base;
> + char header[64];
> + int ret;
> +
> + if (type != IOMMU_VCMDQ_TYPE_TEGRA241_CMDQV)
> + return ERR_PTR(-EOPNOTSUPP);
> + if (index >= cmdqv->num_lvcmdqs_per_vintf)
> + return ERR_PTR(-EINVAL);
> + if (vintf->lvcmdqs[index])
> + return ERR_PTR(-EEXIST);
> + /*
> + * HW requires to map LVCMDQs in ascending order, so reject if the
> + * previous lvcmdqs is not allocated yet.
> + */
> + if (index) {
> + prev = vintf->lvcmdqs[index - 1];
> + if (!prev)
> + return ERR_PTR(-EIO);
> + }
> + /*
> + * @length must be a power of 2, in range of
> + * [ 32, 1 ^ (idr[1].CMDQS + CMDQ_ENT_SZ_SHIFT) ]
> + */
Nit: s/1 ^/2 ^/, i.e. this should read 2 ^ (idr[1].CMDQS +
CMDQ_ENT_SZ_SHIFT). FWIW, the uapi comment above ("1 ^ idr[1].CMDQS x 16
bytes") has the same typo.
> + max_n_shift = FIELD_GET(IDR1_CMDQS,
> + readl_relaxed(smmu->base + ARM_SMMU_IDR1));
> + if (!is_power_of_2(length) || length < 32 ||
> + length > (1 << (max_n_shift + CMDQ_ENT_SZ_SHIFT)))
> + return ERR_PTR(-EINVAL);
> + log2size = ilog2(length) - CMDQ_ENT_SZ_SHIFT;
> +
> + /* @addr must be aligned to @length and be mapped in s2_parent domain */
> + if (addr & ~VCMDQ_ADDR || addr & (length - 1))
> + return ERR_PTR(-EINVAL);
> + q_base = arm_smmu_domain_ipa_to_pa(vintf->vsmmu.s2_parent, addr);
> + if (!q_base)
> + return ERR_PTR(-ENXIO);
> +
> + vcmdq = iommufd_vcmdq_alloc(viommu, struct tegra241_vcmdq, core);
> + if (!vcmdq)
> + return ERR_PTR(-ENOMEM);
> +
> + /*
> + * HW requires to unmap LVCMDQs in descending order, so destroy() must
> + * follow this rule. Set a dependency on its previous LVCMDQ so iommufd
> + * core will help enforce it.
> + */
> + if (prev) {
> + ret = iommufd_vcmdq_depend(vcmdq, prev, core);
> + if (ret)
> + goto free_vcmdq;
> + }
> + vcmdq->prev = prev;
> +
> + ret = tegra241_vintf_init_lvcmdq(vintf, index, vcmdq);
> + if (ret)
> + goto free_vcmdq;
> +
> + dev_dbg(cmdqv->dev, "%sallocated\n",
> + lvcmdq_error_header(vcmdq, header, 64));
> +
> + tegra241_vcmdq_map_lvcmdq(vcmdq);
> +
> + vcmdq->cmdq.q.q_base = q_base & VCMDQ_ADDR;
> + vcmdq->cmdq.q.q_base |= log2size;
> +
> + ret = tegra241_vcmdq_hw_init_user(vcmdq);
> + if (ret)
> + goto free_vcmdq;
> + vintf->lvcmdqs[index] = vcmdq;
> +
> + return &vcmdq->core;
> +free_vcmdq:
> + iommufd_struct_destroy(viommu->ictx, vcmdq, core);
> + return ERR_PTR(ret);
Are we missing an undepend in this error path, before the
iommufd_struct_destroy()?

	if (vcmdq->prev)
		iommufd_vcmdq_undepend(vcmdq, vcmdq->prev, core);
> +}
> +
> +static void tegra241_vintf_destroy_lvcmdq_user(struct iommufd_vcmdq *core)
> +{
> + struct tegra241_vcmdq *vcmdq = core_to_vcmdq(core);
> +
> + tegra241_vcmdq_hw_deinit(vcmdq);
> + tegra241_vcmdq_unmap_lvcmdq(vcmdq);
> + tegra241_vintf_free_lvcmdq(vcmdq->vintf, vcmdq->lidx);
> + if (vcmdq->prev)
> + iommufd_vcmdq_undepend(vcmdq, vcmdq->prev, core);
> +
> + /* IOMMUFD core frees the memory of vcmdq and vcmdq */
> +}
> +
> +static void tegra241_cmdqv_destroy_vintf_user(struct iommufd_viommu *viommu)
> +{
> + struct tegra241_vintf *vintf = viommu_to_vintf(viommu);
> +
> + tegra241_cmdqv_remove_vintf(vintf->cmdqv, vintf->idx);
> +
> + /* IOMMUFD core frees the memory of vintf and viommu */
> +}
> +
> +static struct iommufd_vdevice *
> +tegra241_vintf_alloc_vdevice(struct iommufd_viommu *viommu, struct device *dev,
> + u64 dev_id)
> +{
> + struct tegra241_vintf *vintf = viommu_to_vintf(viommu);
> + struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> + struct arm_smmu_stream *stream = &master->streams[0];
> + struct tegra241_vintf_sid *vsid;
> + int sidx;
> +
> + if (dev_id > UINT_MAX)
> + return ERR_PTR(-EINVAL);
> +
> + vsid = iommufd_vdevice_alloc(viommu, struct tegra241_vintf_sid, core);
> + if (!vsid)
> + return ERR_PTR(-ENOMEM);
> +
> + WARN_ON_ONCE(master->num_streams != 1);
> +
> + /* Find an empty pair of SID_REPLACE and SID_MATCH */
> + sidx = ida_alloc_max(&vintf->sids, vintf->cmdqv->num_sids_per_vintf - 1,
> + GFP_KERNEL);
> + if (sidx < 0) {
> + iommufd_struct_destroy(viommu->ictx, vsid, core);
> + return ERR_PTR(sidx);
> + }
> +
> + writel_relaxed(stream->id, REG_VINTF(vintf, SID_REPLACE(sidx)));
> + writel_relaxed(dev_id << 1 | 0x1, REG_VINTF(vintf, SID_MATCH(sidx)));
> + dev_dbg(vintf->cmdqv->dev,
> + "VINTF%u: allocated SID_REPLACE%d for pSID=%x, vSID=%x\n",
> + vintf->idx, sidx, stream->id, (u32)dev_id);
> +
> + vsid->idx = sidx;
> + vsid->vintf = vintf;
> + vsid->sid = stream->id;
> +
> + return &vsid->core;
> +}
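Took me a second to parse the SID_MATCH write above. My reading is that
bit 0 is the valid/enable bit and bits[31:1] carry the vSID, i.e. roughly
the below (macro names made up by me):

	#define SID_MATCH_VSID	GENMASK(31, 1)
	#define SID_MATCH_VALID	BIT(0)

	u32 match = FIELD_PREP(SID_MATCH_VSID, dev_id) | SID_MATCH_VALID;

If so, a one-line comment on the `dev_id << 1 | 0x1` would be nice.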
> +
> +static void tegra241_vintf_destroy_vdevice(struct iommufd_vdevice *vdev)
> +{
> + struct tegra241_vintf_sid *vsid =
> + container_of(vdev, struct tegra241_vintf_sid, core);
> + struct tegra241_vintf *vintf = vsid->vintf;
> +
> + writel_relaxed(0, REG_VINTF(vintf, SID_REPLACE(vsid->idx)));
> + writel_relaxed(0, REG_VINTF(vintf, SID_MATCH(vsid->idx)));
Just a thought: should these be writel (i.e. with a barrier) to avoid
races? Although I believe all user queues would have been freed by this
point?
> + ida_free(&vintf->sids, vsid->idx);
> + dev_dbg(vintf->cmdqv->dev,
> + "VINTF%u: deallocated SID_REPLACE%d for pSID=%x\n", vintf->idx,
> + vsid->idx, vsid->sid);
> +
> + /* IOMMUFD core frees the memory of vsid and vdev */
> +}
> +
> +static struct iommufd_viommu_ops tegra241_cmdqv_viommu_ops = {
> + .destroy = tegra241_cmdqv_destroy_vintf_user,
> + .alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
> + .cache_invalidate = arm_vsmmu_cache_invalidate,
> + .vdevice_alloc = tegra241_vintf_alloc_vdevice,
> + .vdevice_destroy = tegra241_vintf_destroy_vdevice,
> + .vcmdq_alloc = tegra241_vintf_alloc_lvcmdq_user,
> + .vcmdq_destroy = tegra241_vintf_destroy_lvcmdq_user,
> +};
> +
> +static struct arm_vsmmu *
> +tegra241_cmdqv_vsmmu_alloc(struct arm_smmu_device *smmu,
> + struct arm_smmu_domain *s2_parent,
> + struct iommufd_ctx *ictx, unsigned int viommu_type,
> + const struct iommu_user_data *user_data)
> +{
> + struct tegra241_cmdqv *cmdqv =
> + container_of(smmu, struct tegra241_cmdqv, smmu);
> + struct iommu_viommu_tegra241_cmdqv data;
> + struct tegra241_vintf *vintf;
> + phys_addr_t page0_base;
> + int ret;
> +
> + if (viommu_type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV)
> + return ERR_PTR(-EOPNOTSUPP);
> + if (!user_data)
> + return ERR_PTR(-EINVAL);
> +
> + ret = iommu_copy_struct_from_user(&data, user_data,
> + IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV,
> + out_vintf_page0_pgsz);
> + if (ret)
> + return ERR_PTR(ret);
> +
> + vintf = iommufd_viommu_alloc(ictx, struct tegra241_vintf, vsmmu.core,
> + &tegra241_cmdqv_viommu_ops);
> + if (!vintf)
> + return ERR_PTR(-ENOMEM);
> +
> + ret = tegra241_cmdqv_init_vintf(cmdqv, cmdqv->num_vintfs - 1, vintf);
> + if (ret < 0) {
> + dev_err(cmdqv->dev, "no more available vintf\n");
> + goto free_vintf;
> + }
> +
> + vintf->vsmmu.smmu = smmu;
> + vintf->vsmmu.s2_parent = s2_parent;
> + /* FIXME Move VMID allocation from the S2 domain allocation to here */
> + vintf->vsmmu.vmid = s2_parent->s2_cfg.vmid;
> +
> + /*
> + * Initialize the user-owned VINTF without a LVCMDQ, because it has to
> + * wait for the allocation of a user-owned LVCMDQ, for security reason.
> + * It is different than the kernel-owned VINTF0, which had pre-assigned
> + * and pre-allocated global VCMDQs that would be mapped to the LVCMDQs
> + * by the tegra241_vintf_hw_init() call.
> + */
> + ret = tegra241_vintf_hw_init(vintf, false);
> + if (ret)
> + goto deinit_vintf;
> +
> + vintf->lvcmdqs = kcalloc(cmdqv->num_lvcmdqs_per_vintf,
> + sizeof(*vintf->lvcmdqs), GFP_KERNEL);
> + if (!vintf->lvcmdqs) {
> + ret = -ENOMEM;
> + goto hw_deinit_vintf;
> + }
> +
> + page0_base = cmdqv->base_phys + TEGRA241_VINTFi_PAGE0(vintf->idx);
> + ret = iommufd_ctx_alloc_mmap(ictx, page0_base, SZ_64K,
> + &vintf->immap_id);
> + if (ret)
> + goto hw_deinit_vintf;
> +
> + data.out_vintf_page0_pgsz = SZ_64K;
> + data.out_vintf_page0_pgoff = vintf->immap_id;
> + ret = iommu_copy_struct_to_user(user_data, &data,
> + IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV,
> + out_vintf_page0_pgsz);
> + if (ret)
> + goto free_mmap;
> +
> + ida_init(&vintf->sids);
> +
> + dev_dbg(cmdqv->dev, "VINTF%u: allocated with vmid (%d)\n", vintf->idx,
> + vintf->vsmmu.vmid);
> +
> + return &vintf->vsmmu;
> +
> +free_mmap:
> + iommufd_ctx_free_mmap(ictx, vintf->immap_id);
> +hw_deinit_vintf:
> + tegra241_vintf_hw_deinit(vintf);
> +deinit_vintf:
> + tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx);
> +free_vintf:
> + iommufd_struct_destroy(ictx, vintf, vsmmu.core);
> + return ERR_PTR(ret);
> +}
Thanks,
Praan
> --
> 2.43.0
>