[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251219170551.GF254720@nvidia.com>
Date: Fri, 19 Dec 2025 13:05:51 -0400
From: Jason Gunthorpe <jgg@...dia.com>
To: Nicolin Chen <nicolinc@...dia.com>
Cc: will@...nel.org, robin.murphy@....com, joro@...tes.org, jpb@...nel.org,
praan@...gle.com, miko.lenczewski@....com,
linux-arm-kernel@...ts.infradead.org, iommu@...ts.linux.dev,
linux-kernel@...r.kernel.org, patches@...ts.linux.dev
Subject: Re: [PATCH v1 2/9] iommu/arm-smmu-v3: Add alloc_id/free_id functions
to arm_smmu_invs
On Thu, Dec 18, 2025 at 12:26:48PM -0800, Nicolin Chen wrote:
> @@ -720,6 +723,9 @@ struct arm_smmu_invs {
> rwlock_t rwlock;
> bool has_ats;
> struct rcu_head rcu;
> + struct arm_smmu_domain *smmu_domain;
> + int (*alloc_id)(struct arm_smmu_inv *inv, void *data);
> + void (*free_id)(struct arm_smmu_inv *inv, bool flush);
> struct arm_smmu_inv inv[] __counted_by(max_invs);
> };
I'm not keen on this at all..
On the allocation side everything should be stored inside the
invalidation list; we don't need function callbacks — just get the
information from the list in the first place.
You could probably split it up a bit: introduce arm_smmu_iotlb_tag
first and spread it around, filling it from the domain, then add the
change below to fill it from the invs array. But this whole series
should basically just be this:
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index d7c492ee093602..5cd71a9690e292 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1188,8 +1188,7 @@ EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_merge);
*/
VISIBLE_IF_KUNIT
void arm_smmu_invs_unref(struct arm_smmu_invs *invs,
- struct arm_smmu_invs *to_unref,
- void (*free_fn)(struct arm_smmu_inv *inv))
+ struct arm_smmu_invs *to_unref)
{
unsigned long flags;
size_t num_invs = 0;
@@ -1200,16 +1199,16 @@ void arm_smmu_invs_unref(struct arm_smmu_invs *invs,
if (cmp < 0) {
/* not found in to_unref, leave alone */
num_invs = i + 1;
+ refcount_set(&to_unref->inv[j].users, 1);
} else if (cmp == 0) {
/* same item */
if (!refcount_dec_and_test(&invs->inv[i].users)) {
num_invs = i + 1;
+ refcount_set(&to_unref->inv[j].users, 1);
continue;
}
- /* KUNIT test doesn't pass in a free_fn */
- if (free_fn)
- free_fn(&invs->inv[i]);
+ refcount_set(&to_unref->inv[j].users, 0);
invs->num_trashes++;
} else {
/* item in to_unref is not in invs or already a trash */
@@ -3046,6 +3045,13 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
return NULL;
}
+static struct arm_vsmmu *to_vsmmu(struct iommu_domain *domain)
+{
+ if (domain->type == IOMMU_DOMAIN_NESTED)
+ return to_smmu_nested_domain(domain)->vsmmu;
+ return NULL;
+}
+
/*
* If the domain uses the smmu_domain->devices list return the arm_smmu_domain
* structure, otherwise NULL. These domains track attached devices so they can
@@ -3156,12 +3162,136 @@ arm_smmu_master_build_inv(struct arm_smmu_master *master,
case INV_TYPE_ATS:
case INV_TYPE_ATS_FULL:
cur->size_opcode = cur->nsize_opcode = CMDQ_OP_ATC_INV;
+ cur->ssid = ssid;
break;
}
return cur;
}
+static int arm_smmu_get_id_from_invs(struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_device *smmu,
+ unsigned int type)
+{
+ struct arm_smmu_invs *invs = rcu_dereference_protected(
+ smmu_domain->invs, lockdep_is_held(&arm_smmu_asid_lock));
+ size_t i;
+
+ for (i = 0; i != invs->num_invs; i++) {
+ if (invs->inv[i].type == type &&
+ invs->inv[i].smmu == smmu &&
+ refcount_read(&invs->inv[i].users))
+ return invs->inv[i].id;
+ }
+
+ return -ENOENT;
+}
+
+static int arm_smmu_get_tag(struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_master *master,
+ struct arm_vsmmu *vsmmu,
+ struct arm_smmu_iotlb_tag *tag, bool no_alloc)
+{
+ struct arm_smmu_device *smmu = master->smmu;
+ int ret;
+ int id;
+
+ lockdep_assert_held(&arm_smmu_asid_lock);
+
+ switch (smmu_domain->stage) {
+ case ARM_SMMU_DOMAIN_SVA:
+ case ARM_SMMU_DOMAIN_S1: {
+ u32 asid;
+
+ if (WARN_ON(vsmmu))
+ return -EINVAL;
+
+ id = arm_smmu_get_id_from_invs(smmu_domain, smmu,
+ INV_TYPE_S1_ASID);
+ if (id > 0) {
+ tag->asid = id;
+ return 0;
+ }
+
+ if (no_alloc)
+ return -ENOENT;
+
+ ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
+ XA_LIMIT(1, (1 << smmu->asid_bits) - 1),
+ GFP_KERNEL);
+ if (ret)
+ return ret;
+ tag->asid = asid;
+ tag->need_free = 1;
+ return 0;
+ }
+
+ case ARM_SMMU_DOMAIN_S2:
+ if (smmu_domain->nest_parent) {
+ /* FIXME we can support attaching a nest_parent without
+ * a vsmmu, but to do that we need to fix
+ * arm_smmu_get_id_from_invs() to never return the vmid
+ * of a vsmmu. Probably by making a
+ * INV_TYPE_S2_VMID_VSMMU */
+ id = vsmmu->vmid;
+ return 0;
+ }
+
+ id = arm_smmu_get_id_from_invs(smmu_domain, smmu,
+ INV_TYPE_S2_VMID);
+ if (id > 0) {
+ tag->vmid = id;
+ break;
+ }
+
+ if (no_alloc)
+ return -ENOENT;
+
+ /* Reserve VMID 0 for stage-2 bypass STEs */
+ id = ida_alloc_range(&smmu->vmid_map, 1,
+ (1 << smmu->vmid_bits) - 1, GFP_KERNEL);
+ if (id < 0)
+ return id;
+ tag->vmid = id;
+ tag->need_free = 1;
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static void arm_smmu_free_tag(struct arm_smmu_master *master,
+ struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_iotlb_tag *tag,
+ struct arm_smmu_inv *inv)
+{
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_cmdq_ent cmd = {};
+
+ switch (smmu_domain->stage) {
+ case ARM_SMMU_DOMAIN_SVA:
+ case ARM_SMMU_DOMAIN_S1:
+ cmd.opcode = inv->nsize_opcode;
+ cmd.tlbi.asid = tag->asid;
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ xa_erase(&arm_smmu_asid_xa, tag->asid);
+ return;
+
+ case ARM_SMMU_DOMAIN_S2:
+ cmd.opcode = inv->nsize_opcode;
+ cmd.tlbi.vmid = tag->vmid;
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+
+ /*
+ * Even though the VMID remains with the viommu and is not freed
+ * we are stopping invalidations of it so it has to be cleared.
+ */
+ if (smmu_domain->nest_parent)
+ return;
+ ida_free(&smmu->vmid_map, tag->vmid);
+ return;
+ }
+}
+
/*
* Use the preallocated scratch array at master->build_invs, to build a to_merge
* or to_unref array, to pass into a following arm_smmu_invs_merge/unref() call.
@@ -3171,7 +3301,8 @@ arm_smmu_master_build_inv(struct arm_smmu_master *master,
*/
static struct arm_smmu_invs *
arm_smmu_master_build_invs(struct arm_smmu_master *master, bool ats_enabled,
- ioasid_t ssid, struct arm_smmu_domain *smmu_domain)
+ ioasid_t ssid, struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_iotlb_tag *tag)
{
const bool nesting = smmu_domain->nest_parent;
size_t pgsize = 0, i;
@@ -3188,14 +3319,14 @@ arm_smmu_master_build_invs(struct arm_smmu_master *master, bool ats_enabled,
case ARM_SMMU_DOMAIN_SVA:
case ARM_SMMU_DOMAIN_S1:
if (!arm_smmu_master_build_inv(master, INV_TYPE_S1_ASID,
- smmu_domain->cd.asid,
- IOMMU_NO_PASID, pgsize))
+ tag->asid, IOMMU_NO_PASID,
+ pgsize))
return NULL;
break;
case ARM_SMMU_DOMAIN_S2:
if (!arm_smmu_master_build_inv(master, INV_TYPE_S2_VMID,
- smmu_domain->s2_cfg.vmid,
- IOMMU_NO_PASID, pgsize))
+ tag->vmid, IOMMU_NO_PASID,
+ pgsize))
return NULL;
break;
default:
@@ -3205,9 +3336,9 @@ arm_smmu_master_build_invs(struct arm_smmu_master *master, bool ats_enabled,
/* All the nested S1 ASIDs have to be flushed when S2 parent changes */
if (nesting) {
- if (!arm_smmu_master_build_inv(
- master, INV_TYPE_S2_VMID_S1_CLEAR,
- smmu_domain->s2_cfg.vmid, IOMMU_NO_PASID, 0))
+ if (!arm_smmu_master_build_inv(master,
+ INV_TYPE_S2_VMID_S1_CLEAR,
+ tag->vmid, IOMMU_NO_PASID, 0))
return NULL;
}
@@ -3270,12 +3401,14 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
* we are sequencing to update them.
*/
static int arm_smmu_attach_prepare_invs(struct arm_smmu_attach_state *state,
+ struct iommu_domain *new_domain,
struct arm_smmu_domain *new_smmu_domain)
{
struct arm_smmu_domain *old_smmu_domain =
to_smmu_domain_devices(state->old_domain);
struct arm_smmu_master *master = state->master;
ioasid_t ssid = state->ssid;
+ int ret;
/*
* At this point a NULL domain indicates the domain doesn't use the
@@ -3289,8 +3422,18 @@ static int arm_smmu_attach_prepare_invs(struct arm_smmu_attach_state *state,
invst->old_invs = rcu_dereference_protected(
new_smmu_domain->invs,
lockdep_is_held(&arm_smmu_asid_lock));
- build_invs = arm_smmu_master_build_invs(
- master, state->ats_enabled, ssid, new_smmu_domain);
+
+ /* Re-use or allocate an IOTLB cache tag */
+ ret = arm_smmu_get_tag(new_smmu_domain, master,
+ to_vsmmu(new_domain), &invst->tag,
+ false);
+ if (ret)
+ return ret;
+
+ build_invs = arm_smmu_master_build_invs(master,
+ state->ats_enabled,
+ ssid, new_smmu_domain,
+ &invst->tag);
if (!build_invs)
return -EINVAL;
@@ -3311,9 +3454,18 @@ static int arm_smmu_attach_prepare_invs(struct arm_smmu_attach_state *state,
invst->old_invs = rcu_dereference_protected(
old_smmu_domain->invs,
lockdep_is_held(&arm_smmu_asid_lock));
+
+ /* Find the IOTLB cache tag this master was using */
+ ret = arm_smmu_get_tag(old_smmu_domain, master,
+ to_vsmmu(state->old_domain), &invst->tag,
+ true);
+ if (WARN_ON(ret))
+ return ret;
+
/* For old_smmu_domain, new_invs points to master->build_invs */
invst->new_invs = arm_smmu_master_build_invs(
- master, master->ats_enabled, ssid, old_smmu_domain);
+ master, master->ats_enabled, ssid, old_smmu_domain,
+ &invst->tag);
}
return 0;
@@ -3349,36 +3501,13 @@ arm_smmu_install_new_domain_invs(struct arm_smmu_attach_state *state)
kfree_rcu(invst->old_invs, rcu);
}
-/*
- * When an array entry's users count reaches zero, it means the ASID/VMID is no
- * longer being invalidated by map/unmap and must be cleaned. The rule is that
- * all ASIDs/VMIDs not in an invalidation array are left cleared in the IOTLB.
- */
-static void arm_smmu_inv_flush_iotlb_tag(struct arm_smmu_inv *inv)
-{
- struct arm_smmu_cmdq_ent cmd = {};
-
- switch (inv->type) {
- case INV_TYPE_S1_ASID:
- cmd.tlbi.asid = inv->id;
- break;
- case INV_TYPE_S2_VMID:
- /* S2_VMID using nsize_opcode covers S2_VMID_S1_CLEAR */
- cmd.tlbi.vmid = inv->id;
- break;
- default:
- return;
- }
-
- cmd.opcode = inv->nsize_opcode;
- arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, &cmd);
-}
-
/* Should be installed after arm_smmu_install_ste_for_dev() */
static void
arm_smmu_install_old_domain_invs(struct arm_smmu_attach_state *state)
{
struct arm_smmu_inv_state *invst = &state->old_domain_invst;
+ struct arm_smmu_domain *old_smmu_domain =
+ to_smmu_domain_devices(state->old_domain);
struct arm_smmu_invs *old_invs = invst->old_invs;
struct arm_smmu_invs *new_invs;
@@ -3387,8 +3516,10 @@ arm_smmu_install_old_domain_invs(struct arm_smmu_attach_state *state)
if (!invst->invs_ptr)
return;
- arm_smmu_invs_unref(old_invs, invst->new_invs,
- arm_smmu_inv_flush_iotlb_tag);
+ arm_smmu_invs_unref(old_invs, invst->new_invs);
+ if (old_smmu_domain && !refcount_read(&invst->new_invs->inv[0].users))
+ arm_smmu_free_tag(state->master, old_smmu_domain, &invst->tag,
+ invst->new_invs->inv);
new_invs = arm_smmu_invs_purge(old_invs);
if (!new_invs)
@@ -3472,9 +3603,9 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
arm_smmu_ats_supported(master);
}
- ret = arm_smmu_attach_prepare_invs(state, smmu_domain);
+ ret = arm_smmu_attach_prepare_invs(state, new_domain, smmu_domain);
if (ret)
- return ret;
+ goto err_unprepare_invs;
if (smmu_domain) {
if (new_domain->type == IOMMU_DOMAIN_NESTED) {
@@ -3551,6 +3682,10 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
err_free_vmaster:
kfree(state->vmaster);
err_unprepare_invs:
+ if (state->new_domain_invst.tag.need_free)
+ arm_smmu_free_tag(master, smmu_domain,
+ &state->new_domain_invst.tag,
+ state->new_domain_invst.new_invs.inv);
kfree(state->new_domain_invst.new_invs);
return ret;
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 4f104c1baa67a2..689b5ca7d61525 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -1103,6 +1103,12 @@ static inline bool arm_smmu_master_canwbs(struct arm_smmu_master *master)
IOMMU_FWSPEC_PCI_RC_CANWBS;
}
+struct arm_smmu_iotlb_tag {
+ u16 need_free : 1;
+ u16 asid;
+ u16 vmid;
+};
+
/**
* struct arm_smmu_inv_state - Per-domain invalidation array state
* @invs_ptr: points to the domain->invs (unwinding nesting/etc.) or is NULL if
@@ -1116,6 +1122,7 @@ struct arm_smmu_inv_state {
struct arm_smmu_invs __rcu **invs_ptr;
struct arm_smmu_invs *old_invs;
struct arm_smmu_invs *new_invs;
+ struct arm_smmu_iotlb_tag tag;
};
struct arm_smmu_attach_state {
Powered by blists - more mailing lists