[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230721063513.33431-4-tanmay@marvell.com>
Date: Fri, 21 Jul 2023 02:35:12 -0400
From: Tanmay Jagdale <tanmay@...vell.com>
To: <will@...nel.org>, <robin.murphy@....com>, <joro@...tes.org>,
<baolu.lu@...ux.intel.com>, <thunder.leizhen@...wei.com>
CC: <linux-arm-kernel@...ts.infradead.org>, <iommu@...ts.linux.dev>,
<linux-kernel@...r.kernel.org>, <gcherian@...vell.com>,
<sgoutham@...vell.com>, <lcherian@...vell.com>,
<bbhushan2@...vell.com>
Subject: [RESEND PATCH 3/4] iommu/arm-smmu-v3: Add arm_smmu_ecmdq_issue_cmdlist() for non-shared ECMDQ
From: Zhen Lei <thunder.leizhen@...wei.com>
When a core can exclusively own an ECMDQ, competition with other cores
does not need to be considered during command insertion. Therefore, we can
delete the part of arm_smmu_cmdq_issue_cmdlist() that deals with
multi-core contention and generate a more efficient ECMDQ-specific
function arm_smmu_ecmdq_issue_cmdlist().
Signed-off-by: Zhen Lei <thunder.leizhen@...wei.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 85 +++++++++++++++++++++
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 +
2 files changed, 86 insertions(+)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 1b3b37a1972e..dc3ff4796aaf 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -777,6 +777,87 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
}
}
+/*
+ * The function is used when the current core exclusively occupies an ECMDQ.
+ * This is a reduced version of arm_smmu_cmdq_issue_cmdlist(), which eliminates
+ * a lot of unnecessary inter-core competition considerations.
+ */
+static int arm_smmu_ecmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
+ u64 *cmds, int n, bool sync)
+{
+ u32 prod;
+ unsigned long flags;
+ struct arm_smmu_ll_queue llq = {
+ .max_n_shift = cmdq->q.llq.max_n_shift,
+ }, head;
+ int ret = 0;
+
+ /* 1. Allocate some space in the queue */
+ local_irq_save(flags);
+ llq.val = READ_ONCE(cmdq->q.llq.val);
+ do {
+ u64 old;
+
+ while (!queue_has_space(&llq, n + sync)) {
+ local_irq_restore(flags);
+ if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
+ dev_err_ratelimited(smmu->dev, "ECMDQ timeout\n");
+ local_irq_save(flags);
+ }
+
+ head.cons = llq.cons;
+ head.prod = queue_inc_prod_n(&llq, n + sync);
+
+ old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
+ if (old == llq.val)
+ break;
+
+ llq.val = old;
+ } while (1);
+
+ /* 2. Write our commands into the queue */
+ arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
+ if (sync) {
+ u64 cmd_sync[CMDQ_ENT_DWORDS];
+
+ prod = queue_inc_prod_n(&llq, n);
+ arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
+ queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
+ }
+
+ /* 3. Ensuring commands are visible first */
+ dma_wmb();
+
+ /* 4. Advance the hardware prod pointer */
+ read_lock(&cmdq->q.ecmdq_lock);
+ writel_relaxed(head.prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg);
+ read_unlock(&cmdq->q.ecmdq_lock);
+
+ /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
+ if (sync) {
+ llq.prod = queue_inc_prod_n(&llq, n);
+ ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
+ if (ret) {
+ dev_err_ratelimited(smmu->dev,
+ "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
+ llq.prod,
+ readl_relaxed(cmdq->q.prod_reg),
+ readl_relaxed(cmdq->q.cons_reg));
+ }
+
+ /*
+ * Update cmdq->q.llq.cons, to improve the success rate of
+ * queue_has_space() when some new commands are inserted next
+ * time.
+ */
+ WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
+ }
+
+ local_irq_restore(flags);
+ return ret;
+}
+
/*
* This is the actual insertion function, and provides the following
* ordering guarantees to callers:
@@ -806,6 +887,9 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
llq.max_n_shift = cmdq->q.llq.max_n_shift;
+ if (!cmdq->shared)
+ return arm_smmu_ecmdq_issue_cmdlist(smmu, cmdq, cmds, n, sync);
+
/* 1. Allocate some space in the queue */
local_irq_save(flags);
llq.val = READ_ONCE(cmdq->q.llq.val);
@@ -3022,6 +3106,7 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
+ cmdq->shared = 1;
atomic_set(&cmdq->owner_prod, 0);
atomic_set(&cmdq->lock, 0);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 1f8777817e31..a8988fcd605f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -574,6 +574,7 @@ struct arm_smmu_cmdq {
atomic_long_t *valid_map;
atomic_t owner_prod;
atomic_t lock;
+ int shared;
};
struct arm_smmu_ecmdq {
--
2.34.1
Powered by blists - more mailing lists