[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b5866f7a-013a-5900-6fce-268052f2ba0a@huawei.com>
Date: Mon, 12 Aug 2019 11:42:17 +0100
From: John Garry <john.garry@...wei.com>
To: Zhen Lei <thunder.leizhen@...wei.com>,
Jean-Philippe Brucker <jean-philippe.brucker@....com>,
Robin Murphy <robin.murphy@....com>,
"Will Deacon" <will@...nel.org>, Joerg Roedel <joro@...tes.org>,
linux-arm-kernel <linux-arm-kernel@...ts.infradead.org>,
iommu <iommu@...ts.linux-foundation.org>,
linux-kernel <linux-kernel@...r.kernel.org>,
<jean-philippe@...aro.org>
Subject: Re: [PATCH] iommu/arm-smmu-v3: add nr_ats_masters to avoid
unnecessary operations
On 01/08/2019 13:20, Zhen Lei wrote:
> When (smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS) is true, even if a
> smmu domain does not contain any ats master, the operations of
> arm_smmu_atc_inv_to_cmd() and lock protection in arm_smmu_atc_inv_domain()
> are always executed. This will impact performance, especially in
> multi-core and stress scenarios. For my FIO test scenario, about 8%
> performance reduced.
>
> In fact, we can use a atomic member to record how many ats masters the
> smmu contains. And check that without traverse the list and check all
> masters one by one in the lock protection.
>
Hi Will, Robin, Jean-Philippe,
Can you kindly check this issue? We have seen a signifigant performance
regression here.
Thanks!
> Fixes: 9ce27afc0830 ("iommu/arm-smmu-v3: Add support for PCI ATS")
> Signed-off-by: Zhen Lei <thunder.leizhen@...wei.com>
> ---
> drivers/iommu/arm-smmu-v3.c | 10 ++++++++--
> 1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index a9a9fabd396804a..1b370d9aca95f94 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -631,6 +631,7 @@ struct arm_smmu_domain {
>
> struct io_pgtable_ops *pgtbl_ops;
> bool non_strict;
> + atomic_t nr_ats_masters;
It's not ideal to keep a separate count of ats masters...hmmm
>
> enum arm_smmu_domain_stage stage;
> union {
> @@ -1531,7 +1532,7 @@ static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
> struct arm_smmu_cmdq_ent cmd;
> struct arm_smmu_master *master;
>
> - if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
> + if (!atomic_read(&smmu_domain->nr_ats_masters))
> return 0;
The rest of the code is here:
arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master, &smmu_domain->devices, domain_head)
ret |= arm_smmu_atc_inv_master(master, &cmd);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
return ret ? -ETIMEDOUT : 0;
}
Not directly related to leizhen's issue: Could RCU protection be used
for this list iteration? I can't imagine that the devices list changes
often. And also we already protect the cmdq in arm_smmu_atc_inv_master().
>
> arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
> @@ -1869,6 +1870,7 @@ static int arm_smmu_enable_ats(struct arm_smmu_master *master)
> size_t stu;
> struct pci_dev *pdev;
> struct arm_smmu_device *smmu = master->smmu;
> + struct arm_smmu_domain *smmu_domain = master->domain;
> struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
>
> if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) ||
> @@ -1887,12 +1889,15 @@ static int arm_smmu_enable_ats(struct arm_smmu_master *master)
> return ret;
>
> master->ats_enabled = true;
> + atomic_inc(&smmu_domain->nr_ats_masters);
> +
> return 0;
> }
>
> static void arm_smmu_disable_ats(struct arm_smmu_master *master)
> {
> struct arm_smmu_cmdq_ent cmd;
> + struct arm_smmu_domain *smmu_domain = master->domain;
>
> if (!master->ats_enabled || !dev_is_pci(master->dev))
> return;
> @@ -1901,6 +1906,7 @@ static void arm_smmu_disable_ats(struct arm_smmu_master *master)
> arm_smmu_atc_inv_master(master, &cmd);
> pci_disable_ats(to_pci_dev(master->dev));
> master->ats_enabled = false;
> + atomic_dec(&smmu_domain->nr_ats_masters);
> }
>
> static void arm_smmu_detach_dev(struct arm_smmu_master *master)
> @@ -1915,10 +1921,10 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master)
> list_del(&master->domain_head);
> spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
>
> - master->domain = NULL;
> arm_smmu_install_ste_for_dev(master);
>
> arm_smmu_disable_ats(master);
> + master->domain = NULL;
> }
>
> static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>
Powered by blists - more mailing lists