lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAA8EJppypaVCBLV1rGPUgbVdtm9Zz9haJiYd=4ar+-bXLPh7DA@mail.gmail.com>
Date: Thu, 21 Dec 2023 02:37:25 +0200
From: Dmitry Baryshkov <dmitry.baryshkov@...aro.org>
To: Bibek Kumar Patro <quic_bibekkum@...cinc.com>
Cc: will@...nel.org, robin.murphy@....com, joro@...tes.org, 
	konrad.dybcio@...aro.org, jsnitsel@...hat.com, quic_bjorande@...cinc.com, 
	mani@...nel.org, quic_eberman@...cinc.com, robdclark@...omium.org, 
	u.kleine-koenig@...gutronix.de, robh@...nel.org, vladimir.oltean@....com, 
	quic_pkondeti@...cinc.com, quic_molvera@...cinc.com, 
	linux-arm-msm@...r.kernel.org, linux-arm-kernel@...ts.infradead.org, 
	iommu@...ts.linux.dev, linux-kernel@...r.kernel.org, 
	qipl.kernel.upstream@...cinc.com
Subject: Re: [PATCH v6 3/5] iommu/arm-smmu: introduction of ACTLR for custom
 prefetcher settings

On Thu, 21 Dec 2023 at 02:36, Dmitry Baryshkov
<dmitry.baryshkov@...aro.org> wrote:
>
> On Wed, 20 Dec 2023 at 15:39, Bibek Kumar Patro
> <quic_bibekkum@...cinc.com> wrote:
> >
> > Currently in Qualcomm  SoCs the default prefetch is set to 1 which allows
> > the TLB to fetch just the next page table. MMU-500 features ACTLR
> > register which is implementation defined and is used for Qualcomm SoCs
> > to have a custom prefetch setting enabling TLB to prefetch the next set
> > of page tables accordingly allowing for faster translations.
> >
> > ACTLR value is unique for each SMR (Stream matching register) and stored
> > in a pre-populated table. This value is set to the register during
> > context bank initialisation.
> >
> > Signed-off-by: Bibek Kumar Patro <quic_bibekkum@...cinc.com>
> > ---
> >  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 45 ++++++++++++++++++++++
> >  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h |  6 ++-
> >  drivers/iommu/arm/arm-smmu/arm-smmu.c      |  5 ++-
> >  drivers/iommu/arm/arm-smmu/arm-smmu.h      |  5 +++
> >  4 files changed, 58 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > index 20c9836d859b..1cefdd0ca110 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > @@ -24,6 +24,12 @@
> >  #define CPRE                   (1 << 1)
> >  #define CMTLB                  (1 << 0)
> >
> > +struct actlr_config {
> > +       u16 sid;
> > +       u16 mask;
> > +       u32 actlr;
> > +};
> > +
> >  static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> >  {
> >         return container_of(smmu, struct qcom_smmu, smmu);
> > @@ -215,9 +221,38 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
> >         return true;
> >  }
> >
> > +static void qcom_smmu_set_actlr(struct device *dev, struct arm_smmu_device *smmu, int cbndx,
> > +               const struct actlr_config *actlrcfg)
> > +{
> > +       struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
> > +       struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> > +       struct arm_smmu_smr *smr;
> > +       u16 mask;
> > +       int idx;
> > +       u16 id;
> > +       int i;
> > +
> > +       for (; actlrcfg->sid || actlrcfg->mask || actlrcfg->actlr; actlrcfg++) {
> > +               id = actlrcfg->sid;
> > +               mask = actlrcfg->mask;
> > +
> > +               for_each_cfg_sme(cfg, fwspec, i, idx) {
> > +                       smr = &smmu->smrs[idx];
> > +                       if (smr_is_subset(smr, id, mask)) {
> > +                               arm_smmu_cb_write(smmu, cbndx, ARM_SMMU_CB_ACTLR,
> > +                                               actlrcfg->actlr);
> > +                               break;
> > +                       }
> > +               }
> > +       }
> > +}
> > +
> >  static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >                 struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
> >  {
> > +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> > +       struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
> > +       int cbndx = smmu_domain->cfg.cbndx;
> >         struct adreno_smmu_priv *priv;
> >
> >         smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
> > @@ -248,6 +283,9 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >         priv->set_stall = qcom_adreno_smmu_set_stall;
> >         priv->resume_translation = qcom_adreno_smmu_resume_translation;
> >
> > +       if (qsmmu->data->actlrcfg_gfx)
> > +               qcom_smmu_set_actlr(dev, smmu, cbndx, qsmmu->data->actlrcfg_gfx);
>
> There was a feedback point against v4 that there can be more than two
> (apps + gpu) SMMU devices. No, we can not use additional compat
> strings, the SMMU units are compatible with each other. Please add
> matching between the smmu and particular actlr table using the IO
> address of the SMMU block.
>
> > +
> >         return 0;
> >  }
> >
> > @@ -274,6 +312,13 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
> >  static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >                 struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
> >  {
> > +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> > +       struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
> > +       int cbndx = smmu_domain->cfg.cbndx;
> > +
> > +       if (qsmmu->data->actlrcfg)
> > +               qcom_smmu_set_actlr(dev, smmu, cbndx, qsmmu->data->actlrcfg);
> > +
>
> One issue occured to me, while I was reviewing the patchset. The ACTLR
> settings are related to the whole SMMU setup, but we are applying them
> each time there is an SMMU context init (in other words, one per each
> domain). Is that correct? Or it's just that there is no better place
> for initialising the global register set? Would it be better to
> reprogram the ACTLR registers which are related just to this
> particular domain?

Ignore this, I went back to qcom_smmu_set_actlr() and understood that
I failed to read it properly.

>
> >         smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
> >
> >         return 0;
> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
> > index f3b91963e234..cb4cb402c202 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
> > @@ -1,6 +1,6 @@
> >  /* SPDX-License-Identifier: GPL-2.0-only */
> >  /*
> > - * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved.
> > + * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
> >   */
> >
> >  #ifndef _ARM_SMMU_QCOM_H
> > @@ -24,7 +24,11 @@ struct qcom_smmu_config {
> >         const u32 *reg_offset;
> >  };
> >
> > +struct actlr_config;
> > +
> >  struct qcom_smmu_match_data {
> > +       const struct actlr_config *actlrcfg;
> > +       const struct actlr_config *actlrcfg_gfx;
> >         const struct qcom_smmu_config *cfg;
> >         const struct arm_smmu_impl *impl;
> >         const struct arm_smmu_impl *adreno_impl;
> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > index d6d1a2a55cc0..0c7f700b27dd 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > @@ -990,9 +990,10 @@ static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
> >                  * expect simply identical entries for this case, but there's
> >                  * no harm in accommodating the generalisation.
> >                  */
> > -               if ((mask & smrs[i].mask) == mask &&
> > -                   !((id ^ smrs[i].id) & ~smrs[i].mask))
> > +
> > +               if (smr_is_subset(&smrs[i], id, mask))
> >                         return i;
> > +
> >                 /*
> >                  * If the new entry has any other overlap with an existing one,
> >                  * though, then there always exists at least one stream ID
> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> > index 703fd5817ec1..2e4f65412c6b 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> > @@ -501,6 +501,11 @@ static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page,
> >                 writeq_relaxed(val, arm_smmu_page(smmu, page) + offset);
> >  }
> >
> > +static inline bool smr_is_subset(struct arm_smmu_smr *smrs, u16 id, u16 mask)
> > +{
> > +       return (mask & smrs->mask) == mask && !((id ^ smrs->id) & ~smrs->mask);
> > +}
> > +
> >  #define ARM_SMMU_GR0           0
> >  #define ARM_SMMU_GR1           1
> >  #define ARM_SMMU_CB(s, n)      ((s)->numpage + (n))
> > --
> > 2.17.1
> >
>
>
> --
> With best wishes
> Dmitry



-- 
With best wishes
Dmitry

Powered by blists - more mailing lists