lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <808e1041-0d61-4ee8-9778-0f9b7d0fb719@quicinc.com>
Date: Fri, 6 Dec 2024 18:06:10 +0530
From: Bibek Kumar Patro <quic_bibekkum@...cinc.com>
To: Rob Clark <robdclark@...il.com>
CC: <will@...nel.org>, <robin.murphy@....com>, <joro@...tes.org>,
        <jgg@...pe.ca>, <jsnitsel@...hat.com>, <robh@...nel.org>,
        <krzysztof.kozlowski@...aro.org>, <quic_c_gdjako@...cinc.com>,
        <dmitry.baryshkov@...aro.org>, <iommu@...ts.linux.dev>,
        <linux-arm-msm@...r.kernel.org>,
        <linux-arm-kernel@...ts.infradead.org>, <linux-kernel@...r.kernel.org>,
        Connor Abbott <cwabbott0@...il.com>,
        "Rob
 Clark" <robdclark@...omium.org>
Subject: Re: [PATCH RESEND v17 3/5] iommu/arm-smmu: add support for PRR bit
 setup



On 12/4/2024 8:54 PM, Rob Clark wrote:
> On Wed, Dec 4, 2024 at 3:28 AM Bibek Kumar Patro
> <quic_bibekkum@...cinc.com> wrote:
>>
>>
>>
>> On 11/22/2024 10:04 PM, Rob Clark wrote:
>>> On Fri, Nov 22, 2024 at 8:20 AM Bibek Kumar Patro
>>> <quic_bibekkum@...cinc.com> wrote:
>>>>
>>>>
>>>>
>>>> On 11/21/2024 3:40 AM, Rob Clark wrote:
>>>>> On Wed, Nov 20, 2024 at 9:17 AM Rob Clark <robdclark@...il.com> wrote:
>>>>>>
>>>>>> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
>>>>>> <quic_bibekkum@...cinc.com> wrote:
>>>>>>>
>>>>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>>>>> sequence as per request.
>>>>>>>
>>>>>>> This will be used by GPU to setup the PRR bit and related
>>>>>>> configuration registers through adreno-smmu private
>>>>>>> interface instead of directly poking the smmu hardware.
>>>>>>>
>>>>>>> Suggested-by: Rob Clark <robdclark@...il.com>
>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@...cinc.com>
>>>>>>> ---
>>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>>>>     include/linux/adreno-smmu-priv.h           | 14 ++++++++
>>>>>>>     3 files changed, 53 insertions(+)
>>>>>>>
>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>> index d26f5aea248e..0e4f3fbda961 100644
>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>> @@ -16,6 +16,8 @@
>>>>>>>
>>>>>>>     #define QCOM_DUMMY_VAL -1
>>>>>>>
>>>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>>>>>> +
>>>>>>>     static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>>>>>     {
>>>>>>>            return container_of(smmu, struct qcom_smmu, smmu);
>>>>>>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>>>>            arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>>>>     }
>>>>>>>
>>>>>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
>>>>>>> +{
>>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>>>>> +       u32 reg = 0;
>>>>>>> +
>>>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>>>>> +       reg &= ~GFX_ACTLR_PRR;
>>>>>>> +       if (set)
>>>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
>>>>>>> +{
>>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>>> +
>>>>>>> +       writel_relaxed(lower_32_bits(page_addr),
>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>>> +
>>>>>>> +       writel_relaxed(upper_32_bits(page_addr),
>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>>> +}
>>>>>>> +
>>>>>>>     #define QCOM_ADRENO_SMMU_GPU_SID 0
>>>>>>>
>>>>>>>     static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>>>>>>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
>>>>>>>     static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>>>                    struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
>>>>>>>     {
>>>>>>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
>>>>>>>            struct adreno_smmu_priv *priv;
>>>>>>>
>>>>>>>            smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
>>>>>>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>>>            priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>>>>>>            priv->set_stall = qcom_adreno_smmu_set_stall;
>>>>>>>            priv->resume_translation = qcom_adreno_smmu_resume_translation;
>>>>>>> +       priv->set_prr_bit = NULL;
>>>>>>> +       priv->set_prr_addr = NULL;
>>>>>>> +
>>>>>>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
>>>>>>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
>>>>>>
>>>>>> fwiw, it seems like PRR actually works on sc7180, which is _not_
>>>>>> mmu-500, so I guess the support actually goes back further.
>>>>>>
>>>>>> I'm curious if we can just rely on this being supported by any hw that
>>>>>> has a6xx or newer?
>>>>>
>>>>>
>>>>> Also, unrelated, but we can't assume the smmu is powered when drm
>>>>> driver calls set_prr_bit/addr, could you add in runpm get/put around
>>>>> the register access?
>>>>>
>>>>
>>>> I see, thanks for this observation.
>>>> It is certainly possible that the SMMU is powered off when
>>>> they access these registers.
>>>> I will add the suggested runpm ops around the register access.
>>>>
>>>>> Otherwise Connor and I have vk sparse residency working now
>>>>>
>>>>
>>>> Sorry, could not get this. Did you mean that vk sparse residency
>>>> is working now using this patch?
>>>
>>> Yes, vk sparse residency is working with this patch (plus addition of
>>> runpm get/put, plus drm/msm patches plus turnip patches) ;-)
>>>
>>
>> Thanks for testing the sparse residency feature with our patch Rob,
>> I have an additional query regarding the adreno private interfaces.
>> Specifically, I was referring to other interfaces such as
>> qcom_adreno_smmu_get_fault_info [1]. It appears that we do not have a
>> runpm get/put around the register access in this context.
> 
> get_fault_info() is called from the iommu fault handler callback, so
> from the fault irq handler, which is why it didn't need the runpm
> get/put.  Maybe it is bad to make this assumption about usage, but
> then again adreno_smmu_priv isn't really a general purpose interface.
> 

Ah okay, got it.
I was going through all the adreno_smmu_priv interfaces to get a
better understanding of their interaction with the smmu, and it seems
that, apart from PRR and get_fault_info(), set_ttbr0_cfg() and
resume_translation() also access registers without voting.
Should we put runpm_put/get there as well, or can these be excluded?
<Just curious about the thought process behind this: is it because of
some sequence that determines when to put a vote, e.g. the reg access
happening in the middle of an smmu power cycle and not at the beginning
or end?>

>> Could you please clarify whether we need an SMMU vote around register
>> access in the case of PRR? IMO, should the users of this callback ensure
>> they put a vote before accessing the cb?
> 
> How can drm vote for the smmu device?  I guess it could power up
> itself and rely on device-link.. but that is pretty overkill to power
> up the entire gpu in this case.  I think it is best for the vote to
> happen in the PRR callbacks.
> 

Okay, I see: the GPU can only power itself up (through gpu_state_get, I
assume), but it has no way to power on only the smmu.

In case we go forward with putting the vote in the PRR callbacks for the
SMMU, I was planning to move the existing arm_smmu_rpm_put/get() from
arm-smmu.c to its header file (arm-smmu.h) so that the same helpers can
be used in arm-smmu-qcom.c. What are your thoughts on this?

Thanks & regards,
Bibek

> BR,
> -R
> 
>> [1]:
>> https://lore.kernel.org/all/20210610214431.539029-1-robdclark@gmail.com/
>>
>> Thanks & regards,
>> Bibek
>>
>>> BR,
>>> -R
>>>
>>>> Thanks & regards,
>>>> Bibek
>>>>
>>>>> BR,
>>>>> -R
>>>>>
>>>>>>
>>>>>>> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
>>>>>>> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
>>>>>>> +       }
>>>>>>>
>>>>>>>            return 0;
>>>>>>>     }
>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>> index e2aeb511ae90..2dbf3243b5ad 100644
>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>>>>>>     #define ARM_SMMU_SCTLR_M               BIT(0)
>>>>>>>
>>>>>>>     #define ARM_SMMU_CB_ACTLR              0x4
>>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>>>>>>
>>>>>>>     #define ARM_SMMU_CB_RESUME             0x8
>>>>>>>     #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>>>>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>>>>>>> index c637e0997f6d..614665153b3e 100644
>>>>>>> --- a/include/linux/adreno-smmu-priv.h
>>>>>>> +++ b/include/linux/adreno-smmu-priv.h
>>>>>>> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
>>>>>>>      *                 the GPU driver must call resume_translation()
>>>>>>>      * @resume_translation: Resume translation after a fault
>>>>>>>      *
>>>>>>> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
>>>>>>> + *             targets without PRR support. Exercise caution and verify target
>>>>>>> + *             capabilities before invoking these callbacks to prevent potential
>>>>>>> + *             runtime errors or unexpected behavior.
>>>>>>> + *
>>>>>>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
>>>>>>> + *                ACTLR register bits, currently used to configure
>>>>>>> + *                Partially-Resident-Region (PRR) bit for feature's
>>>>>>> + *                setup and reset sequence as requested.
>>>>>>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
>>>>>>> + *                physical address of PRR page passed from
>>>>>>> + *                GPU driver.
>>>>>>>      *
>>>>>>>      * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>>>>>>      * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>>>>>>> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
>>>>>>>         void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>>>>>>         void (*set_stall)(const void *cookie, bool enabled);
>>>>>>>         void (*resume_translation)(const void *cookie, bool terminate);
>>>>>>> +    void (*set_prr_bit)(const void *cookie, bool set);
>>>>>>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>>>>>>>     };
>>>>>>>
>>>>>>>     #endif /* __ADRENO_SMMU_PRIV_H */
>>>>>>> --
>>>>>>> 2.34.1
>>>>>>>
>>>>
>>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ