[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d2ec6fe3-db78-4e8b-ac81-fd4c631a599b@collabora.com>
Date: Wed, 12 Mar 2025 11:20:33 -0300
From: Ariel D'Alessandro <ariel.dalessandro@...labora.com>
To: AngeloGioacchino Del Regno <angelogioacchino.delregno@...labora.com>,
dri-devel@...ts.freedesktop.org, linux-kernel@...r.kernel.org
Cc: boris.brezillon@...labora.com, robh@...nel.org, steven.price@....com,
maarten.lankhorst@...ux.intel.com, mripard@...nel.org, tzimmermann@...e.de,
airlied@...il.com, simona@...ll.ch, kernel@...labora.com,
linux-mediatek@...ts.infradead.org, linux-arm-kernel@...ts.infradead.org,
sjoerd@...labora.com
Subject: Re: [PATCH v1 4/6] drm/panfrost: Add support for AARCH64_4K page
table format
Angelo,
On 3/11/25 6:10 AM, AngeloGioacchino Del Regno wrote:
> Il 10/03/25 20:59, Ariel D'Alessandro ha scritto:
>> Currently, Panfrost only supports MMU configuration in "LEGACY" (as
>> Bifrost calls it) mode, a (modified) version of LPAE "Large Physical
>> Address Extension", which in Linux we've called "mali_lpae".
>>
>> This commit adds support for conditionally enabling AARCH64_4K page
>> table format. To achieve that, a "GPU optional configurations" field was
>> added to `struct panfrost_features` with the related flag.
[snip]
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/
>> drm/panfrost/panfrost_mmu.c
>> index 31df3a96f89bd..4a9b8de2ff987 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
[snip]
>> +static void panfrost_mmu_cfg_init(struct panfrost_mmu *mmu,
>> + enum io_pgtable_fmt fmt)
>> +{
>> + struct panfrost_device *pfdev = mmu->pfdev;
>> +
>> + switch (fmt) {
>> + case ARM_64_LPAE_S1:
>> + mmu_cfg_init_aarch64_4k(mmu);
>> + break;
>> + case ARM_MALI_LPAE:
>> + mmu_cfg_init_mali_lpae(mmu);
>> + break;
>> + default:
>
> If you add a
> /* That should never happen */
>
> ...it's clear-er why this function doesn't fail (but still raises some
> eyebrows,
> because if the `default` case is not reachable, why does it even have a
> print?).
Makes sense. In that case, makes sense to just simplify it as:
default:
/* This should never happen */
WARN_ON(1);
break;
>> + dev_WARN_ONCE(pfdev->dev, 1, "Unhandled page table format\n");
>> + break;
>> + }
>> +}
>> +
>> static void
>> _panfrost_mmu_as_control_write(struct panfrost_device *pfdev, u32
>> as_nr,
>> - u64 transtab, u64 memattr)
>> + u64 transtab, u64 memattr, u64 transcfg)
>> {
>> mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL,
>> AS_COMMAND_FLUSH_MEM);
>> @@ -133,25 +223,28 @@ _panfrost_mmu_as_control_write(struct
>> panfrost_device *pfdev, u32 as_nr,
>> mmu_write(pfdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr));
>> mmu_write(pfdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr));
>> + mmu_write(pfdev, AS_TRANSCFG_LO(as_nr), lower_32_bits(transcfg));
>> + mmu_write(pfdev, AS_TRANSCFG_HI(as_nr), upper_32_bits(transcfg));
>> +
>> write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE);
>> +
>> + dev_dbg(pfdev->dev, "mmu_as_control: as=%d, transtab=0x%016llx,
>> memattr=0x%016llx, transcfg=0x%016llx",
>> + as_nr, transtab, memattr, transcfg);
>> }
>> static void panfrost_mmu_enable(struct panfrost_device *pfdev,
>> struct panfrost_mmu *mmu)
>> {
>> - int as_nr = mmu->as;
>> - struct io_pgtable_cfg *cfg = &mmu->pgtbl_cfg;
>> - u64 transtab = cfg->arm_mali_lpae_cfg.transtab;
>> - u64 memattr = cfg->arm_mali_lpae_cfg.memattr;
>> -
>> /* Need to revisit mem attrs.
>> * NC is the default, Mali driver is inner WT.
>> */
>> - _panfrost_mmu_as_control_write(pfdev, as_nr, transtab, memattr);
>> + _panfrost_mmu_as_control_write(pfdev, mmu->as, mmu->cfg.transtab,
>> + mmu->cfg.memattr, mmu->cfg.transcfg);
>> }
>> static void panfrost_mmu_disable(struct panfrost_device *pfdev, u32
>> as_nr)
>> {
>> - _panfrost_mmu_as_control_write(pfdev, as_nr, 0, 0);
>> + _panfrost_mmu_as_control_write(pfdev, as_nr, 0, 0,
>> + AS_TRANSCFG_ADRMODE_UNMAPPED);
>> }
>> u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct
>> panfrost_mmu *mmu)
>> @@ -616,6 +709,7 @@ struct panfrost_mmu
>> *panfrost_mmu_ctx_create(struct panfrost_device *pfdev)
>> {
>> u32 va_bits = GPU_MMU_FEATURES_VA_BITS(pfdev-
>> >features.mmu_features);
>> u32 pa_bits = GPU_MMU_FEATURES_PA_BITS(pfdev-
>> >features.mmu_features);
>> + enum io_pgtable_fmt fmt = ARM_MALI_LPAE;
>
> Double initialization! :-)
Will fix in v2.
>> struct panfrost_mmu *mmu;
>> mmu = kzalloc(sizeof(*mmu), GFP_KERNEL);
>> @@ -641,16 +735,28 @@ struct panfrost_mmu
>> *panfrost_mmu_ctx_create(struct panfrost_device *pfdev)
>> .iommu_dev = pfdev->dev,
>> };
>> - mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu-
>> >pgtbl_cfg,
>> - mmu);
>> - if (!mmu->pgtbl_ops) {
>> - kfree(mmu);
>> - return ERR_PTR(-EINVAL);
>> + if (pfdev->comp->gpu_configs & BIT(GPU_CONFIG_AARCH64_4K)) {
>
> Why aren't you performing this check before kzalloc?
>
> If you do so, you will be able to avoid having a goto, because this
> check will
> simply return an error (struct not allocated yet, nothing to kfree).
> This also means that you won't have to modify anything about the error
> handling
> of the alloc_io_pgtable_ops below....
Yup, definitely agreed. Thanks!
>> + if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_AARCH64_MMU)) {
>> + dev_err_once(pfdev->dev,
>> + "AARCH64_4K page table not supported\n");
>> + goto err_free_mmu;
>> + }
>> + fmt = ARM_64_LPAE_S1;
>> }
>
> } else {
> fmt = ARM_MALI_LPAE;
> }
Ack. Will fix in v2.
>
>> + mmu->pgtbl_ops = alloc_io_pgtable_ops(fmt, &mmu->pgtbl_cfg, mmu);
>> + if (!mmu->pgtbl_ops)
>> + goto err_free_mmu;
>> +
>> + panfrost_mmu_cfg_init(mmu, fmt);
>> +
>> kref_init(&mmu->refcount);
>> return mmu;
>> +
>> +err_free_mmu:
>> + kfree(mmu);
>> + return ERR_PTR(-EINVAL);
>> }
>> static const char *access_type_name(struct panfrost_device *pfdev,
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/
>> drm/panfrost/panfrost_regs.h
>> index b5f279a19a084..2b8f1617b8369 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_regs.h
>> +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
>> @@ -301,6 +301,17 @@
>> #define AS_TRANSTAB_HI(as) (MMU_AS(as) + 0x04) /* (RW)
>> Translation Table Base Address for address space n, high word */
>> #define AS_MEMATTR_LO(as) (MMU_AS(as) + 0x08) /* (RW) Memory
>> attributes for address space n, low word. */
>> #define AS_MEMATTR_HI(as) (MMU_AS(as) + 0x0C) /* (RW) Memory
>> attributes for address space n, high word. */
>> +#define AS_MEMATTR_AARCH64_INNER_ALLOC_IMPL (2 << 2)
>> +#define AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(w, r) ((3 << 2) | \
>> + ((w) ? BIT(0) : 0) | \
>> + ((r) ? BIT(1) : 0))
>> +#define AS_MEMATTR_AARCH64_SH_MIDGARD_INNER (0 << 4)
>> +#define AS_MEMATTR_AARCH64_SH_CPU_INNER (1 << 4)
>> +#define AS_MEMATTR_AARCH64_SH_CPU_INNER_SHADER_COH (2 << 4)
>> +#define AS_MEMATTR_AARCH64_SHARED (0 << 6)
>> +#define AS_MEMATTR_AARCH64_INNER_OUTER_NC (1 << 6)
>> +#define AS_MEMATTR_AARCH64_INNER_OUTER_WB (2 << 6)
>> +#define AS_MEMATTR_AARCH64_FAULT (3 << 6)
>> #define AS_LOCKADDR_LO(as) (MMU_AS(as) + 0x10) /* (RW) Lock
>> region address for address space n, low word */
>> #define AS_LOCKADDR_HI(as) (MMU_AS(as) + 0x14) /* (RW) Lock
>> region address for address space n, high word */
>> #define AS_COMMAND(as) (MMU_AS(as) + 0x18) /* (WO) MMU
>> command register for address space n */
>> @@ -311,6 +322,24 @@
>> /* Additional Bifrost AS registers */
>> #define AS_TRANSCFG_LO(as) (MMU_AS(as) + 0x30) /* (RW)
>> Translation table configuration for address space n, low word */
>> #define AS_TRANSCFG_HI(as) (MMU_AS(as) + 0x34) /* (RW)
>> Translation table configuration for address space n, high word */
>> +#define AS_TRANSCFG_ADRMODE_LEGACY (0 << 0)
>> +#define AS_TRANSCFG_ADRMODE_UNMAPPED (1 << 0)
>> +#define AS_TRANSCFG_ADRMODE_IDENTITY (2 << 0)
>> +#define AS_TRANSCFG_ADRMODE_AARCH64_4K (6 << 0)
>> +#define AS_TRANSCFG_ADRMODE_AARCH64_64K (8 << 0)
>
> "Anything" shifted in any direction by 0 is equal to the same
> "anything" :-)
>
> Those are just 0,1,2,6,8
Well, I agree... However, I'm copying this from panthor as-is. As these
are similar drivers, I just kept all the macro definitions.
>
>> +#define AS_TRANSCFG_INA_BITS(x) ((x) << 6)
>> +#define AS_TRANSCFG_OUTA_BITS(x) ((x) << 14)
>> +#define AS_TRANSCFG_SL_CONCAT BIT(22)
>> +#define AS_TRANSCFG_PTW_MEMATTR_NC (1 << 24)
>
> BIT(24)
>
>> +#define AS_TRANSCFG_PTW_MEMATTR_WB (2 << 24)
>
> BIT(25)
>
>> +#define AS_TRANSCFG_PTW_SH_NS (0 << 28)
>
> 0
>
>> +#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
>
> BIT(29)
>
>> +#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
>
> GENMASK(29, 28) or BIT(28) | BIT(29)
>
> (btw, rinse and repeat for the memattrs)
I think the criteria used in panfrost/panthor for these definitions is:
* if the register field is 1 bit, use BIT()
* if the register field is >1 bit, use the value (as defined in the
datasheet) an shift it.
* and -be super explicit- do this even if the value is 0.
I don't really have a strong opinion, but I'd attach to the
subsystem/driver criteria used as much as possible :)
Thanks!
--
Ariel D'Alessandro
Software Engineer
Collabora Ltd.
Platinum Building, St John's Innovation Park, Cambridge CB4 0DS, UK
Registered in England & Wales, no. 5513718
Powered by blists - more mailing lists