[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <Z9Z2sjWQHKgGJyGo@antec>
Date: Sun, 16 Mar 2025 06:58:58 +0000
From: Stafford Horne <shorne@...il.com>
To: Sahil Siddiq <icegambit91@...il.com>
Cc: jonas@...thpole.se, stefan.kristiansson@...nalahti.fi,
linux-openrisc@...r.kernel.org, linux-kernel@...r.kernel.org,
Sahil Siddiq <sahilcdq@...ton.me>
Subject: Re: [PATCH v2] openrisc: Add cacheinfo support
On Sun, Mar 16, 2025 at 02:09:37AM +0530, Sahil Siddiq wrote:
> Add cacheinfo support for OpenRISC.
>
> Currently, a few CPU cache attributes pertaining to OpenRISC processors
> are exposed along with other unrelated CPU attributes in the procfs file
> system (/proc/cpuinfo). However, a few cache attributes remain unexposed.
> An attempt is also made to pull these CPU cache attributes without
> detecting if the relevant cache exists.
>
> This patch provides a mechanism that the generic cacheinfo infrastructure
> can employ to expose these attributes via the sysfs file system. These
> attributes are exposed in /sys/devices/system/cpu/cpuX/cache/indexN.
> Cache attributes are pulled only when the cache component is detected.
>
> The implementation to pull cache attributes from the processor's
> registers has been moved from arch/openrisc/kernel/setup.c with a few
> modifications.
>
> The patch also moves cache-related fields out of struct cpuinfo_or1k and
> into its own struct to keep the implementation straightforward. This
> reduces duplication of cache-related fields while keeping cpuinfo_or1k
> extensible in case more cache descriptors are added in the future.
>
> This implementation is based on similar work done for MIPS and LoongArch.
>
> Signed-off-by: Sahil Siddiq <sahilcdq@...ton.me>
> ---
> Changes from v1 -> v2:
> - Changed patch prefix from RFC to PATCH.
> - cacheinfo.c: Print number of sets. Remove integer padding.
> - dma.c
> (page_set_nocache): Access cache attributes only if component exists.
> (arch_sync_dma_for_device): Likewise.
> - cache.c: Likewise.
> - init.c: Likewise.
I think it may be better to split these cache op change out to a separate patch
which we can sequence in before cacheinfo and make your changes a series.
> arch/openrisc/include/asm/cpuinfo.h | 16 +++--
> arch/openrisc/kernel/Makefile | 2 +-
> arch/openrisc/kernel/cacheinfo.c | 106 ++++++++++++++++++++++++++++
> arch/openrisc/kernel/dma.c | 22 +++---
> arch/openrisc/kernel/setup.c | 45 ------------
> arch/openrisc/mm/cache.c | 11 ++-
> arch/openrisc/mm/init.c | 8 ++-
> 7 files changed, 144 insertions(+), 66 deletions(-)
> create mode 100644 arch/openrisc/kernel/cacheinfo.c
>
> diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h
> index 5e4744153d0e..82f5d4c06314 100644
> --- a/arch/openrisc/include/asm/cpuinfo.h
> +++ b/arch/openrisc/include/asm/cpuinfo.h
> @@ -15,16 +15,18 @@
> #ifndef __ASM_OPENRISC_CPUINFO_H
> #define __ASM_OPENRISC_CPUINFO_H
>
> +struct cache_desc {
> + u32 size;
> + u32 sets;
> + u32 block_size;
> + u32 ways;
Considering the changes below to add cache available checks, maybe we
want to add a field here, such as `bool present`. Or a flags field like
is used in loongarch?
Though just checking the UPR is probably faster in all cases, it may be good to
have a helper function for that.
> +};
> +
> struct cpuinfo_or1k {
> u32 clock_frequency;
>
> - u32 icache_size;
> - u32 icache_block_size;
> - u32 icache_ways;
> -
> - u32 dcache_size;
> - u32 dcache_block_size;
> - u32 dcache_ways;
> + struct cache_desc icache;
> + struct cache_desc dcache;
>
> u16 coreid;
> };
> diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile
> index 79129161f3e0..e4c7d9bdd598 100644
> --- a/arch/openrisc/kernel/Makefile
> +++ b/arch/openrisc/kernel/Makefile
> @@ -7,7 +7,7 @@ extra-y := vmlinux.lds
>
> obj-y := head.o setup.o or32_ksyms.o process.o dma.o \
> traps.o time.o irq.o entry.o ptrace.o signal.o \
> - sys_call_table.o unwinder.o
> + sys_call_table.o unwinder.o cacheinfo.o
>
> obj-$(CONFIG_SMP) += smp.o sync-timer.o
> obj-$(CONFIG_STACKTRACE) += stacktrace.o
> diff --git a/arch/openrisc/kernel/cacheinfo.c b/arch/openrisc/kernel/cacheinfo.c
> new file mode 100644
> index 000000000000..6bb81e246f7e
> --- /dev/null
> +++ b/arch/openrisc/kernel/cacheinfo.c
> @@ -0,0 +1,106 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * OpenRISC cacheinfo support
> + *
> + * Based on work done for MIPS and LoongArch. All original copyrights
> + * apply as per the original source declaration.
> + *
> + * OpenRISC implementation:
> + * Copyright (C) 2025 Sahil Siddiq <sahilcdq@...ton.me>
> + */
> +
> +#include <linux/cacheinfo.h>
> +#include <asm/cpuinfo.h>
> +#include <asm/spr.h>
> +#include <asm/spr_defs.h>
> +
> +static inline void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type,
> + unsigned int level, struct cache_desc *cache, int cpu)
> +{
> + this_leaf->type = type;
> + this_leaf->level = level;
> + this_leaf->coherency_line_size = cache->block_size;
> + this_leaf->number_of_sets = cache->sets;
> + this_leaf->ways_of_associativity = cache->ways;
> + this_leaf->size = cache->size;
> + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
> +}
> +
> +int init_cache_level(unsigned int cpu)
> +{
> + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
> + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> + int leaves = 0, levels = 0;
> + unsigned long upr = mfspr(SPR_UPR);
> + unsigned long iccfgr, dccfgr;
> +
> + if (!(upr & SPR_UPR_UP)) {
> + printk(KERN_INFO
> + "-- no UPR register... unable to detect configuration\n");
> + return -ENOENT;
> + }
> +
> + if (upr & SPR_UPR_DCP) {
> + dccfgr = mfspr(SPR_DCCFGR);
> + cpuinfo->dcache.ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
> + cpuinfo->dcache.sets = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
> + cpuinfo->dcache.block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
> + cpuinfo->dcache.size =
> + cpuinfo->dcache.sets * cpuinfo->dcache.ways * cpuinfo->dcache.block_size;
> + leaves += 1;
> + printk(KERN_INFO
> + "-- dcache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n",
> + cpuinfo->dcache.size, cpuinfo->dcache.block_size,
> + cpuinfo->dcache.sets,
> + cpuinfo->dcache.ways);
The indentation of sets looks a bit off here.
> + } else
> + printk(KERN_INFO "-- dcache disabled\n");
> +
> + if (upr & SPR_UPR_ICP) {
> + iccfgr = mfspr(SPR_ICCFGR);
> + cpuinfo->icache.ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
> + cpuinfo->icache.sets = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
> + cpuinfo->icache.block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
> + cpuinfo->icache.size =
> + cpuinfo->icache.sets * cpuinfo->icache.ways * cpuinfo->icache.block_size;
> + leaves += 1;
> + printk(KERN_INFO
> + "-- icache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n",
> + cpuinfo->icache.size, cpuinfo->icache.block_size,
> + cpuinfo->icache.sets,
> + cpuinfo->icache.ways);
The indentation of sets looks a bit off here. Maybe its the others that are out
of line, but can you fix? Also I think sets and ways could be on the same line.
> + } else
> + printk(KERN_INFO "-- icache disabled\n");
> +
> + if (!leaves)
> + return -ENOENT;
> +
> + levels = 1;
> +
> + this_cpu_ci->num_leaves = leaves;
> + this_cpu_ci->num_levels = levels;
> +
> + return 0;
> +}
> +
> +int populate_cache_leaves(unsigned int cpu)
> +{
> + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
> + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> + struct cacheinfo *this_leaf = this_cpu_ci->info_list;
> + int level = 1;
> +
> + if (cpuinfo->dcache.ways) {
> + ci_leaf_init(this_leaf, CACHE_TYPE_DATA, level, &cpuinfo->dcache, cpu);
> + this_leaf->attributes = ((mfspr(SPR_DCCFGR) & SPR_DCCFGR_CWS) >> 8) ?
> + CACHE_WRITE_BACK : CACHE_WRITE_THROUGH;
> + this_leaf++;
> + }
> +
> + if (cpuinfo->icache.ways)
> + ci_leaf_init(this_leaf, CACHE_TYPE_INST, level, &cpuinfo->icache, cpu);
> +
> + this_cpu_ci->cpu_map_populated = true;
> +
> + return 0;
> +}
> diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
> index b3edbb33b621..ffb161e41e9d 100644
> --- a/arch/openrisc/kernel/dma.c
> +++ b/arch/openrisc/kernel/dma.c
> @@ -36,8 +36,10 @@ page_set_nocache(pte_t *pte, unsigned long addr,
> flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>
> /* Flush page out of dcache */
> - for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size)
> - mtspr(SPR_DCBFR, cl);
> + if (cpuinfo->dcache.ways) {
> + for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache.block_size)
> + mtspr(SPR_DCBFR, cl);
> + }
I think it would be better to move this to cacheflush.h as a function like
flush_dcache_range() or local_dcache_range_flush().
> return 0;
> }
> @@ -104,15 +106,19 @@ void arch_sync_dma_for_device(phys_addr_t addr, size_t size,
> switch (dir) {
> case DMA_TO_DEVICE:
> /* Flush the dcache for the requested range */
> - for (cl = addr; cl < addr + size;
> - cl += cpuinfo->dcache_block_size)
> - mtspr(SPR_DCBFR, cl);
> + if (cpuinfo->dcache.ways) {
> + for (cl = addr; cl < addr + size;
> + cl += cpuinfo->dcache.block_size)
> + mtspr(SPR_DCBFR, cl);
> + }
Also here,I think it would be better to move this to cacheflush.h as a function like
flush_dcache_range().
Or, local_dcache_range_flush(), which seems to be the convention we use in
cacheflush.h/cache.c.
> break;
> case DMA_FROM_DEVICE:
> /* Invalidate the dcache for the requested range */
> - for (cl = addr; cl < addr + size;
> - cl += cpuinfo->dcache_block_size)
> - mtspr(SPR_DCBIR, cl);
> + if (cpuinfo->dcache.ways) {
> + for (cl = addr; cl < addr + size;
> + cl += cpuinfo->dcache.block_size)
> + mtspr(SPR_DCBIR, cl);
> + }
This one could be invalidate_dcache_range(). Note, this will also be useful
for the kexec patches that I am working on.
Or, local_dcache_range_inv(), which seems to be the convention we use in
cacheflush.h/cache.c.
> break;
> default:
> /*
> diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
> index be56eaafc8b9..38172c0989cf 100644
> --- a/arch/openrisc/kernel/setup.c
> +++ b/arch/openrisc/kernel/setup.c
> @@ -107,27 +107,6 @@ static void print_cpuinfo(void)
> printk(KERN_INFO "CPU: OpenRISC-%x (revision %d) @%d MHz\n",
> version, revision, cpuinfo->clock_frequency / 1000000);
>
> - if (!(upr & SPR_UPR_UP)) {
> - printk(KERN_INFO
> - "-- no UPR register... unable to detect configuration\n");
> - return;
> - }
> -
> - if (upr & SPR_UPR_DCP)
> - printk(KERN_INFO
> - "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n",
> - cpuinfo->dcache_size, cpuinfo->dcache_block_size,
> - cpuinfo->dcache_ways);
> - else
> - printk(KERN_INFO "-- dcache disabled\n");
> - if (upr & SPR_UPR_ICP)
> - printk(KERN_INFO
> - "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n",
> - cpuinfo->icache_size, cpuinfo->icache_block_size,
> - cpuinfo->icache_ways);
> - else
> - printk(KERN_INFO "-- icache disabled\n");
> -
> if (upr & SPR_UPR_DMP)
> printk(KERN_INFO "-- dmmu: %4d entries, %lu way(s)\n",
> 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
> @@ -155,8 +134,6 @@ static void print_cpuinfo(void)
> void __init setup_cpuinfo(void)
> {
> struct device_node *cpu;
> - unsigned long iccfgr, dccfgr;
> - unsigned long cache_set_size;
> int cpu_id = smp_processor_id();
> struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu_id];
>
> @@ -164,20 +141,6 @@ void __init setup_cpuinfo(void)
> if (!cpu)
> panic("Couldn't find CPU%d in device tree...\n", cpu_id);
>
> - iccfgr = mfspr(SPR_ICCFGR);
> - cpuinfo->icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
> - cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
> - cpuinfo->icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
> - cpuinfo->icache_size =
> - cache_set_size * cpuinfo->icache_ways * cpuinfo->icache_block_size;
> -
> - dccfgr = mfspr(SPR_DCCFGR);
> - cpuinfo->dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
> - cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
> - cpuinfo->dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
> - cpuinfo->dcache_size =
> - cache_set_size * cpuinfo->dcache_ways * cpuinfo->dcache_block_size;
> -
> if (of_property_read_u32(cpu, "clock-frequency",
> &cpuinfo->clock_frequency)) {
> printk(KERN_WARNING
> @@ -320,14 +283,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
> seq_printf(m, "revision\t\t: %d\n", vr & SPR_VR_REV);
> }
> seq_printf(m, "frequency\t\t: %ld\n", loops_per_jiffy * HZ);
> - seq_printf(m, "dcache size\t\t: %d bytes\n", cpuinfo->dcache_size);
> - seq_printf(m, "dcache block size\t: %d bytes\n",
> - cpuinfo->dcache_block_size);
> - seq_printf(m, "dcache ways\t\t: %d\n", cpuinfo->dcache_ways);
> - seq_printf(m, "icache size\t\t: %d bytes\n", cpuinfo->icache_size);
> - seq_printf(m, "icache block size\t: %d bytes\n",
> - cpuinfo->icache_block_size);
> - seq_printf(m, "icache ways\t\t: %d\n", cpuinfo->icache_ways);
> seq_printf(m, "immu\t\t\t: %d entries, %lu ways\n",
> 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
> 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW));
> diff --git a/arch/openrisc/mm/cache.c b/arch/openrisc/mm/cache.c
> index eb43b73f3855..acdf2dc256bf 100644
> --- a/arch/openrisc/mm/cache.c
> +++ b/arch/openrisc/mm/cache.c
> @@ -16,10 +16,15 @@
> #include <asm/cacheflush.h>
> #include <asm/tlbflush.h>
>
> -static __always_inline void cache_loop(struct page *page, const unsigned int reg)
> +static __always_inline void cache_loop(struct page *page, const unsigned int reg,
> + const unsigned int cache_type)
> {
> unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT;
> unsigned long line = paddr & ~(L1_CACHE_BYTES - 1);
> + unsigned long upr = mfspr(SPR_UPR);
> +
> + if (!(upr & SPR_UPR_UP & cache_type))
> + return;
>
> while (line < paddr + PAGE_SIZE) {
> mtspr(reg, line);
> @@ -29,13 +34,13 @@ static __always_inline void cache_loop(struct page *page, const unsigned int reg
>
> void local_dcache_page_flush(struct page *page)
> {
> - cache_loop(page, SPR_DCBFR);
> + cache_loop(page, SPR_DCBFR, SPR_UPR_DCP);
> }
> EXPORT_SYMBOL(local_dcache_page_flush);
>
> void local_icache_page_inv(struct page *page)
> {
> - cache_loop(page, SPR_ICBIR);
> + cache_loop(page, SPR_ICBIR, SPR_UPR_ICP);
> }
> EXPORT_SYMBOL(local_icache_page_inv);
OK, if we move the range flush and invalidate here we will need to add to this
cache_loop a bit more.
> diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
> index d0cb1a0126f9..bbe16546c5b9 100644
> --- a/arch/openrisc/mm/init.c
> +++ b/arch/openrisc/mm/init.c
> @@ -124,6 +124,7 @@ static void __init map_ram(void)
> void __init paging_init(void)
> {
> int i;
> + unsigned long upr;
>
> printk(KERN_INFO "Setting up paging and PTEs.\n");
>
> @@ -176,8 +177,11 @@ void __init paging_init(void)
> barrier();
>
> /* Invalidate instruction caches after code modification */
> - mtspr(SPR_ICBIR, 0x900);
> - mtspr(SPR_ICBIR, 0xa00);
> + upr = mfspr(SPR_UPR);
> + if (upr & SPR_UPR_UP & SPR_UPR_ICP) {
> + mtspr(SPR_ICBIR, 0x900);
> + mtspr(SPR_ICBIR, 0xa00);
> + }
Here we could use new utilities such as local_icache_range_inv(0x900,
L1_CACHE_BYTES);
Or something like local_icache_block_inv(0x900). This only needs to flush a
single block as the code it is invalidating is just 2 instructions 8 bytes:
.org 0x900
l.j boot_dtlb_miss_handler
l.nop
.org 0xa00
l.j boot_itlb_miss_handler
l.nop
>
>
> /* New TLB miss handlers and kernel page tables are in now place.
> * Make sure that page flags get updated for all pages in TLB by
> --
> 2.48.1
-Stafford
Powered by blists - more mailing lists