linux-kernel - Re: [PATCH v2] openrisc: Add cacheinfo support

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Z9Z2sjWQHKgGJyGo@antec>
Date: Sun, 16 Mar 2025 06:58:58 +0000
From: Stafford Horne <shorne@...il.com>
To: Sahil Siddiq <icegambit91@...il.com>
Cc: jonas@...thpole.se, stefan.kristiansson@...nalahti.fi,
	linux-openrisc@...r.kernel.org, linux-kernel@...r.kernel.org,
	Sahil Siddiq <sahilcdq@...ton.me>
Subject: Re: [PATCH v2] openrisc: Add cacheinfo support

On Sun, Mar 16, 2025 at 02:09:37AM +0530, Sahil Siddiq wrote:
> Add cacheinfo support for OpenRISC.
> 
> Currently, a few CPU cache attributes pertaining to OpenRISC processors
> are exposed along with other unrelated CPU attributes in the procfs file
> system (/proc/cpuinfo). However, a few cache attributes remain unexposed.
> An attempt is also made to pull these CPU cache attributes without
> detecting if the relevant cache exists.
> 
> This patch provides a mechanism that the generic cacheinfo infrastructure
> can employ to expose these attributes via the sysfs file system. These
> attributes are exposed in /sys/devices/system/cpu/cpuX/cache/indexN.
> Cache attributes are pulled only when the cache component is detected.
> 
> The implementation to pull cache attributes from the processor's
> registers has been moved from arch/openrisc/kernel/setup.c with a few
> modifications.
> 
> The patch also moves cache-related fields out of struct cpuinfo_or1k and
> into its own struct to keep the implementation straightforward. This
> reduces duplication of cache-related fields while keeping cpuinfo_or1k
> extensible in case more cache descriptors are added in the future.
> 
> This implementation is based on similar work done for MIPS and LoongArch.
> 
> Signed-off-by: Sahil Siddiq <sahilcdq@...ton.me>
> ---
> Changes from v1 -> v2:
> - Changed patch prefix from RFC to PATCH.
> - cacheinfo.c: Print number of sets. Remove integer padding.
> - dma.c
>   (page_set_nocache): Access cache attributes only if component exists.
>   (arch_sync_dma_for_device): Likewise.
> - cache.c: Likewise.
> - init.c: Likewise.

I think it may be better to split these cache op change out to a separate patch
which we can sequence in before cacheinfo and make your changes a series.

>  arch/openrisc/include/asm/cpuinfo.h |  16 +++--
>  arch/openrisc/kernel/Makefile       |   2 +-
>  arch/openrisc/kernel/cacheinfo.c    | 106 ++++++++++++++++++++++++++++
>  arch/openrisc/kernel/dma.c          |  22 +++---
>  arch/openrisc/kernel/setup.c        |  45 ------------
>  arch/openrisc/mm/cache.c            |  11 ++-
>  arch/openrisc/mm/init.c             |   8 ++-
>  7 files changed, 144 insertions(+), 66 deletions(-)
>  create mode 100644 arch/openrisc/kernel/cacheinfo.c
> 
> diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h
> index 5e4744153d0e..82f5d4c06314 100644
> --- a/arch/openrisc/include/asm/cpuinfo.h
> +++ b/arch/openrisc/include/asm/cpuinfo.h
> @@ -15,16 +15,18 @@
>  #ifndef __ASM_OPENRISC_CPUINFO_H
>  #define __ASM_OPENRISC_CPUINFO_H
>  
> +struct cache_desc {
> +	u32 size;
> +	u32 sets;
> +	u32 block_size;
> +	u32 ways;

Considering the changes below to add cache available checks, maybe we
want to add a field here, such as `bool present`.  Or a flags field like
is used in loongarch?

Though just checking the UPR is probably faster in all cases, it may be good to
have a helper function for that.

> +};
> +
>  struct cpuinfo_or1k {
>  	u32 clock_frequency;
>  
> -	u32 icache_size;
> -	u32 icache_block_size;
> -	u32 icache_ways;
> -
> -	u32 dcache_size;
> -	u32 dcache_block_size;
> -	u32 dcache_ways;
> +	struct cache_desc icache;
> +	struct cache_desc dcache;
>  
>  	u16 coreid;
>  };
> diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile
> index 79129161f3e0..e4c7d9bdd598 100644
> --- a/arch/openrisc/kernel/Makefile
> +++ b/arch/openrisc/kernel/Makefile
> @@ -7,7 +7,7 @@ extra-y	:= vmlinux.lds
>  
>  obj-y	:= head.o setup.o or32_ksyms.o process.o dma.o \
>  	   traps.o time.o irq.o entry.o ptrace.o signal.o \
> -	   sys_call_table.o unwinder.o
> +	   sys_call_table.o unwinder.o cacheinfo.o
>  
>  obj-$(CONFIG_SMP)		+= smp.o sync-timer.o
>  obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
> diff --git a/arch/openrisc/kernel/cacheinfo.c b/arch/openrisc/kernel/cacheinfo.c
> new file mode 100644
> index 000000000000..6bb81e246f7e
> --- /dev/null
> +++ b/arch/openrisc/kernel/cacheinfo.c
> @@ -0,0 +1,106 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * OpenRISC cacheinfo support
> + *
> + * Based on work done for MIPS and LoongArch. All original copyrights
> + * apply as per the original source declaration.
> + *
> + * OpenRISC implementation:
> + * Copyright (C) 2025 Sahil Siddiq <sahilcdq@...ton.me>
> + */
> +
> +#include <linux/cacheinfo.h>
> +#include <asm/cpuinfo.h>
> +#include <asm/spr.h>
> +#include <asm/spr_defs.h>
> +
> +static inline void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type,
> +				unsigned int level, struct cache_desc *cache, int cpu)
> +{
> +	this_leaf->type = type;
> +	this_leaf->level = level;
> +	this_leaf->coherency_line_size = cache->block_size;
> +	this_leaf->number_of_sets = cache->sets;
> +	this_leaf->ways_of_associativity = cache->ways;
> +	this_leaf->size = cache->size;
> +	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
> +}
> +
> +int init_cache_level(unsigned int cpu)
> +{
> +	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
> +	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> +	int leaves = 0, levels = 0;
> +	unsigned long upr = mfspr(SPR_UPR);
> +	unsigned long iccfgr, dccfgr;
> +
> +	if (!(upr & SPR_UPR_UP)) {
> +		printk(KERN_INFO
> +		       "-- no UPR register... unable to detect configuration\n");
> +		return -ENOENT;
> +	}
> +
> +	if (upr & SPR_UPR_DCP) {
> +		dccfgr = mfspr(SPR_DCCFGR);
> +		cpuinfo->dcache.ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
> +		cpuinfo->dcache.sets = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
> +		cpuinfo->dcache.block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
> +		cpuinfo->dcache.size =
> +		    cpuinfo->dcache.sets * cpuinfo->dcache.ways * cpuinfo->dcache.block_size;
> +		leaves += 1;
> +		printk(KERN_INFO
> +		       "-- dcache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n",
> +		       cpuinfo->dcache.size, cpuinfo->dcache.block_size,
> +			   cpuinfo->dcache.sets,
> +		       cpuinfo->dcache.ways);

The indentation of sets looks a bit off here.

> +	} else
> +		printk(KERN_INFO "-- dcache disabled\n");
> +
> +	if (upr & SPR_UPR_ICP) {
> +		iccfgr = mfspr(SPR_ICCFGR);
> +		cpuinfo->icache.ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
> +		cpuinfo->icache.sets = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
> +		cpuinfo->icache.block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
> +		cpuinfo->icache.size =
> +		    cpuinfo->icache.sets * cpuinfo->icache.ways * cpuinfo->icache.block_size;
> +		leaves += 1;
> +		printk(KERN_INFO
> +		       "-- icache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n",
> +		       cpuinfo->icache.size, cpuinfo->icache.block_size,
> +			   cpuinfo->icache.sets,
> +		       cpuinfo->icache.ways);

The indentation of sets looks a bit off here. Maybe its the others that are out
of line, but can you fix?  Also I think sets and ways could be on the same line.

> +	} else
> +		printk(KERN_INFO "-- icache disabled\n");
> +
> +	if (!leaves)
> +		return -ENOENT;
> +
> +	levels = 1;
> +
> +	this_cpu_ci->num_leaves = leaves;
> +	this_cpu_ci->num_levels = levels;
> +
> +	return 0;
> +}
> +
> +int populate_cache_leaves(unsigned int cpu)
> +{
> +	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
> +	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> +	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
> +	int level = 1;
> +
> +	if (cpuinfo->dcache.ways) {
> +		ci_leaf_init(this_leaf, CACHE_TYPE_DATA, level, &cpuinfo->dcache, cpu);
> +		this_leaf->attributes = ((mfspr(SPR_DCCFGR) & SPR_DCCFGR_CWS) >> 8) ?
> +					CACHE_WRITE_BACK : CACHE_WRITE_THROUGH;
> +		this_leaf++;
> +	}
> +
> +	if (cpuinfo->icache.ways)
> +		ci_leaf_init(this_leaf, CACHE_TYPE_INST, level, &cpuinfo->icache, cpu);
> +
> +	this_cpu_ci->cpu_map_populated = true;
> +
> +	return 0;
> +}
> diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
> index b3edbb33b621..ffb161e41e9d 100644
> --- a/arch/openrisc/kernel/dma.c
> +++ b/arch/openrisc/kernel/dma.c
> @@ -36,8 +36,10 @@ page_set_nocache(pte_t *pte, unsigned long addr,
>  	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>  
>  	/* Flush page out of dcache */
> -	for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size)
> -		mtspr(SPR_DCBFR, cl);
> +	if (cpuinfo->dcache.ways) {
> +		for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache.block_size)
> +			mtspr(SPR_DCBFR, cl);
> +	}

I think it would be better to move this to cacheflush.h as a function like
flush_dcache_range() or local_dcache_range_flush().

>  	return 0;
>  }
> @@ -104,15 +106,19 @@ void arch_sync_dma_for_device(phys_addr_t addr, size_t size,
>  	switch (dir) {
>  	case DMA_TO_DEVICE:
>  		/* Flush the dcache for the requested range */
> -		for (cl = addr; cl < addr + size;
> -		     cl += cpuinfo->dcache_block_size)
> -			mtspr(SPR_DCBFR, cl);
> +		if (cpuinfo->dcache.ways) {
> +			for (cl = addr; cl < addr + size;
> +			     cl += cpuinfo->dcache.block_size)
> +				mtspr(SPR_DCBFR, cl);
> +		}

Also here,I think it would be better to move this to cacheflush.h as a function like
flush_dcache_range().

Or, local_dcache_range_flush(), which seems to be the convention we use in
cacheflush.h/cache.c.


>  		break;
>  	case DMA_FROM_DEVICE:
>  		/* Invalidate the dcache for the requested range */
> -		for (cl = addr; cl < addr + size;
> -		     cl += cpuinfo->dcache_block_size)
> -			mtspr(SPR_DCBIR, cl);
> +		if (cpuinfo->dcache.ways) {
> +			for (cl = addr; cl < addr + size;
> +			     cl += cpuinfo->dcache.block_size)
> +				mtspr(SPR_DCBIR, cl);
> +		}

This one could be invalidate_dcache_range().   Note, this will also be useful
for the kexec patches that I am working on.

Or, local_dcache_range_inv(), which seems to be the convention we use in
cacheflush.h/cache.c.

>  		break;
>  	default:
>  		/*
> diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
> index be56eaafc8b9..38172c0989cf 100644
> --- a/arch/openrisc/kernel/setup.c
> +++ b/arch/openrisc/kernel/setup.c
> @@ -107,27 +107,6 @@ static void print_cpuinfo(void)
>  	printk(KERN_INFO "CPU: OpenRISC-%x (revision %d) @%d MHz\n",
>  	       version, revision, cpuinfo->clock_frequency / 1000000);
>  
> -	if (!(upr & SPR_UPR_UP)) {
> -		printk(KERN_INFO
> -		       "-- no UPR register... unable to detect configuration\n");
> -		return;
> -	}
> -
> -	if (upr & SPR_UPR_DCP)
> -		printk(KERN_INFO
> -		       "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n",
> -		       cpuinfo->dcache_size, cpuinfo->dcache_block_size,
> -		       cpuinfo->dcache_ways);
> -	else
> -		printk(KERN_INFO "-- dcache disabled\n");
> -	if (upr & SPR_UPR_ICP)
> -		printk(KERN_INFO
> -		       "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n",
> -		       cpuinfo->icache_size, cpuinfo->icache_block_size,
> -		       cpuinfo->icache_ways);
> -	else
> -		printk(KERN_INFO "-- icache disabled\n");
> -
>  	if (upr & SPR_UPR_DMP)
>  		printk(KERN_INFO "-- dmmu: %4d entries, %lu way(s)\n",
>  		       1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
> @@ -155,8 +134,6 @@ static void print_cpuinfo(void)
>  void __init setup_cpuinfo(void)
>  {
>  	struct device_node *cpu;
> -	unsigned long iccfgr, dccfgr;
> -	unsigned long cache_set_size;
>  	int cpu_id = smp_processor_id();
>  	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu_id];
>  
> @@ -164,20 +141,6 @@ void __init setup_cpuinfo(void)
>  	if (!cpu)
>  		panic("Couldn't find CPU%d in device tree...\n", cpu_id);
>  
> -	iccfgr = mfspr(SPR_ICCFGR);
> -	cpuinfo->icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
> -	cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
> -	cpuinfo->icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
> -	cpuinfo->icache_size =
> -	    cache_set_size * cpuinfo->icache_ways * cpuinfo->icache_block_size;
> -
> -	dccfgr = mfspr(SPR_DCCFGR);
> -	cpuinfo->dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
> -	cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
> -	cpuinfo->dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
> -	cpuinfo->dcache_size =
> -	    cache_set_size * cpuinfo->dcache_ways * cpuinfo->dcache_block_size;
> -
>  	if (of_property_read_u32(cpu, "clock-frequency",
>  				 &cpuinfo->clock_frequency)) {
>  		printk(KERN_WARNING
> @@ -320,14 +283,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
>  		seq_printf(m, "revision\t\t: %d\n", vr & SPR_VR_REV);
>  	}
>  	seq_printf(m, "frequency\t\t: %ld\n", loops_per_jiffy * HZ);
> -	seq_printf(m, "dcache size\t\t: %d bytes\n", cpuinfo->dcache_size);
> -	seq_printf(m, "dcache block size\t: %d bytes\n",
> -		   cpuinfo->dcache_block_size);
> -	seq_printf(m, "dcache ways\t\t: %d\n", cpuinfo->dcache_ways);
> -	seq_printf(m, "icache size\t\t: %d bytes\n", cpuinfo->icache_size);
> -	seq_printf(m, "icache block size\t: %d bytes\n",
> -		   cpuinfo->icache_block_size);
> -	seq_printf(m, "icache ways\t\t: %d\n", cpuinfo->icache_ways);
>  	seq_printf(m, "immu\t\t\t: %d entries, %lu ways\n",
>  		   1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
>  		   1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW));
> diff --git a/arch/openrisc/mm/cache.c b/arch/openrisc/mm/cache.c
> index eb43b73f3855..acdf2dc256bf 100644
> --- a/arch/openrisc/mm/cache.c
> +++ b/arch/openrisc/mm/cache.c
> @@ -16,10 +16,15 @@
>  #include <asm/cacheflush.h>
>  #include <asm/tlbflush.h>
>  
> -static __always_inline void cache_loop(struct page *page, const unsigned int reg)
> +static __always_inline void cache_loop(struct page *page, const unsigned int reg,
> +				       const unsigned int cache_type)
>  {
>  	unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT;
>  	unsigned long line = paddr & ~(L1_CACHE_BYTES - 1);
> +	unsigned long upr = mfspr(SPR_UPR);
> +
> +	if (!(upr & SPR_UPR_UP & cache_type))
> +		return;
>  
>  	while (line < paddr + PAGE_SIZE) {
>  		mtspr(reg, line);
> @@ -29,13 +34,13 @@ static __always_inline void cache_loop(struct page *page, const unsigned int reg
>  
>  void local_dcache_page_flush(struct page *page)
>  {
> -	cache_loop(page, SPR_DCBFR);
> +	cache_loop(page, SPR_DCBFR, SPR_UPR_DCP);
>  }
>  EXPORT_SYMBOL(local_dcache_page_flush);
>  
>  void local_icache_page_inv(struct page *page)
>  {
> -	cache_loop(page, SPR_ICBIR);
> +	cache_loop(page, SPR_ICBIR, SPR_UPR_ICP);
>  }
>  EXPORT_SYMBOL(local_icache_page_inv);

OK, if we move the range flush and invalidate here we will need to add to this
cache_loop a bit more.

> diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
> index d0cb1a0126f9..bbe16546c5b9 100644
> --- a/arch/openrisc/mm/init.c
> +++ b/arch/openrisc/mm/init.c
> @@ -124,6 +124,7 @@ static void __init map_ram(void)
>  void __init paging_init(void)
>  {
>  	int i;
> +	unsigned long upr;
>  
>  	printk(KERN_INFO "Setting up paging and PTEs.\n");
>  
> @@ -176,8 +177,11 @@ void __init paging_init(void)
>  	barrier();
>  
>  	/* Invalidate instruction caches after code modification */
> -	mtspr(SPR_ICBIR, 0x900);
> -	mtspr(SPR_ICBIR, 0xa00);
> +	upr = mfspr(SPR_UPR);
> +	if (upr & SPR_UPR_UP & SPR_UPR_ICP) {
> +		mtspr(SPR_ICBIR, 0x900);
> +		mtspr(SPR_ICBIR, 0xa00);
> +	}

Here we could use new utilities such as local_icache_range_inv(0x900,
L1_CACHE_BYTES);

Or something like local_icache_block_inv(0x900).  This only needs to flush a
single block as the code it is invalidating is just 2 instructions 8 bytes:

    .org 0x900
	l.j     boot_dtlb_miss_handler
	 l.nop

    .org 0xa00
	l.j     boot_itlb_miss_handler
	 l.nop
>
>  
>  	/* New TLB miss handlers and kernel page tables are in now place.
>  	 * Make sure that page flags get updated for all pages in TLB by
> -- 
> 2.48.1

-Stafford