[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8e2556d2-8021-4e4c-9380-7568ff74a84f@rivosinc.com>
Date: Tue, 4 Jun 2024 12:42:10 -0400
From: Jesse Taube <jesse@...osinc.com>
To: linux-riscv@...ts.infradead.org
Cc: Paul Walmsley <paul.walmsley@...ive.com>,
Palmer Dabbelt <palmer@...belt.com>, Albert Ou <aou@...s.berkeley.edu>,
Conor Dooley <conor.dooley@...rochip.com>, Evan Green <evan@...osinc.com>,
Charlie Jenkins <charlie@...osinc.com>,
Andrew Jones <ajones@...tanamicro.com>, Clément Léger
<cleger@...osinc.com>, Xiao Wang <xiao.w.wang@...el.com>,
Andy Chiu <andy.chiu@...ive.com>, Costa Shulyupin <costa.shul@...hat.com>,
Björn Töpel <bjorn@...osinc.com>,
Ben Dooks <ben.dooks@...ethink.co.uk>,
"Gustavo A. R. Silva" <gustavoars@...nel.org>,
Alexandre Ghiti <alexghiti@...osinc.com>, Erick Archer
<erick.archer@....com>, linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH v0] RISCV: Report vector unaligned accesses hwprobe
On 6/4/24 12:24, Jesse Taube wrote:
> Detected if a system traps into the kernel on an vector unaligned access.
> Add the result to a new key in hwprobe.
>
> Signed-off-by: Jesse Taube <jesse@...osinc.com>
> ---
> arch/riscv/include/asm/cpufeature.h | 3 ++
> arch/riscv/include/asm/hwprobe.h | 2 +-
> arch/riscv/include/uapi/asm/hwprobe.h | 6 +++
> arch/riscv/kernel/sys_hwprobe.c | 34 ++++++++++++
> arch/riscv/kernel/traps_misaligned.c | 60 ++++++++++++++++++++++
> arch/riscv/kernel/unaligned_access_speed.c | 4 ++
> 6 files changed, 108 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> index 347805446151..5ad69cf25b25 100644
> --- a/arch/riscv/include/asm/cpufeature.h
> +++ b/arch/riscv/include/asm/cpufeature.h
> @@ -35,9 +35,12 @@ void riscv_user_isa_enable(void);
>
> #if defined(CONFIG_RISCV_MISALIGNED)
> bool check_unaligned_access_emulated_all_cpus(void);
> +bool check_vector_unaligned_access_all_cpus(void);
> +
> void unaligned_emulation_finish(void);
> bool unaligned_ctl_available(void);
> DECLARE_PER_CPU(long, misaligned_access_speed);
> +DECLARE_PER_CPU(long, vector_misaligned_access);
> #else
> static inline bool unaligned_ctl_available(void)
> {
> diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
> index 630507dff5ea..150a9877b0af 100644
> --- a/arch/riscv/include/asm/hwprobe.h
> +++ b/arch/riscv/include/asm/hwprobe.h
> @@ -8,7 +8,7 @@
>
> #include <uapi/asm/hwprobe.h>
>
> -#define RISCV_HWPROBE_MAX_KEY 6
> +#define RISCV_HWPROBE_MAX_KEY 7
>
> static inline bool riscv_hwprobe_key_is_valid(__s64 key)
> {
> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> index 060212331a03..4474e98d17bd 100644
> --- a/arch/riscv/include/uapi/asm/hwprobe.h
> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> @@ -68,6 +68,12 @@ struct riscv_hwprobe {
> #define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
> #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
> #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
> +#define RISCV_HWPROBE_VEC_KEY_MISALIGNED_PERF 7
There were talks about combining vecotor and scalar speed for the user
facing API into RISCV_HWPROBE_KEY_CPUPERF_0, but adding another key
seems easier.
> +#define RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN 0
> +#define RISCV_HWPROBE_VEC_MISALIGNED_EMULATED 1
> +#define RISCV_HWPROBE_VEC_MISALIGNED_SLOW 2
> +#define RISCV_HWPROBE_VEC_MISALIGNED_FAST 3
> +#define RISCV_HWPROBE_VEC_MISALIGNED_UNSUPPORTED 4
> /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>
> /* Flags */
> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> index b286b73e763e..ce641cc6e47a 100644
> --- a/arch/riscv/kernel/sys_hwprobe.c
> +++ b/arch/riscv/kernel/sys_hwprobe.c
> @@ -184,6 +184,36 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
> }
> #endif
>
> +#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
> +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
> +{
> + int cpu;
> + u64 perf = -1ULL;
> +
> + for_each_cpu(cpu, cpus) {
> + int this_perf = per_cpu(vector_misaligned_access, cpu);
> +
> + if (perf == -1ULL)
> + perf = this_perf;
> +
> + if (perf != this_perf) {
> + perf = RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
> + break;
> + }
> + }
> +
> + if (perf == -1ULL)
> + return RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
> +
> + return perf;
> +}
> +#else
> +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
> +{
> + return RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
> +}
> +#endif
> +
> static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> const struct cpumask *cpus)
> {
> @@ -211,6 +241,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> pair->value = hwprobe_misaligned(cpus);
> break;
>
> + case RISCV_HWPROBE_VEC_KEY_MISALIGNED_PERF:
> + pair->value = hwprobe_vec_misaligned(cpus);
> + break;
> +
> case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
> pair->value = 0;
> if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> index 2adb7c3e4dd5..0c07e990e9c5 100644
> --- a/arch/riscv/kernel/traps_misaligned.c
> +++ b/arch/riscv/kernel/traps_misaligned.c
> @@ -16,6 +16,7 @@
> #include <asm/entry-common.h>
> #include <asm/hwprobe.h>
> #include <asm/cpufeature.h>
> +#include <asm/vector.h>
>
> #define INSN_MATCH_LB 0x3
> #define INSN_MASK_LB 0x707f
> @@ -426,6 +427,14 @@ int handle_misaligned_load(struct pt_regs *regs)
> if (get_insn(regs, epc, &insn))
> return -1;
>
Is this an appropriate way to check if there is vector missaligned
access? What if a unaligned vector load as called by the kernel before
this check?
> +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
> + if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN) {
> + *this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_VEC_MISALIGNED_UNSUPPORTED;
> + regs->epc = epc + INSN_LEN(insn);
> + return 0;
> + }
> +#endif
> +
> regs->epc = 0;
>
> if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
> @@ -625,6 +634,57 @@ static bool check_unaligned_access_emulated(int cpu)
> return misaligned_emu_detected;
> }
>
> +#ifdef CONFIG_RISCV_ISA_V
> +static bool check_vector_unaligned_access(int cpu)
> +{
> + long *mas_ptr = per_cpu_ptr(&vector_misaligned_access, cpu);
> + struct riscv_isainfo *isainfo = &hart_isa[cpu];
> + unsigned long tmp_var;
> + bool misaligned_vec_suported;
> +
> + if (!riscv_isa_extension_available(isainfo->isa, v))
> + return false;
> +
> + /* This case will only happen if a unaligned vector load
> + * was called by the kernel before this check
> + */
> + if (*mas_ptr != RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN)
> + return false;
> +
> + kernel_vector_begin();
> + __asm__ __volatile__ (
> + ".option push\n\t"
> + ".option arch, +v\n\t"
> + " li t1, 0x1\n" //size
> + " vsetvli t0, t1, e16, m2, ta, ma\n\t" // Vectors of 16b
> + " addi t0, %[ptr], 1\n\t" // Misalign address
> + " vle16.v v0, (t0)\n\t" // Load bytes
> + ".option pop\n\t"
> + : : [ptr] "r" (&tmp_var) : "v0", "t0", "t1", "memory");
> + kernel_vector_end();
> +
> + misaligned_vec_suported = (*mas_ptr == RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN);
> +
> + return misaligned_vec_suported;
> +}
> +#else
> +static bool check_vector_unaligned_access(int cpu)
> +{
> + return false;
> +}
> +#endif
> +
> +bool check_vector_unaligned_access_all_cpus(void)
> +{
> + int cpu;
> +
> + for_each_online_cpu(cpu)
> + if (!check_vector_unaligned_access(cpu))
> + return false;
> +
> + return true;
> +}
> +
> bool check_unaligned_access_emulated_all_cpus(void)
> {
> int cpu;
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index a9a6bcb02acf..92a84239beaa 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -20,6 +20,7 @@
> #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
>
> DEFINE_PER_CPU(long, misaligned_access_speed);
> +DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>
> #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
> static cpumask_t fast_misaligned_access;
> @@ -264,6 +265,8 @@ static int check_unaligned_access_all_cpus(void)
> {
> bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
>
There was talks about Zicclsm, but spike doesnt have support for Zicclsm
afaik, but I was wondering if i should add Zicclsm to cpufeature and aswell.
If anyone wants to run this i tested with
spike --misaligned --isa=RV64IMAFDCV_zicntr_zihpm
--kernel=arch/riscv/boot/Image
opensbi/build/platform/generic/firmware/fw_jump.elf
Thanks,
Jesse Taube
> + check_vector_unaligned_access_all_cpus();
> +
> if (!all_cpus_emulated)
> return check_unaligned_access_speed_all_cpus();
>
> @@ -273,6 +276,7 @@ static int check_unaligned_access_all_cpus(void)
> static int check_unaligned_access_all_cpus(void)
> {
> check_unaligned_access_emulated_all_cpus();
> + check_vector_unaligned_access_all_cpus();
>
> return 0;
> }
Powered by blists - more mailing lists