lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <09d72f4e-170c-4f62-83ff-3053804038a7@rivosinc.com>
Date: Fri, 7 Jun 2024 16:11:04 -0400
From: Jesse Taube <jesse@...osinc.com>
To: Charlie Jenkins <charlie@...osinc.com>
Cc: linux-riscv@...ts.infradead.org, Paul Walmsley
 <paul.walmsley@...ive.com>, Palmer Dabbelt <palmer@...belt.com>,
 Albert Ou <aou@...s.berkeley.edu>, Conor Dooley
 <conor.dooley@...rochip.com>, Evan Green <evan@...osinc.com>,
 Andrew Jones <ajones@...tanamicro.com>, Xiao Wang <xiao.w.wang@...el.com>,
 Clément Léger <cleger@...osinc.com>,
 Andy Chiu <andy.chiu@...ive.com>, Greentime Hu <greentime.hu@...ive.com>,
 Heiko Stuebner <heiko@...ech.de>, Guo Ren <guoren@...nel.org>,
 Björn Töpel <bjorn@...osinc.com>,
 Costa Shulyupin <costa.shul@...hat.com>,
 Andrew Morton <akpm@...ux-foundation.org>, Baoquan He <bhe@...hat.com>,
 Sami Tolvanen <samitolvanen@...gle.com>, Zong Li <zong.li@...ive.com>,
 Ben Dooks <ben.dooks@...ethink.co.uk>, Erick Archer <erick.archer@....com>,
 Vincent Chen <vincent.chen@...ive.com>,
 Joel Granados <j.granados@...sung.com>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/3] RISC-V: Detect unaligned vector accesses supported.



On 6/7/24 15:53, Jesse Taube wrote:
> 
> 
> On 6/6/24 19:13, Charlie Jenkins wrote:
>> On Thu, Jun 06, 2024 at 02:29:23PM -0700, Charlie Jenkins wrote:
>>> On Thu, Jun 06, 2024 at 02:32:14PM -0400, Jesse Taube wrote:
>>>> Run a unaligned vector access to test if the system supports
>>>> vector unaligned access. Add the result to a new key in hwprobe.
>>>> This is useful for usermode to know if vector misaligned accesses are
>>>> supported and if they are faster or slower than equivalent byte accesses.
>>>>
>>>> Signed-off-by: Jesse Taube <jesse@...osinc.com>
>>>> ---
>>>>    arch/riscv/include/asm/cpufeature.h        |  2 +
>>>>    arch/riscv/include/asm/hwprobe.h           |  2 +-
>>>>    arch/riscv/include/asm/vector.h            |  1 +
>>>>    arch/riscv/include/uapi/asm/hwprobe.h      |  6 ++
>>>>    arch/riscv/kernel/sys_hwprobe.c            | 34 +++++++++
>>>>    arch/riscv/kernel/traps_misaligned.c       | 84 ++++++++++++++++++++--
>>>>    arch/riscv/kernel/unaligned_access_speed.c |  4 ++
>>>>    arch/riscv/kernel/vector.c                 |  2 +-
>>>>    8 files changed, 129 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
>>>> index 347805446151..a012c8490a27 100644
>>>> --- a/arch/riscv/include/asm/cpufeature.h
>>>> +++ b/arch/riscv/include/asm/cpufeature.h
>>>> @@ -35,9 +35,11 @@ void riscv_user_isa_enable(void);
>>>>    
>>>>    #if defined(CONFIG_RISCV_MISALIGNED)
>>>>    bool check_unaligned_access_emulated_all_cpus(void);
>>>> +bool check_vector_unaligned_access_all_cpus(void);
>>>>    void unaligned_emulation_finish(void);
>>>>    bool unaligned_ctl_available(void);
>>>>    DECLARE_PER_CPU(long, misaligned_access_speed);
>>>> +DECLARE_PER_CPU(long, vector_misaligned_access);
>>>>    #else
>>>>    static inline bool unaligned_ctl_available(void)
>>>>    {
>>>> diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
>>>> index 630507dff5ea..150a9877b0af 100644
>>>> --- a/arch/riscv/include/asm/hwprobe.h
>>>> +++ b/arch/riscv/include/asm/hwprobe.h
>>>> @@ -8,7 +8,7 @@
>>>>    
>>>>    #include <uapi/asm/hwprobe.h>
>>>>    
>>>> -#define RISCV_HWPROBE_MAX_KEY 6
>>>> +#define RISCV_HWPROBE_MAX_KEY 7
>>>>    
>>>>    static inline bool riscv_hwprobe_key_is_valid(__s64 key)
>>>>    {
>>>> diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
>>>> index 731dcd0ed4de..776af9b37e23 100644
>>>> --- a/arch/riscv/include/asm/vector.h
>>>> +++ b/arch/riscv/include/asm/vector.h
>>>> @@ -21,6 +21,7 @@
>>>>    
>>>>    extern unsigned long riscv_v_vsize;
>>>>    int riscv_v_setup_vsize(void);
>>>> +bool insn_is_vector(u32 insn_buf);
>>>>    bool riscv_v_first_use_handler(struct pt_regs *regs);
>>>>    void kernel_vector_begin(void);
>>>>    void kernel_vector_end(void);
>>>> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
>>>> index 060212331a03..ebacff86f134 100644
>>>> --- a/arch/riscv/include/uapi/asm/hwprobe.h
>>>> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
>>>> @@ -68,6 +68,12 @@ struct riscv_hwprobe {
>>>>    #define		RISCV_HWPROBE_MISALIGNED_UNSUPPORTED	(4 << 0)
>>>>    #define		RISCV_HWPROBE_MISALIGNED_MASK		(7 << 0)
>>>>    #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE	6
>>>> +#define RISCV_HWPROBE_VEC_KEY_MISALIGNED_PERF	7
>>>> +#define		RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN		0
>>>> +#define		RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED		1
>>>> +#define		RISCV_HWPROBE_VEC_MISALIGNED_SLOW		2
>>>> +#define		RISCV_HWPROBE_VEC_MISALIGNED_FAST		3
>>>> +#define		RISCV_HWPROBE_VEC_MISALIGNED_UNSUPPORTED	4
>>>>    /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>>>>    
>>>>    /* Flags */
>>>> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
>>>> index b286b73e763e..ce641cc6e47a 100644
>>>> --- a/arch/riscv/kernel/sys_hwprobe.c
>>>> +++ b/arch/riscv/kernel/sys_hwprobe.c
>>>> @@ -184,6 +184,36 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
>>>>    }
>>>>    #endif
>>>>    
>>>> +#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
>>>> +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
>>>> +{
>>>> +	int cpu;
>>>> +	u64 perf = -1ULL;
>>>> +
>>>> +	for_each_cpu(cpu, cpus) {
>>>> +		int this_perf = per_cpu(vector_misaligned_access, cpu);
>>>> +
>>>> +		if (perf == -1ULL)
>>>> +			perf = this_perf;
>>>> +
>>>> +		if (perf != this_perf) {
>>>> +			perf = RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>>>> +			break;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	if (perf == -1ULL)
>>>> +		return RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>>>> +
>>>> +	return perf;
>>>> +}
>>>> +#else
>>>> +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
>>>> +{
>>
>> I meant to mention this in my last message!
>>
>> The scalar version has cutouts for configs here like:
>>
>> 	if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS))
>> 		return RISCV_HWPROBE_MISALIGNED_FAST;
>>
> 

For both scalar and vector CONFIG_RISCV_PROBE_UNALIGNED_ACCESS probes 
the speed not support. So misaligned_access_speed will have a valid value.

```

	if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_VEC_UNALIGNED_ACCESS))
		return RISCV_HWPROBE_MISALIGNED_FAST;

	if(IS_ENABLED(CONFIG_RISCV_SLOW_VEC_UNALIGNED_ACCESS))
		return RISCV_HWPROBE_VEC_MISALIGNED_SLOW;

	if(IS_ENABLED(CONFIG_RISCV_VEC_UNALIGNED_ACCESS))
		
/* Sence we cant gurentee that we do have UNALIGNED_ACCESS we can return 
the result from the test */

		return per_cpu(vector_misaligned_access, cpu);

	return RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
```

> Will add
> 
>> Having this functionality on vector as well would be much appreciated.
>> I don't think it's valid to assume that vector and scalar have the same
>> speed, so this would require a vector version of the RISCV_MISALIGNED
>> tree in arch/riscv/Kconfig.
>>
>> - Charlie
>>
>>>> +	return RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>>>> +}
>>>> +#endif
>>>> +
>>>>    static void hwprobe_one_pair(struct riscv_hwprobe *pair,
>>>>    			     const struct cpumask *cpus)
>>>>    {
>>>> @@ -211,6 +241,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
>>>>    		pair->value = hwprobe_misaligned(cpus);
>>>>    		break;
>>>>    
>>>> +	case RISCV_HWPROBE_VEC_KEY_MISALIGNED_PERF:
>>>> +		pair->value = hwprobe_vec_misaligned(cpus);
>>>> +		break;
>>>> +
>>>>    	case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
>>>>    		pair->value = 0;
>>>>    		if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
>>>> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
>>>> index 2adb7c3e4dd5..8f26c3d92230 100644
>>>> --- a/arch/riscv/kernel/traps_misaligned.c
>>>> +++ b/arch/riscv/kernel/traps_misaligned.c
>>>> @@ -16,6 +16,7 @@
>>>>    #include <asm/entry-common.h>
>>>>    #include <asm/hwprobe.h>
>>>>    #include <asm/cpufeature.h>
>>>> +#include <asm/vector.h>
>>>>    
>>>>    #define INSN_MATCH_LB			0x3
>>>>    #define INSN_MASK_LB			0x707f
>>>> @@ -413,10 +414,6 @@ int handle_misaligned_load(struct pt_regs *regs)
>>>>    
>>>>    	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
>>>>    
>>>> -#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
>>>> -	*this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
>>>> -#endif
>>>> -
>>>>    	if (!unaligned_enabled)
>>>>    		return -1;
>>>>    
>>>> @@ -426,6 +423,17 @@ int handle_misaligned_load(struct pt_regs *regs)
>>>>    	if (get_insn(regs, epc, &insn))
>>>>    		return -1;
>>>>    
>>>> +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
>>>> +	if (insn_is_vector(insn) &&
>>>> +	    *this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED) {
>>>> +		*this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_VEC_MISALIGNED_UNSUPPORTED;
>>>> +		regs->epc = epc + INSN_LEN(insn);
>>>> +		return 0;
> 
> There is a return before scalar speed is set.
> 
>>>> +	}
>>>> +
>>>> +	*this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
>>>
>>> This unconditionally sets scalar unaligned accesses even if the
>>> unaligned access is caused by vector. Scalar unaligned accesses should
>>> only be set to emulated if this function is entered from a scalar
>>> unaligned load.
>>>
>>> The rest of this function handles how scalar unaligned accesses are
>>> emulated, and the equivalent needs to happen for vector. You need to add
>>> routines that manually load the data from the memory address into the
>>> vector register. When Clément did this for scalar, he provided a test
>>> case to help reviewers [1]. Please add onto these test cases or make
>>> your own for vector.
> 
> I wansnt planing on adding emulation in this patch. I can if needed.
> 
>>>
>>> Link: https://github.com/clementleger/unaligned_test [1]
>>>
>>>> +#endif
>>>> +
>>>>    	regs->epc = 0;
>>>>    
>>>>    	if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
>>>> @@ -625,6 +633,74 @@ static bool check_unaligned_access_emulated(int cpu)
>>>>    	return misaligned_emu_detected;
>>>>    }
>>>>    
>>>> +#ifdef CONFIG_RISCV_ISA_V
>>>> +static void check_vector_unaligned_access(struct work_struct *unused)
>>>
>>> Can you standardize this name with the scalar version by writing
>>> emulated in it?
> 
> We dont emulate it so that wouldn't make sence.
> 
>>>
>>> "check_vector_unaligned_access_emulated_all_cpus"
>>>
>>>> +{
>>>> +	int cpu = smp_processor_id();
>>>> +	long *mas_ptr = this_cpu_ptr(&vector_misaligned_access);
>>>> +	unsigned long tmp_var;
>>>> +
>>>> +	if (!riscv_isa_extension_available(hart_isa[cpu].isa, v))
>>>> +		return;
>>>> +
>>>> +	*mas_ptr = RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED;
>>>> +
>>>> +	local_irq_enable();
>>>> +	kernel_vector_begin();
>>>> +	__asm__ __volatile__ (
>>>> +		".balign 4\n\t"
>>>> +		".option push\n\t"
>>>> +		".option arch, +v\n\t"
>>>> +		"       vsetivli zero, 1, e16, m1, ta, ma\n\t"	// Vectors of 16b
>>>> +		"	vle16.v v0, (%[ptr])\n\t"		// Load bytes
>>>> +		".option pop\n\t"
>>>> +		: : [ptr] "r" ((u8 *)&tmp_var + 1) : "v0", "memory");
>>>
>>> memory is being read from, but not written to, so there is no need to
>>> have a memory clobber.
> 
> fixed.
> 
>>>
>>>> +	kernel_vector_end();
>>>> +
>>>> +	if (*mas_ptr == RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN)
>>>> +		*mas_ptr = RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED;
>>>> +}
>>>> +
>>>> +bool check_vector_unaligned_access_all_cpus(void)
>>>> +{
>>>> +	int cpu;
>>>> +	bool ret = true;
>>>> +
>>>> +	for_each_online_cpu(cpu)
>>>> +		if (riscv_isa_extension_available(hart_isa[cpu].isa, ZICCLSM))
>>>
>>> zicclsm is not specific to vector so it can be extracted out of this
>>> vector specific function. Assuming that hardware properly reports the
>>> extension, if zicclsm is present then it is known that both vector and
>>> scalar unaligned accesses are supported.
> 
> Added so we don't need to waste cycles testing support.
> 
>>>> +			per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED;
>>>
>>> Please use the exising UNKNOWN terminology instead of renaming to
>>> SUPPORTED. Any option that is not UNSUPPORTED implies that unaligned
>>> accesses are supported.
> 
> Conor didnt like using UNKNOWN a proxy for "SUPPORTED"
> Having SUPPORTED is better then assuing the speed to be slow.
> 
>>>
>>>> +		else
>>>> +			ret = false;
>>>> +
>>>> +
>>>> +	if (ret)
>>>> +		return true;
>>>> +	ret = true;
>>>> +
>>>> +	schedule_on_each_cpu(check_vector_unaligned_access);
>>>> +
>>>> +	for_each_online_cpu(cpu)
>>>> +		if (per_cpu(vector_misaligned_access, cpu)
>>>> +		    != RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED)
>>>> +			return false;
>>>> +
>>>> +	return ret;
>>>> +}
>>>> +#else
>>>
>>> If CONFIG_RISCV_ISA_V is not set, there is no value in checking if
>>> vector unaligned accesses are supported because userspace will not be
>>> allowed to use vector instructions anyway.
> 
> Oh I'm silly meant to be seting to all UNSUPPORTED.
> 
> 
> Thanks,
> Jesse Taube
> 
>>>
>>> - Charlie
>>>
>>>> +bool check_vector_unaligned_access_all_cpus(void)
>>>> +{
>>>> +	int cpu;
>>>> +
>>>> +	for_each_online_cpu(cpu)
>>>> +		if (riscv_isa_extension_available(hart_isa[cpu].isa, ZICCLSM))
>>>> +			per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_VEC_MISALIGNED_SUPPORTED;
>>>> +		else
>>>> +			per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_VEC_MISALIGNED_UNSUPPORTED;
>>>> +
>>>> +	return false;
>>>> +}
>>>> +#endif
>>>> +
>>>>    bool check_unaligned_access_emulated_all_cpus(void)
>>>>    {
>>>>    	int cpu;
>>>> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
>>>> index a9a6bcb02acf..92a84239beaa 100644
>>>> --- a/arch/riscv/kernel/unaligned_access_speed.c
>>>> +++ b/arch/riscv/kernel/unaligned_access_speed.c
>>>> @@ -20,6 +20,7 @@
>>>>    #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
>>>>    
>>>>    DEFINE_PER_CPU(long, misaligned_access_speed);
>>>> +DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_VEC_MISALIGNED_UNKNOWN;
>>>>    
>>>>    #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
>>>>    static cpumask_t fast_misaligned_access;
>>>> @@ -264,6 +265,8 @@ static int check_unaligned_access_all_cpus(void)
>>>>    {
>>>>    	bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
>>>>    
>>>> +	check_vector_unaligned_access_all_cpus();
>>>> +
>>>>    	if (!all_cpus_emulated)
>>>>    		return check_unaligned_access_speed_all_cpus();
>>>>    
>>>> @@ -273,6 +276,7 @@ static int check_unaligned_access_all_cpus(void)
>>>>    static int check_unaligned_access_all_cpus(void)
>>>>    {
>>>>    	check_unaligned_access_emulated_all_cpus();
>>>> +	check_vector_unaligned_access_all_cpus();
>>>>    
>>>>    	return 0;
>>>>    }
>>>> diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
>>>> index 6727d1d3b8f2..2cceab739b2c 100644
>>>> --- a/arch/riscv/kernel/vector.c
>>>> +++ b/arch/riscv/kernel/vector.c
>>>> @@ -66,7 +66,7 @@ void __init riscv_v_setup_ctx_cache(void)
>>>>    #endif
>>>>    }
>>>>    
>>>> -static bool insn_is_vector(u32 insn_buf)
>>>> +bool insn_is_vector(u32 insn_buf)
>>>>    {
>>>>    	u32 opcode = insn_buf & __INSN_OPCODE_MASK;
>>>>    	u32 width, csr;
>>>> -- 
>>>> 2.43.0
>>>>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ