Message-ID: <c6488202-7721-4f08-8b27-bddbda70296a@ghiti.fr>
Date: Wed, 13 Aug 2025 12:09:49 +0200
From: Alexandre Ghiti <alex@...ti.fr>
To: Clément Léger <cleger@...osinc.com>,
 Paul Walmsley <paul.walmsley@...ive.com>, Palmer Dabbelt
 <palmer@...belt.com>, linux-riscv@...ts.infradead.org,
 linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org
Cc: Himanshu Chauhan <hchauhan@...tanamicro.com>,
 Anup Patel <apatel@...tanamicro.com>, Xu Lu <luxu.kernel@...edance.com>,
 Atish Patra <atishp@...shpatra.org>, Björn Töpel
 <bjorn@...osinc.com>, Yunhui Cui <cuiyunhui@...edance.com>
Subject: Re: [PATCH v6 2/5] riscv: add support for SBI Supervisor Software
 Events extension

On 8/8/25 17:38, Clément Léger wrote:
> The SBI SSE extension allows the supervisor software to be notified by


I would replace "notified" with "interrupted", no?


> the SBI of specific events that are not maskable. The context switch is


Nit: s/the SBI/the SBI implementation


> handled partially by the firmware which will save registers a6 and a7.
> When entering kernel we can rely on these 2 registers to setup the stack
> and save all the registers.
>
> Since SSE events can be delivered at any time to the kernel (including
> during exception handling, we need a way to locate the current_task for


s/handling/handling)


> context tracking. On RISC-V, it is sotred in scratch when in user space

s/sotred/stored

> or tp when in kernel space (in which case SSCRATCH is zero). But at a


Remove "at a"


> at the beginning of exception handling, SSCRATCH is used to swap tp and
> check the origin of the exception. If interrupted at that point, then,
> there is no way to reliably know were is located the current


s/were/where


> task_struct. Even checking the interruption location won't work as SSE
> event can be nested on top of each other so the original interruption
> site might be lost at some point. In order to retrieve it reliably,
> store the current task in an additional __sse_entry_task per_cpu array.
> This array is then used to retrieve the current task based on the
> hart ID that is passed to the SSE event handler in a6.
>
> That being said, the way the current task struct is stored should
> probably be reworked to find a better reliable alternative.
>
> Since each events (and each CPU for local events) have their own


"Since each event has its own"


> context and can preempt each other, allocate a stack (and a shadow stack
> if needed for each of them (and for each cpu for local events).


s/needed/needed)


>
> When completing the event, if we were coming from kernel with interrupts
> disabled, simply return there. If coming from userspace or kernel with
> interrupts enabled, simulate an interrupt exception by setting IE_SIE in
> CSR_IP to allow delivery of signals to user task. For instance this can
> happen, when a RAS event has been generated by a user application and a
> SIGBUS has been sent to a task.
>
> Signed-off-by: Clément Léger <cleger@...osinc.com>
> ---
>   arch/riscv/include/asm/asm.h         |  14 ++-
>   arch/riscv/include/asm/scs.h         |   7 ++
>   arch/riscv/include/asm/sse.h         |  47 +++++++
>   arch/riscv/include/asm/switch_to.h   |  14 +++
>   arch/riscv/include/asm/thread_info.h |   1 +
>   arch/riscv/kernel/Makefile           |   1 +
>   arch/riscv/kernel/asm-offsets.c      |  14 +++
>   arch/riscv/kernel/sse.c              | 154 +++++++++++++++++++++++
>   arch/riscv/kernel/sse_entry.S        | 180 +++++++++++++++++++++++++++
>   9 files changed, 429 insertions(+), 3 deletions(-)
>   create mode 100644 arch/riscv/include/asm/sse.h
>   create mode 100644 arch/riscv/kernel/sse.c
>   create mode 100644 arch/riscv/kernel/sse_entry.S
>
> diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
> index a8a2af6dfe9d..982c4be9a9c3 100644
> --- a/arch/riscv/include/asm/asm.h
> +++ b/arch/riscv/include/asm/asm.h
> @@ -90,16 +90,24 @@
>   #define PER_CPU_OFFSET_SHIFT 3
>   #endif
>   
> -.macro asm_per_cpu dst sym tmp
> -	REG_L \tmp, TASK_TI_CPU_NUM(tp)
> -	slli  \tmp, \tmp, PER_CPU_OFFSET_SHIFT
> +.macro asm_per_cpu_with_cpu dst sym tmp cpu
> +	slli  \tmp, \cpu, PER_CPU_OFFSET_SHIFT
>   	la    \dst, __per_cpu_offset
>   	add   \dst, \dst, \tmp
>   	REG_L \tmp, 0(\dst)
>   	la    \dst, \sym
>   	add   \dst, \dst, \tmp
>   .endm
> +
> +.macro asm_per_cpu dst sym tmp
> +	REG_L \tmp, TASK_TI_CPU_NUM(tp)


cpu is not xlen-wide, see
https://lore.kernel.org/linux-riscv/20250725165410.2896641-3-rkrcmar@ventanamicro.com/.
Can you rebase your next version on top of my fixes branch, which contains
this fix? Here it is:
https://git.kernel.org/pub/scm/linux/kernel/git/alexghiti/linux.git/log/?h=alex-fixes
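
For reference, a minimal sketch of what I have in mind (assuming the macro
keeps the same shape; please double-check against the fixes branch):

	.macro asm_per_cpu dst sym tmp
		/* cpu is an int, so load 32 bits rather than an xlen-wide value */
		lw    \tmp, TASK_TI_CPU_NUM(tp)
		asm_per_cpu_with_cpu \dst \sym \tmp \tmp
	.endm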


> +	asm_per_cpu_with_cpu \dst \sym \tmp \tmp
> +.endm
>   #else /* CONFIG_SMP */
> +.macro asm_per_cpu_with_cpu dst sym tmp cpu
> +	la    \dst, \sym
> +.endm
> +
>   .macro asm_per_cpu dst sym tmp
>   	la    \dst, \sym
>   .endm
> diff --git a/arch/riscv/include/asm/scs.h b/arch/riscv/include/asm/scs.h
> index 0e45db78b24b..62344daad73d 100644
> --- a/arch/riscv/include/asm/scs.h
> +++ b/arch/riscv/include/asm/scs.h
> @@ -18,6 +18,11 @@
>   	load_per_cpu gp, irq_shadow_call_stack_ptr, \tmp
>   .endm
>   
> +/* Load the per-CPU IRQ shadow call stack to gp. */
> +.macro scs_load_sse_stack reg_evt
> +	REG_L gp, SSE_REG_EVT_SHADOW_STACK(\reg_evt)
> +.endm
> +
>   /* Load task_scs_sp(current) to gp. */
>   .macro scs_load_current
>   	REG_L	gp, TASK_TI_SCS_SP(tp)
> @@ -41,6 +46,8 @@
>   .endm
>   .macro scs_load_irq_stack tmp
>   .endm
> +.macro scs_load_sse_stack reg_evt
> +.endm
>   .macro scs_load_current
>   .endm
>   .macro scs_load_current_if_task_changed prev
> diff --git a/arch/riscv/include/asm/sse.h b/arch/riscv/include/asm/sse.h
> new file mode 100644
> index 000000000000..8929a268462c
> --- /dev/null
> +++ b/arch/riscv/include/asm/sse.h
> @@ -0,0 +1,47 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2024 Rivos Inc.
> + */
> +#ifndef __ASM_SSE_H
> +#define __ASM_SSE_H
> +
> +#include <asm/sbi.h>
> +
> +#ifdef CONFIG_RISCV_SSE
> +
> +struct sse_event_interrupted_state {
> +	unsigned long a6;
> +	unsigned long a7;
> +};
> +
> +struct sse_event_arch_data {
> +	void *stack;
> +	void *shadow_stack;
> +	unsigned long tmp;
> +	struct sse_event_interrupted_state interrupted;
> +	unsigned long interrupted_phys;
> +	u32 evt_id;
> +	unsigned int hart_id;
> +	unsigned int cpu_id;
> +};
> +
> +static inline bool sse_event_is_global(u32 evt)
> +{
> +	return !!(evt & SBI_SSE_EVENT_GLOBAL);
> +}
> +
> +void arch_sse_event_update_cpu(struct sse_event_arch_data *arch_evt, int cpu);
> +int arch_sse_init_event(struct sse_event_arch_data *arch_evt, u32 evt_id,
> +			int cpu);
> +void arch_sse_free_event(struct sse_event_arch_data *arch_evt);
> +int arch_sse_register_event(struct sse_event_arch_data *arch_evt);
> +
> +void sse_handle_event(struct sse_event_arch_data *arch_evt,
> +		      struct pt_regs *regs);
> +asmlinkage void handle_sse(void);
> +asmlinkage void do_sse(struct sse_event_arch_data *arch_evt,
> +		       struct pt_regs *reg);
> +
> +#endif
> +
> +#endif
> diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
> index 0e71eb82f920..cd1cead0c682 100644
> --- a/arch/riscv/include/asm/switch_to.h
> +++ b/arch/riscv/include/asm/switch_to.h
> @@ -88,6 +88,19 @@ static inline void __switch_to_envcfg(struct task_struct *next)
>   			:: "r" (next->thread.envcfg) : "memory");
>   }
>   
> +#ifdef CONFIG_RISCV_SSE
> +DECLARE_PER_CPU(struct task_struct *, __sse_entry_task);
> +
> +static inline void __switch_sse_entry_task(struct task_struct *next)
> +{
> +	__this_cpu_write(__sse_entry_task, next);
> +}
> +#else
> +static inline void __switch_sse_entry_task(struct task_struct *next)
> +{
> +}
> +#endif
> +
>   extern struct task_struct *__switch_to(struct task_struct *,
>   				       struct task_struct *);
>   
> @@ -122,6 +135,7 @@ do {							\
>   	if (switch_to_should_flush_icache(__next))	\
>   		local_flush_icache_all();		\
>   	__switch_to_envcfg(__next);			\
> +	__switch_sse_entry_task(__next);			\
>   	((last) = __switch_to(__prev, __next));		\
>   } while (0)
>   
> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
> index f5916a70879a..28e9805e61fc 100644
> --- a/arch/riscv/include/asm/thread_info.h
> +++ b/arch/riscv/include/asm/thread_info.h
> @@ -36,6 +36,7 @@
>   #define OVERFLOW_STACK_SIZE     SZ_4K
>   
>   #define IRQ_STACK_SIZE		THREAD_SIZE
> +#define SSE_STACK_SIZE		THREAD_SIZE
>   
>   #ifndef __ASSEMBLY__
>   
> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> index c7b542573407..62e4490b34ee 100644
> --- a/arch/riscv/kernel/Makefile
> +++ b/arch/riscv/kernel/Makefile
> @@ -99,6 +99,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE)	+= mcount-dyn.o
>   obj-$(CONFIG_PERF_EVENTS)	+= perf_callchain.o
>   obj-$(CONFIG_HAVE_PERF_REGS)	+= perf_regs.o
>   obj-$(CONFIG_RISCV_SBI)		+= sbi.o sbi_ecall.o
> +obj-$(CONFIG_RISCV_SSE)		+= sse.o sse_entry.o
>   ifeq ($(CONFIG_RISCV_SBI), y)
>   obj-$(CONFIG_SMP)		+= sbi-ipi.o
>   obj-$(CONFIG_SMP) += cpu_ops_sbi.o
> diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
> index 6e8c0d6feae9..315547c3a2ef 100644
> --- a/arch/riscv/kernel/asm-offsets.c
> +++ b/arch/riscv/kernel/asm-offsets.c
> @@ -14,6 +14,8 @@
>   #include <asm/ptrace.h>
>   #include <asm/cpu_ops_sbi.h>
>   #include <asm/stacktrace.h>
> +#include <asm/sbi.h>
> +#include <asm/sse.h>
>   #include <asm/suspend.h>
>   
>   void asm_offsets(void);
> @@ -528,4 +530,16 @@ void asm_offsets(void)
>   	DEFINE(FREGS_A6,	    offsetof(struct __arch_ftrace_regs, a6));
>   	DEFINE(FREGS_A7,	    offsetof(struct __arch_ftrace_regs, a7));
>   #endif
> +
> +#ifdef CONFIG_RISCV_SSE
> +	OFFSET(SSE_REG_EVT_STACK, sse_event_arch_data, stack);
> +	OFFSET(SSE_REG_EVT_SHADOW_STACK, sse_event_arch_data, shadow_stack);
> +	OFFSET(SSE_REG_EVT_TMP, sse_event_arch_data, tmp);
> +	OFFSET(SSE_REG_HART_ID, sse_event_arch_data, hart_id);
> +	OFFSET(SSE_REG_CPU_ID, sse_event_arch_data, cpu_id);
> +
> +	DEFINE(SBI_EXT_SSE, SBI_EXT_SSE);
> +	DEFINE(SBI_SSE_EVENT_COMPLETE, SBI_SSE_EVENT_COMPLETE);
> +	DEFINE(ASM_NR_CPUS, NR_CPUS);
> +#endif
>   }
> diff --git a/arch/riscv/kernel/sse.c b/arch/riscv/kernel/sse.c
> new file mode 100644
> index 000000000000..d2da7e23a74a
> --- /dev/null
> +++ b/arch/riscv/kernel/sse.c
> @@ -0,0 +1,154 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024 Rivos Inc.
> + */
> +#include <linux/nmi.h>
> +#include <linux/scs.h>
> +#include <linux/bitfield.h>
> +#include <linux/riscv_sse.h>
> +#include <linux/percpu-defs.h>
> +
> +#include <asm/asm-prototypes.h>
> +#include <asm/switch_to.h>
> +#include <asm/irq_stack.h>
> +#include <asm/sbi.h>
> +#include <asm/sse.h>
> +
> +DEFINE_PER_CPU(struct task_struct *, __sse_entry_task);
> +
> +void __weak sse_handle_event(struct sse_event_arch_data *arch_evt, struct pt_regs *regs)
> +{
> +}
> +
> +void do_sse(struct sse_event_arch_data *arch_evt, struct pt_regs *regs)
> +{
> +	nmi_enter();
> +
> +	/* Retrieve missing GPRs from SBI */
> +	sbi_ecall(SBI_EXT_SSE, SBI_SSE_EVENT_ATTR_READ, arch_evt->evt_id,
> +		  SBI_SSE_ATTR_INTERRUPTED_A6,
> +		  (SBI_SSE_ATTR_INTERRUPTED_A7 - SBI_SSE_ATTR_INTERRUPTED_A6) + 1,
> +		  arch_evt->interrupted_phys, 0, 0);
> +
> +	memcpy(&regs->a6, &arch_evt->interrupted, sizeof(arch_evt->interrupted));
> +
> +	sse_handle_event(arch_evt, regs);
> +
> +	/*
> +	 * The SSE delivery path does not uses the "standard" exception path
> +	 * (see sse_entry.S) and does not process any pending signal/softirqs
> +	 * due to being similar to a NMI.
> +	 * Some drivers (PMU, RAS) enqueue pending work that needs to be handled
> +	 * as soon as possible by bottom halves. For that purpose, set the SIP
> +	 * software interrupt pending bit which will force a software interrupt
> +	 * to be serviced once interrupts are reenabled in the interrupted
> +	 * context if they were masked or directly if unmasked.
> +	 */
> +	csr_set(CSR_IP, IE_SIE);
> +
> +	nmi_exit();
> +}
> +
> +static void *alloc_to_stack_pointer(void *alloc)
> +{
> +	return alloc ? alloc + SSE_STACK_SIZE : NULL;
> +}
> +
> +static void *stack_pointer_to_alloc(void *stack)
> +{
> +	return stack - SSE_STACK_SIZE;
> +}
> +
> +#ifdef CONFIG_VMAP_STACK
> +static void *sse_stack_alloc(unsigned int cpu)
> +{
> +	void *stack = arch_alloc_vmap_stack(SSE_STACK_SIZE, cpu_to_node(cpu));


I'm wondering what could happen if, in the Svvptc case, we take a trap in
the SSE path because the vmalloced stack page table entry is not yet
visible to the page table walker: would that be an issue?


> +
> +	return alloc_to_stack_pointer(stack);
> +}
> +
> +static void sse_stack_free(void *stack)
> +{
> +	vfree(stack_pointer_to_alloc(stack));
> +}
> +#else /* CONFIG_VMAP_STACK */
> +static void *sse_stack_alloc(unsigned int cpu)
> +{
> +	void *stack = kmalloc(SSE_STACK_SIZE, GFP_KERNEL);
> +
> +	return alloc_to_stack_pointer(stack);
> +}
> +
> +static void sse_stack_free(void *stack)
> +{
> +	kfree(stack_pointer_to_alloc(stack));
> +}
> +#endif /* CONFIG_VMAP_STACK */
> +
> +static int sse_init_scs(int cpu, struct sse_event_arch_data *arch_evt)
> +{
> +	void *stack;
> +
> +	if (!scs_is_enabled())
> +		return 0;
> +
> +	stack = scs_alloc(cpu_to_node(cpu));
> +	if (!stack)
> +		return -ENOMEM;
> +
> +	arch_evt->shadow_stack = stack;
> +
> +	return 0;
> +}
> +
> +void arch_sse_event_update_cpu(struct sse_event_arch_data *arch_evt, int cpu)
> +{
> +	arch_evt->cpu_id = cpu;
> +	arch_evt->hart_id = cpuid_to_hartid_map(cpu);
> +}
> +
> +int arch_sse_init_event(struct sse_event_arch_data *arch_evt, u32 evt_id, int cpu)
> +{
> +	void *stack;
> +
> +	arch_evt->evt_id = evt_id;
> +	stack = sse_stack_alloc(cpu);
> +	if (!stack)
> +		return -ENOMEM;
> +
> +	arch_evt->stack = stack;
> +
> +	if (sse_init_scs(cpu, arch_evt)) {
> +		sse_stack_free(arch_evt->stack);
> +		return -ENOMEM;
> +	}
> +
> +	if (sse_event_is_global(evt_id)) {
> +		arch_evt->interrupted_phys =
> +					virt_to_phys(&arch_evt->interrupted);


I may be missing something, but I don't see why "interrupted" could not be
a userspace address?


> +	} else {
> +		arch_evt->interrupted_phys =
> +				per_cpu_ptr_to_phys(&arch_evt->interrupted);
> +	}
> +
> +	arch_sse_event_update_cpu(arch_evt, cpu);
> +
> +	return 0;
> +}
> +
> +void arch_sse_free_event(struct sse_event_arch_data *arch_evt)
> +{
> +	scs_free(arch_evt->shadow_stack);
> +	sse_stack_free(arch_evt->stack);
> +}
> +
> +int arch_sse_register_event(struct sse_event_arch_data *arch_evt)
> +{
> +	struct sbiret sret;
> +
> +	sret = sbi_ecall(SBI_EXT_SSE, SBI_SSE_EVENT_REGISTER, arch_evt->evt_id,
> +			 (unsigned long)handle_sse, (unsigned long)arch_evt, 0,
> +			 0, 0);
> +
> +	return sbi_err_map_linux_errno(sret.error);
> +}
> diff --git a/arch/riscv/kernel/sse_entry.S b/arch/riscv/kernel/sse_entry.S
> new file mode 100644
> index 000000000000..112bdb7d4369
> --- /dev/null
> +++ b/arch/riscv/kernel/sse_entry.S
> @@ -0,0 +1,180 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2024 Rivos Inc.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/linkage.h>
> +
> +#include <asm/asm.h>
> +#include <asm/csr.h>
> +#include <asm/scs.h>
> +
> +/* When entering handle_sse, the following registers are set:
> + * a6: contains the hartid
> + * a7: contains a sse_event_arch_data struct pointer
> + */
> +SYM_CODE_START(handle_sse)
> +	/* Save stack temporarily */
> +	REG_S sp, SSE_REG_EVT_TMP(a7)
> +	/* Set entry stack */
> +	REG_L sp, SSE_REG_EVT_STACK(a7)
> +
> +	addi sp, sp, -(PT_SIZE_ON_STACK)
> +	REG_S ra, PT_RA(sp)
> +	REG_S s0, PT_S0(sp)
> +	REG_S s1, PT_S1(sp)
> +	REG_S s2, PT_S2(sp)
> +	REG_S s3, PT_S3(sp)
> +	REG_S s4, PT_S4(sp)
> +	REG_S s5, PT_S5(sp)
> +	REG_S s6, PT_S6(sp)
> +	REG_S s7, PT_S7(sp)
> +	REG_S s8, PT_S8(sp)
> +	REG_S s9, PT_S9(sp)
> +	REG_S s10, PT_S10(sp)
> +	REG_S s11, PT_S11(sp)
> +	REG_S tp, PT_TP(sp)
> +	REG_S t0, PT_T0(sp)
> +	REG_S t1, PT_T1(sp)
> +	REG_S t2, PT_T2(sp)
> +	REG_S t3, PT_T3(sp)
> +	REG_S t4, PT_T4(sp)
> +	REG_S t5, PT_T5(sp)
> +	REG_S t6, PT_T6(sp)
> +	REG_S gp, PT_GP(sp)
> +	REG_S a0, PT_A0(sp)
> +	REG_S a1, PT_A1(sp)
> +	REG_S a2, PT_A2(sp)
> +	REG_S a3, PT_A3(sp)
> +	REG_S a4, PT_A4(sp)
> +	REG_S a5, PT_A5(sp)
> +
> +	/* Retrieve entry sp */
> +	REG_L a4, SSE_REG_EVT_TMP(a7)
> +	/* Save CSRs */
> +	csrr a0, CSR_EPC
> +	csrr a1, CSR_SSTATUS
> +	csrr a2, CSR_STVAL
> +	csrr a3, CSR_SCAUSE
> +
> +	REG_S a0, PT_EPC(sp)
> +	REG_S a1, PT_STATUS(sp)
> +	REG_S a2, PT_BADADDR(sp)
> +	REG_S a3, PT_CAUSE(sp)
> +	REG_S a4, PT_SP(sp)
> +
> +	/* Disable user memory access and floating/vector computing */
> +	li t0, SR_SUM | SR_FS_VS
> +	csrc CSR_STATUS, t0
> +
> +	load_global_pointer
> +	scs_load_sse_stack a7
> +
> +	/* Restore current task struct from __sse_entry_task */
> +	li t1, ASM_NR_CPUS
> +	mv t3, zero


t1 is only used in find_hart_id_slowpath, and shouldn't we use the real
number of present CPUs instead of NR_CPUS?

t3 is clobbered by cpu_id below; is that correct?
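
Something like the following could work (just a sketch: nr_cpu_ids is the
possible-CPU count rather than the present-CPU count, and whether it is
reachable from this asm would need checking):

	/* bound the loop with the runtime CPU count instead of NR_CPUS */
	la    t1, nr_cpu_ids
	lw    t1, 0(t1)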


> +
> +#ifdef CONFIG_SMP
> +	REG_L t4, SSE_REG_HART_ID(a7)
> +	REG_L t3, SSE_REG_CPU_ID(a7)


hart_id and cpu_id are "unsigned int", not xlen-wide fields, so you must
use lw here, not REG_L (we have had a lot of bugs lately around the usage
of this macro).
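
i.e. something like (sketch):

	lw t4, SSE_REG_HART_ID(a7)
	lw t3, SSE_REG_CPU_ID(a7)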


> +
> +	bne t4, a6, .Lfind_hart_id_slowpath
> +
> +.Lcpu_id_found:
> +#endif
> +	asm_per_cpu_with_cpu t2 __sse_entry_task t1 t3
> +	REG_L tp, 0(t2)
> +
> +	mv a1, sp /* pt_regs on stack */
> +
> +	/*
> +	 * Save sscratch for restoration since we might have interrupted the
> +	 * kernel in early exception path and thus, we don't know the content of
> +	 * sscratch.
> +	 */
> +	csrr s4, CSR_SSCRATCH
> +	/* In-kernel scratch is 0 */
> +	csrw CSR_SCRATCH, x0


csrrw s4, CSR_SSCRATCH, x0 instead? That would read the old value into s4
and clear sscratch in a single instruction.


> +
> +	mv a0, a7
> +
> +	call do_sse
> +
> +	csrw CSR_SSCRATCH, s4
> +
> +	REG_L a0, PT_STATUS(sp)
> +	REG_L a1, PT_EPC(sp)
> +	REG_L a2, PT_BADADDR(sp)
> +	REG_L a3, PT_CAUSE(sp)
> +	csrw CSR_SSTATUS, a0
> +	csrw CSR_EPC, a1
> +	csrw CSR_STVAL, a2
> +	csrw CSR_SCAUSE, a3
> +
> +	REG_L ra, PT_RA(sp)
> +	REG_L s0, PT_S0(sp)
> +	REG_L s1, PT_S1(sp)
> +	REG_L s2, PT_S2(sp)
> +	REG_L s3, PT_S3(sp)
> +	REG_L s4, PT_S4(sp)
> +	REG_L s5, PT_S5(sp)
> +	REG_L s6, PT_S6(sp)
> +	REG_L s7, PT_S7(sp)
> +	REG_L s8, PT_S8(sp)
> +	REG_L s9, PT_S9(sp)
> +	REG_L s10, PT_S10(sp)
> +	REG_L s11, PT_S11(sp)
> +	REG_L tp, PT_TP(sp)
> +	REG_L t0, PT_T0(sp)
> +	REG_L t1, PT_T1(sp)
> +	REG_L t2, PT_T2(sp)
> +	REG_L t3, PT_T3(sp)
> +	REG_L t4, PT_T4(sp)
> +	REG_L t5, PT_T5(sp)
> +	REG_L t6, PT_T6(sp)
> +	REG_L gp, PT_GP(sp)
> +	REG_L a0, PT_A0(sp)
> +	REG_L a1, PT_A1(sp)
> +	REG_L a2, PT_A2(sp)
> +	REG_L a3, PT_A3(sp)
> +	REG_L a4, PT_A4(sp)
> +	REG_L a5, PT_A5(sp)
> +
> +	REG_L sp, PT_SP(sp)
> +
> +	li a7, SBI_EXT_SSE
> +	li a6, SBI_SSE_EVENT_COMPLETE
> +	ecall
> +
> +#ifdef CONFIG_SMP
> +.Lfind_hart_id_slowpath:
> +
> +/* Slowpath to find the CPU id associated to the hart id */
> +la t0, __cpuid_to_hartid_map


^ Missing tab


> +
> +.Lhart_id_loop:
> +	REG_L t2, 0(t0)
> +	beq t2, a6, .Lcpu_id_found
> +
> +	/* Increment pointer and CPU number */
> +	addi t3, t3, 1
> +	addi t0, t0, RISCV_SZPTR
> +	bltu t3, t1, .Lhart_id_loop
> +
> +	/*
> +	 * This should never happen since we expect the hart_id to match one
> +	 * of our CPU, but better be safe than sorry
> +	 */
> +	la tp, init_task
> +	la a0, sse_hart_id_panic_string
> +	la t0, panic
> +	jalr t0
> +

This newline is not needed ^

> +#endif
> +
> +SYM_CODE_END(handle_sse)
> +
> +SYM_DATA_START_LOCAL(sse_hart_id_panic_string)
> +    .ascii "Unable to match hart_id with cpu\0"
> +SYM_DATA_END(sse_hart_id_panic_string)


Thanks,

Alex

