[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Z5Pakko46SmUsz9d@ghost>
Date: Fri, 24 Jan 2025 10:23:14 -0800
From: Charlie Jenkins <charlie@...osinc.com>
To: Brian Gerst <brgerst@...il.com>
Cc: Paul Walmsley <paul.walmsley@...ive.com>,
Palmer Dabbelt <palmer@...belt.com>,
Huacai Chen <chenhuacai@...nel.org>,
WANG Xuerui <kernel@...0n.name>,
Thomas Gleixner <tglx@...utronix.de>,
Peter Zijlstra <peterz@...radead.org>,
Andy Lutomirski <luto@...nel.org>,
Alexandre Ghiti <alexghiti@...osinc.com>,
linux-riscv@...ts.infradead.org, linux-kernel@...r.kernel.org,
loongarch@...ts.linux.dev
Subject: Re: [PATCH v2 1/4] riscv: entry: Convert ret_from_fork() to C
On Fri, Jan 24, 2025 at 08:14:08AM -0500, Brian Gerst wrote:
> On Thu, Jan 23, 2025 at 2:15 PM Charlie Jenkins <charlie@...osinc.com> wrote:
> >
> > Move the main section of ret_from_fork() to C to allow inlining of
> > syscall_exit_to_user_mode().
> >
> > Signed-off-by: Charlie Jenkins <charlie@...osinc.com>
> > ---
> > arch/riscv/include/asm/asm-prototypes.h | 1 +
> > arch/riscv/kernel/entry.S | 15 ++++++---------
> > arch/riscv/kernel/process.c | 14 ++++++++++++--
> > 3 files changed, 19 insertions(+), 11 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
> > index cd627ec289f163a630b73dd03dd52a6b28692997..733ff609778797001006c33bba9e3cc5b1f15387 100644
> > --- a/arch/riscv/include/asm/asm-prototypes.h
> > +++ b/arch/riscv/include/asm/asm-prototypes.h
> > @@ -52,6 +52,7 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
> > DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
> > DECLARE_DO_ERROR_INFO(do_trap_break);
> >
> > +asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs);
> > asmlinkage void handle_bad_stack(struct pt_regs *regs);
> > asmlinkage void do_page_fault(struct pt_regs *regs);
> > asmlinkage void do_irq(struct pt_regs *regs);
> > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > index 33a5a9f2a0d4e1eeccfb3621b9e518b88e1b0704..9225c322279aa90e737b1d7144db084319cf8103 100644
> > --- a/arch/riscv/kernel/entry.S
> > +++ b/arch/riscv/kernel/entry.S
> > @@ -319,17 +319,14 @@ SYM_CODE_END(handle_kernel_stack_overflow)
> > ASM_NOKPROBE(handle_kernel_stack_overflow)
> > #endif
> >
> > -SYM_CODE_START(ret_from_fork)
> > +SYM_CODE_START(ret_from_fork_asm)
> > call schedule_tail
> > - beqz s0, 1f /* not from kernel thread */
> > - /* Call fn(arg) */
> > - move a0, s1
> > - jalr s0
> > -1:
> > - move a0, sp /* pt_regs */
> > - call syscall_exit_to_user_mode
> > + move a0, s1 /* fn */
> > + move a1, s0 /* fn_arg */
> > + move a2, sp /* pt_regs */
> > + call ret_from_fork
> > j ret_from_exception
> > -SYM_CODE_END(ret_from_fork)
> > +SYM_CODE_END(ret_from_fork_asm)
> >
> > #ifdef CONFIG_IRQ_STACKS
> > /*
> > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> > index 58b6482c2bf662bf5224ca50c8e21a68760a6b41..0d07e6d8f6b57beba438dbba5e8c74a014582bee 100644
> > --- a/arch/riscv/kernel/process.c
> > +++ b/arch/riscv/kernel/process.c
> > @@ -17,7 +17,9 @@
> > #include <linux/ptrace.h>
> > #include <linux/uaccess.h>
> > #include <linux/personality.h>
> > +#include <linux/entry-common.h>
> >
> > +#include <asm/asm-prototypes.h>
> > #include <asm/unistd.h>
> > #include <asm/processor.h>
> > #include <asm/csr.h>
> > @@ -36,7 +38,7 @@ unsigned long __stack_chk_guard __read_mostly;
> > EXPORT_SYMBOL(__stack_chk_guard);
> > #endif
> >
> > -extern asmlinkage void ret_from_fork(void);
> > +extern asmlinkage void ret_from_fork_asm(void);
> >
> > void noinstr arch_cpu_idle(void)
> > {
> > @@ -206,6 +208,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> > return 0;
> > }
> >
> > +asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs)
> > +{
> > + if (unlikely(fn))
> > + fn(fn_arg);
> > +
> > + syscall_exit_to_user_mode(regs);
> > +}
> > +
> > int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> > {
> > unsigned long clone_flags = args->flags;
> > @@ -242,7 +252,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> > p->thread.riscv_v_flags = 0;
> > if (has_vector())
> > riscv_v_thread_alloc(p);
> > - p->thread.ra = (unsigned long)ret_from_fork;
> > + p->thread.ra = (unsigned long)ret_from_fork_asm;
> > p->thread.sp = (unsigned long)childregs; /* kernel sp */
> > return 0;
> > }
> >
> > --
> > 2.43.0
> >
> >
>
> Is there a specific reason you didn't move the call to schedule_tail()
> to the C function, like on x86?
Yes, the generated code ends up being dramatically worse if
schedule_tail() is moved into C. This is because the arg for
schedule_tail() is already in a0 so the extra stack manipulation
instructions end up taking up a lot of instructions.
With this change:
<ret_from_fork_asm>:
ff65b097 auipc ra,0xff65b
1ee080e7 jalr 494(ra) # ffffffff8005038a <schedule_tail>
8526 mv a0,s1
85a2 mv a1,s0
860a mv a2,sp
ff61b097 auipc ra,0xff61b
606080e7 jalr 1542(ra) # ffffffff800107b0 <ret_from_fork>
b5f5 j ffffffff809f509e <ret_from_exception>
<ret_from_fork>:
1101 addi sp,sp,-32
e822 sd s0,16(sp)
ec06 sd ra,24(sp)
1000 addi s0,sp,32
e991 bnez a1,ffffffff800107cc <ret_from_fork+0x1c>
8532 mv a0,a2
009db097 auipc ra,0x9db
a32080e7 jalr -1486(ra) # ffffffff809eb1ee <syscall_exit_to_user_mode>
60e2 ld ra,24(sp)
6442 ld s0,16(sp)
6105 addi sp,sp,32
8082 ret
<following instructions used only in the kernel thread case>
fec43423 sd a2,-24(s0)
9582 jalr a1
fe843603 ld a2,-24(s0)
8532 mv a0,a2
009db097 auipc ra,0x9db
a16080e7 jalr -1514(ra) # ffffffff809eb1ee <syscall_exit_to_user_mode>
60e2 ld ra,24(sp)
6442 ld s0,16(sp)
6105 addi sp,sp,32
8082 ret
Contrasted with what this looks like if schedule_tail() is called from
C.
<ret_from_fork_asm>:
85a6 mv a1,s1
8622 mv a2,s0
868a mv a3,sp
ff61b097 auipc ra,0xff61b
60e080e7 jalr 1550(ra) # ffffffff800107b0 <ret_from_fork>
bdd5 j ffffffff809f509e <ret_from_exception>
<ret_from_fork>:
7179 addi sp,sp,-48
f022 sd s0,32(sp)
ec26 sd s1,24(sp)
e84a sd s2,16(sp)
e44e sd s3,8(sp)
f406 sd ra,40(sp)
1800 addi s0,sp,48
84b2 mv s1,a2
89ae mv s3,a1
8936 mv s2,a3
3c73f0ef jal ffffffff8005038a <schedule_tail>
ec89 bnez s1,ffffffff800107e2 <ret_from_fork+0x32>
854a mv a0,s2
009db097 auipc ra,0x9db
a22080e7 jalr -1502(ra) # ffffffff809eb1ee <syscall_exit_to_user_mode>
70a2 ld ra,40(sp)
7402 ld s0,32(sp)
64e2 ld s1,24(sp)
6942 ld s2,16(sp)
69a2 ld s3,8(sp)
6145 addi sp,sp,48
8082 ret
854e mv a0,s3
9482 jalr s1
b7d5 j ffffffff800107ca <ret_from_fork+0x1a>
ret_from_fork_asm ends up being 2 instructions more when calling from
asm, but the user fork ret_from_fork ends up being only 12 instructions
rather than 22 instructions when calling from C. If we were able to mix
asm and C code in a naked function we would be able to get rid of the
stack manipulation and still be able to inline C but we don't live in
that world...
- Charlie
>
>
> Brian Gerst
Powered by blists - more mailing lists