lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Z5Pakko46SmUsz9d@ghost>
Date: Fri, 24 Jan 2025 10:23:14 -0800
From: Charlie Jenkins <charlie@...osinc.com>
To: Brian Gerst <brgerst@...il.com>
Cc: Paul Walmsley <paul.walmsley@...ive.com>,
	Palmer Dabbelt <palmer@...belt.com>,
	Huacai Chen <chenhuacai@...nel.org>,
	WANG Xuerui <kernel@...0n.name>,
	Thomas Gleixner <tglx@...utronix.de>,
	Peter Zijlstra <peterz@...radead.org>,
	Andy Lutomirski <luto@...nel.org>,
	Alexandre Ghiti <alexghiti@...osinc.com>,
	linux-riscv@...ts.infradead.org, linux-kernel@...r.kernel.org,
	loongarch@...ts.linux.dev
Subject: Re: [PATCH v2 1/4] riscv: entry: Convert ret_from_fork() to C

On Fri, Jan 24, 2025 at 08:14:08AM -0500, Brian Gerst wrote:
> On Thu, Jan 23, 2025 at 2:15 PM Charlie Jenkins <charlie@...osinc.com> wrote:
> >
> > Move the main section of ret_from_fork() to C to allow inlining of
> > syscall_exit_to_user_mode().
> >
> > Signed-off-by: Charlie Jenkins <charlie@...osinc.com>
> > ---
> >  arch/riscv/include/asm/asm-prototypes.h |  1 +
> >  arch/riscv/kernel/entry.S               | 15 ++++++---------
> >  arch/riscv/kernel/process.c             | 14 ++++++++++++--
> >  3 files changed, 19 insertions(+), 11 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
> > index cd627ec289f163a630b73dd03dd52a6b28692997..733ff609778797001006c33bba9e3cc5b1f15387 100644
> > --- a/arch/riscv/include/asm/asm-prototypes.h
> > +++ b/arch/riscv/include/asm/asm-prototypes.h
> > @@ -52,6 +52,7 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
> >  DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
> >  DECLARE_DO_ERROR_INFO(do_trap_break);
> >
> > +asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs);
> >  asmlinkage void handle_bad_stack(struct pt_regs *regs);
> >  asmlinkage void do_page_fault(struct pt_regs *regs);
> >  asmlinkage void do_irq(struct pt_regs *regs);
> > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > index 33a5a9f2a0d4e1eeccfb3621b9e518b88e1b0704..9225c322279aa90e737b1d7144db084319cf8103 100644
> > --- a/arch/riscv/kernel/entry.S
> > +++ b/arch/riscv/kernel/entry.S
> > @@ -319,17 +319,14 @@ SYM_CODE_END(handle_kernel_stack_overflow)
> >  ASM_NOKPROBE(handle_kernel_stack_overflow)
> >  #endif
> >
> > -SYM_CODE_START(ret_from_fork)
> > +SYM_CODE_START(ret_from_fork_asm)
> >         call schedule_tail
> > -       beqz s0, 1f     /* not from kernel thread */
> > -       /* Call fn(arg) */
> > -       move a0, s1
> > -       jalr s0
> > -1:
> > -       move a0, sp /* pt_regs */
> > -       call syscall_exit_to_user_mode
> > +       move a0, s1 /* fn_arg */
> > +       move a1, s0 /* fn */
> > +       move a2, sp /* pt_regs */
> > +       call ret_from_fork
> >         j ret_from_exception
> > -SYM_CODE_END(ret_from_fork)
> > +SYM_CODE_END(ret_from_fork_asm)
> >
> >  #ifdef CONFIG_IRQ_STACKS
> >  /*
> > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> > index 58b6482c2bf662bf5224ca50c8e21a68760a6b41..0d07e6d8f6b57beba438dbba5e8c74a014582bee 100644
> > --- a/arch/riscv/kernel/process.c
> > +++ b/arch/riscv/kernel/process.c
> > @@ -17,7 +17,9 @@
> >  #include <linux/ptrace.h>
> >  #include <linux/uaccess.h>
> >  #include <linux/personality.h>
> > +#include <linux/entry-common.h>
> >
> > +#include <asm/asm-prototypes.h>
> >  #include <asm/unistd.h>
> >  #include <asm/processor.h>
> >  #include <asm/csr.h>
> > @@ -36,7 +38,7 @@ unsigned long __stack_chk_guard __read_mostly;
> >  EXPORT_SYMBOL(__stack_chk_guard);
> >  #endif
> >
> > -extern asmlinkage void ret_from_fork(void);
> > +extern asmlinkage void ret_from_fork_asm(void);
> >
> >  void noinstr arch_cpu_idle(void)
> >  {
> > @@ -206,6 +208,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> >         return 0;
> >  }
> >
> > +asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs)
> > +{
> > +       if (unlikely(fn))
> > +               fn(fn_arg);
> > +
> > +       syscall_exit_to_user_mode(regs);
> > +}
> > +
> >  int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> >  {
> >         unsigned long clone_flags = args->flags;
> > @@ -242,7 +252,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> >         p->thread.riscv_v_flags = 0;
> >         if (has_vector())
> >                 riscv_v_thread_alloc(p);
> > -       p->thread.ra = (unsigned long)ret_from_fork;
> > +       p->thread.ra = (unsigned long)ret_from_fork_asm;
> >         p->thread.sp = (unsigned long)childregs; /* kernel sp */
> >         return 0;
> >  }
> >
> > --
> > 2.43.0
> >
> >
> 
> Is there a specific reason you didn't move the call to schedule_tail()
> to the C function, like on x86?

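Yes, the generated code ends up being dramatically worse if
schedule_tail() is moved into C. This is because the argument for
schedule_tail() is already in a0, so the asm can call it directly with
no setup. Calling it from C means the other arguments have to be kept
in callee-saved registers across the call, and the extra stack
manipulation that requires adds a lot of instructions.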

With this change:
<ret_from_fork_asm>:
       ff65b097                auipc   ra,0xff65b
       1ee080e7                jalr    494(ra) # ffffffff8005038a <schedule_tail>
       8526                    mv      a0,s1
       85a2                    mv      a1,s0
       860a                    mv      a2,sp
       ff61b097                auipc   ra,0xff61b
       606080e7                jalr    1542(ra) # ffffffff800107b0 <ret_from_fork>
       b5f5                    j       ffffffff809f509e <ret_from_exception>


<ret_from_fork>:
       1101                    addi    sp,sp,-32
       e822                    sd      s0,16(sp)
       ec06                    sd      ra,24(sp)
       1000                    addi    s0,sp,32
       e991                    bnez    a1,ffffffff800107cc <ret_from_fork+0x1c>
       8532                    mv      a0,a2
       009db097                auipc   ra,0x9db
       a32080e7                jalr    -1486(ra) # ffffffff809eb1ee <syscall_exit_to_user_mode>
       60e2                    ld      ra,24(sp)
       6442                    ld      s0,16(sp)
       6105                    addi    sp,sp,32
       8082                    ret
       <following instructions used only in the kernel thread case>
       fec43423                sd      a2,-24(s0)
       9582                    jalr    a1
       fe843603                ld      a2,-24(s0)
       8532                    mv      a0,a2
       009db097                auipc   ra,0x9db
       a16080e7                jalr    -1514(ra) # ffffffff809eb1ee <syscall_exit_to_user_mode>
       60e2                    ld      ra,24(sp)
       6442                    ld      s0,16(sp)
       6105                    addi    sp,sp,32
       8082                    ret

Contrast that with what the code looks like if schedule_tail() is
called from C:

<ret_from_fork_asm>:
       85a6                    mv      a1,s1
       8622                    mv      a2,s0
       868a                    mv      a3,sp
       ff61b097                auipc   ra,0xff61b
       60e080e7                jalr    1550(ra) # ffffffff800107b0 <ret_from_fork>
       bdd5                    j       ffffffff809f509e <ret_from_exception>

<ret_from_fork>:
       7179                    addi    sp,sp,-48
       f022                    sd      s0,32(sp)
       ec26                    sd      s1,24(sp)
       e84a                    sd      s2,16(sp)
       e44e                    sd      s3,8(sp)
       f406                    sd      ra,40(sp)
       1800                    addi    s0,sp,48
       84b2                    mv      s1,a2
       89ae                    mv      s3,a1
       8936                    mv      s2,a3
       3c73f0ef                jal     ffffffff8005038a <schedule_tail>
       ec89                    bnez    s1,ffffffff800107e2 <ret_from_fork+0x32>
       854a                    mv      a0,s2
       009db097                auipc   ra,0x9db
       a22080e7                jalr    -1502(ra) # ffffffff809eb1ee <syscall_exit_to_user_mode>
       70a2                    ld      ra,40(sp)
       7402                    ld      s0,32(sp)
       64e2                    ld      s1,24(sp)
       6942                    ld      s2,16(sp)
       69a2                    ld      s3,8(sp)
       6145                    addi    sp,sp,48
       8082                    ret
       854e                    mv      a0,s3
       9482                    jalr    s1
       b7d5                    j       ffffffff800107ca <ret_from_fork+0x1a>


ret_from_fork_asm ends up being 2 instructions longer when
schedule_tail() is called from asm (8 instead of 6), but the user fork
path through ret_from_fork is only 12 instructions, rather than the 22
it takes when schedule_tail() is called from C. If we were able to mix
asm and C code in a naked function, we would be able to get rid of the
stack manipulation and still be able to inline C, but we don't live in
that world...

- Charlie

> 
> 
> Brian Gerst

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ