[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAMzpN2jSo+BXe_381Y7iEzntzpG7fMQHML4cMq1kLS8X_C7HPw@mail.gmail.com>
Date: Sat, 20 Dec 2025 12:10:13 -0500
From: Brian Gerst <brgerst@...il.com>
To: ellyndra <ellyesparza8@...il.com>
Cc: Peter Zijlstra <peterz@...radead.org>, linux-kernel@...r.kernel.org, luto@...nel.org,
tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
dave.hansen@...ux.intel.com, x86@...nel.org, hpa@...or.com
Subject: Re: [PATCH v2] x86/syscall: make sys_call helpers static
On Sat, Dec 20, 2025 at 11:52 AM ellyndra <ellyesparza8@...il.com> wrote:
>
> On Sat, Dec 20, 2025 at 11:32 AM Brian Gerst <brgerst@...il.com> wrote:
> >
> > On Sat, Dec 20, 2025 at 9:10 AM Peter Zijlstra <peterz@...radead.org> wrote:
> > >
> > > On Sat, Dec 20, 2025 at 08:50:31AM -0500, Brian Gerst wrote:
> > > > On Thu, Dec 18, 2025 at 10:01 AM ellyndra <ellyesparza8@...il.com> wrote:
> > > > >
> > > > > Make 'x64_sys_call()' 'x32_sys_call()' and 'ia32_sys_call()' static to
> > > > > prevent them from being resolved via kallsyms and by extension kprobes.
> > > > >
> > > > > These functions contain the final calls to the syscall handlers; having
> > > > > access to their addresses can be used to hook these calls, altering their
> > > > > behavior. Removing symbol visibility limits the surface of such attacks.
> > > > >
> > > > > Suggested-by: Peter Zijlstra <peterz@...radead.org>
> > > > > Signed-off-by: Elly I Esparza <ellyesparza8@...il.com>
> > > > > ---
> > > > > arch/x86/entry/syscall_32.c | 6 ++++--
> > > > > arch/x86/entry/syscall_64.c | 12 ++++++++----
> > > > > arch/x86/include/asm/syscall.h | 8 --------
> > > > > 3 files changed, 12 insertions(+), 14 deletions(-)
> > > > >
> > > > > diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
> > > > > index 2b15ea17bb7c..28c9a99fb2de 100644
> > > > > --- a/arch/x86/entry/syscall_32.c
> > > > > +++ b/arch/x86/entry/syscall_32.c
> > > > > @@ -41,7 +41,7 @@ const sys_call_ptr_t sys_call_table[] = {
> > > > > #endif
> > > > >
> > > > > #define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
> > > > > -long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
> > > > > +static __always_inline long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
> > > > > {
> > > > > switch (nr) {
> > > > > #include <asm/syscalls_32.h>
> > > > > @@ -70,7 +70,7 @@ early_param("ia32_emulation", ia32_emulation_override_cmdline);
> > > > > /*
> > > > > * Invoke a 32-bit syscall. Called with IRQs on in CT_STATE_KERNEL.
> > > > > */
> > > > > -static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
> > > > > +static noinstr void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
> > > > > {
> > > > > /*
> > > > > * Convert negative numbers to very high and thus out of range
> > > > > @@ -78,12 +78,14 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
> > > > > */
> > > > > unsigned int unr = nr;
> > > > >
> > > > > + instrumentation_begin();
> > > > > if (likely(unr < IA32_NR_syscalls)) {
> > > > > unr = array_index_nospec(unr, IA32_NR_syscalls);
> > > > > regs->ax = ia32_sys_call(regs, unr);
> > > > > } else if (nr != -1) {
> > > > > regs->ax = __ia32_sys_ni_syscall(regs);
> > > > > }
> > > > > + instrumentation_end();
> > > > > }
> > > > >
> > > > > #ifdef CONFIG_IA32_EMULATION
> > > > > diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
> > > > > index b6e68ea98b83..55ad274f57f5 100644
> > > > > --- a/arch/x86/entry/syscall_64.c
> > > > > +++ b/arch/x86/entry/syscall_64.c
> > > > > @@ -32,7 +32,7 @@ const sys_call_ptr_t sys_call_table[] = {
> > > > > #undef __SYSCALL
> > > > >
> > > > > #define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
> > > > > -long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
> > > > > +static __always_inline long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
> > > > > {
> > > > > switch (nr) {
> > > > > #include <asm/syscalls_64.h>
> > > > > @@ -40,15 +40,17 @@ long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
> > > > > }
> > > > > }
> > > > >
> > > > > -#ifdef CONFIG_X86_X32_ABI
> > > > > -long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
> > > > > +static __always_inline long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
> > > > > {
> > > > > +#ifdef CONFIG_X86_X32_ABI
> > > > > switch (nr) {
> > > > > #include <asm/syscalls_x32.h>
> > > > > default: return __x64_sys_ni_syscall(regs);
> > > > > }
> > > > > -}
> > > > > +#else
> > > > > + return __x64_sys_ni_syscall(regs);
> > > > > #endif
> > > > > +}
> > > > >
> > > > > static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
> > > > > {
> > > > > @@ -58,11 +60,13 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
> > > > > */
> > > > > unsigned int unr = nr;
> > > > >
> > > > > + instrumentation_begin();
> > > > > if (likely(unr < NR_syscalls)) {
> > > > > unr = array_index_nospec(unr, NR_syscalls);
> > > > > regs->ax = x64_sys_call(regs, unr);
> > > > > return true;
> > > > > }
> > > > > + instrumentation_end();
> > > > > return false;
> > > > > }
> > > > >
> > > > > diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
> > > > > index c10dbb74cd00..59a406074dc0 100644
> > > > > --- a/arch/x86/include/asm/syscall.h
> > > > > +++ b/arch/x86/include/asm/syscall.h
> > > > > @@ -20,14 +20,6 @@
> > > > > typedef long (*sys_call_ptr_t)(const struct pt_regs *);
> > > > > extern const sys_call_ptr_t sys_call_table[];
> > > > >
> > > > > -/*
> > > > > - * These may not exist, but still put the prototypes in so we
> > > > > - * can use IS_ENABLED().
> > > > > - */
> > > > > -extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
> > > > > -extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
> > > > > -extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
> > > > > -
> > > > > /*
> > > > > * Only the low 32 bits of orig_ax are meaningful, so we return int.
> > > > > * This importantly ignores the high bits on 64-bit, so comparisons
> > > > > --
> > > > > 2.43.0
> > > > >
> > > > >
> > > >
> > > > The main benefit of having the switch in a discrete function is that
> > > > it allows the compiler to do tail-call optimizations, reducing text
> > > > size. Isn't it possible to blacklist functions from kprobes?
> > >
> > > Of course. But I'm not sure I understand your concern. Both
> > > x{64,32}_sys_call() only have a single caller, inlining them avoids the
> > > whole call, tail or otherwise, how is that a bad thing?
> > >
> >
> > text data bss dec hex filename
> > 13194 128 0 13322 340a
> > arch/x86/entry/syscall_64.o
> > 10239 136 0 10375 2887
> > arch/x86/entry/syscall_64.o.orig
> >
> > That's a 3k increase in text size when it's inlined.
> >
> >
> > Brian Gerst
>
> I just checked the size too and agree with Brian. Regarding
> blacklisting, that was the original idea; it has the added problem
> of kallsyms_lookup_name() being available to bypass the blacklist.
>
> Is this fine for next version then?
>
> diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
> index 2b15ea17bb7c..75d34d2c8067 100644
> --- a/arch/x86/entry/syscall_32.c
> +++ b/arch/x86/entry/syscall_32.c
> @@ -1,6 +1,7 @@
> // SPDX-License-Identifier: GPL-2.0-only
> /* 32-bit system call dispatch */
>
> +#include <linux/kprobes.h>
> #include <linux/linkage.h>
> #include <linux/sys.h>
> #include <linux/cache.h>
> @@ -48,6 +49,7 @@ long ia32_sys_call(const struct pt_regs *regs,
> unsigned int nr)
> default: return __ia32_sys_ni_syscall(regs);
> }
> }
> +NOKPROBE_SYMBOL(ia32_sys_call)
>
> static __always_inline int syscall_32_enter(struct pt_regs *regs)
> {
> diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
> index b6e68ea98b83..1e8045a499d4 100644
> --- a/arch/x86/entry/syscall_64.c
> +++ b/arch/x86/entry/syscall_64.c
> @@ -1,6 +1,7 @@
> // SPDX-License-Identifier: GPL-2.0-only
> /* 64-bit system call dispatch */
>
> +#include <linux/kprobes.h>
> #include <linux/linkage.h>
> #include <linux/sys.h>
> #include <linux/cache.h>
> @@ -39,6 +40,7 @@ long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
> default: return __x64_sys_ni_syscall(regs);
> }
> }
> +NOKPROBE_SYMBOL(x64_sys_call)
>
> #ifdef CONFIG_X86_X32_ABI
> long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
> @@ -48,6 +50,7 @@ long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
> default: return __x64_sys_ni_syscall(regs);
> }
> }
> +NOKPROBE_SYMBOL(x32_sys_call)
> #endif
>
> static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> index 1e7635864124..13a7c0fdb5da 100644
> --- a/kernel/kallsyms.c
> +++ b/kernel/kallsyms.c
> @@ -234,6 +234,7 @@ unsigned long kallsyms_lookup_name(const char *name)
>
> return module_kallsyms_lookup_name(name);
> }
> +NOKPROBE_SYMBOL(kallsyms_lookup_name)
>
> /*
> * Iterate over all symbols in vmlinux. For symbols from modules use
Hardening isn't my area of expertise, but what prevents hooking
individual syscalls in the same manner?
Brian Gerst
Powered by blists - more mailing lists