[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250313182236.655724-4-brgerst@gmail.com>
Date: Thu, 13 Mar 2025 14:22:34 -0400
From: Brian Gerst <brgerst@...il.com>
To: linux-kernel@...r.kernel.org,
x86@...nel.org
Cc: Ingo Molnar <mingo@...nel.org>,
"H . Peter Anvin" <hpa@...or.com>,
Thomas Gleixner <tglx@...utronix.de>,
Borislav Petkov <bp@...en8.de>,
Andy Lutomirski <luto@...nel.org>,
Juergen Gross <jgross@...e.com>,
Boris Ostrovsky <boris.ostrovsky@...cle.com>,
Brian Gerst <brgerst@...il.com>
Subject: [PATCH 3/5] x86/syscall/64: Move 64-bit syscall dispatch code
Move the 64-bit syscall dispatch code to syscall_64.c.
No functional changes.
Signed-off-by: Brian Gerst <brgerst@...il.com>
---
arch/x86/entry/Makefile | 2 +
arch/x86/entry/common.c | 93 --------------------------------
arch/x86/entry/syscall_64.c | 103 +++++++++++++++++++++++++++++++++++-
3 files changed, 103 insertions(+), 95 deletions(-)
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 96a6b86e0a8b..5fd28abfd5a0 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -9,9 +9,11 @@ KCOV_INSTRUMENT := n
CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE)
CFLAGS_common.o += -fno-stack-protector
CFLAGS_syscall_32.o += -fno-stack-protector
+CFLAGS_syscall_64.o += -fno-stack-protector
obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o
obj-y += common.o
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 183efabefe57..5bd448c0664f 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -32,99 +32,6 @@
#include <asm/syscall.h>
#include <asm/irq_stack.h>
-#ifdef CONFIG_X86_64
-
-static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
-{
- /*
- * Convert negative numbers to very high and thus out of range
- * numbers for comparisons.
- */
- unsigned int unr = nr;
-
- if (likely(unr < NR_syscalls)) {
- unr = array_index_nospec(unr, NR_syscalls);
- regs->ax = x64_sys_call(regs, unr);
- return true;
- }
- return false;
-}
-
-static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
-{
- /*
- * Adjust the starting offset of the table, and convert numbers
- * < __X32_SYSCALL_BIT to very high and thus out of range
- * numbers for comparisons.
- */
- unsigned int xnr = nr - __X32_SYSCALL_BIT;
-
- if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
- xnr = array_index_nospec(xnr, X32_NR_syscalls);
- regs->ax = x32_sys_call(regs, xnr);
- return true;
- }
- return false;
-}
-
-/* Returns true to return using SYSRET, or false to use IRET */
-__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
-{
- add_random_kstack_offset();
- nr = syscall_enter_from_user_mode(regs, nr);
-
- instrumentation_begin();
-
- if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
- /* Invalid system call, but still a system call. */
- regs->ax = __x64_sys_ni_syscall(regs);
- }
-
- instrumentation_end();
- syscall_exit_to_user_mode(regs);
-
- /*
- * Check that the register state is valid for using SYSRET to exit
- * to userspace. Otherwise use the slower but fully capable IRET
- * exit path.
- */
-
- /* XEN PV guests always use the IRET path */
- if (cpu_feature_enabled(X86_FEATURE_XENPV))
- return false;
-
- /* SYSRET requires RCX == RIP and R11 == EFLAGS */
- if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
- return false;
-
- /* CS and SS must match the values set in MSR_STAR */
- if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
- return false;
-
- /*
- * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
- * in kernel space. This essentially lets the user take over
- * the kernel, since userspace controls RSP.
- *
- * TASK_SIZE_MAX covers all user-accessible addresses other than
- * the deprecated vsyscall page.
- */
- if (unlikely(regs->ip >= TASK_SIZE_MAX))
- return false;
-
- /*
- * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
- * restoring TF results in a trap from userspace immediately after
- * SYSRET.
- */
- if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
- return false;
-
- /* Use SYSRET to exit to userspace */
- return true;
-}
-#endif
-
SYSCALL_DEFINE0(ni_syscall)
{
return -ENOSYS;
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index ba8354424860..9e0ba339013c 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -1,10 +1,19 @@
-// SPDX-License-Identifier: GPL-2.0
-/* System call table for x86-64. */
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * 64-bit system call dispatch
+ *
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * Based on asm and ptrace code by many authors. The code here originated
+ * in ptrace.c and signal.c.
+ */
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
#include <linux/syscalls.h>
+#include <linux/entry-common.h>
+#include <linux/nospec.h>
#include <asm/syscall.h>
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
@@ -34,3 +43,93 @@ long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
default: return __x64_sys_ni_syscall(regs);
}
};
+
+static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
+{
+ /*
+ * Convert negative numbers to very high and thus out of range
+ * numbers for comparisons.
+ */
+ unsigned int unr = nr;
+
+ if (likely(unr < NR_syscalls)) {
+ unr = array_index_nospec(unr, NR_syscalls);
+ regs->ax = x64_sys_call(regs, unr);
+ return true;
+ }
+ return false;
+}
+
+static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
+{
+ /*
+ * Adjust the starting offset of the table, and convert numbers
+ * < __X32_SYSCALL_BIT to very high and thus out of range
+ * numbers for comparisons.
+ */
+ unsigned int xnr = nr - __X32_SYSCALL_BIT;
+
+ if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
+ xnr = array_index_nospec(xnr, X32_NR_syscalls);
+ regs->ax = x32_sys_call(regs, xnr);
+ return true;
+ }
+ return false;
+}
+
+/* Returns true to return using SYSRET, or false to use IRET */
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
+{
+ add_random_kstack_offset();
+ nr = syscall_enter_from_user_mode(regs, nr);
+
+ instrumentation_begin();
+
+ if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
+ /* Invalid system call, but still a system call. */
+ regs->ax = __x64_sys_ni_syscall(regs);
+ }
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+
+ /*
+ * Check that the register state is valid for using SYSRET to exit
+ * to userspace. Otherwise use the slower but fully capable IRET
+ * exit path.
+ */
+
+ /* XEN PV guests always use the IRET path */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /* SYSRET requires RCX == RIP and R11 == EFLAGS */
+ if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
+ return false;
+
+ /* CS and SS must match the values set in MSR_STAR */
+ if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
+ return false;
+
+ /*
+ * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+ * in kernel space. This essentially lets the user take over
+ * the kernel, since userspace controls RSP.
+ *
+ * TASK_SIZE_MAX covers all user-accessible addresses other than
+ * the deprecated vsyscall page.
+ */
+ if (unlikely(regs->ip >= TASK_SIZE_MAX))
+ return false;
+
+ /*
+ * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
+ * restoring TF results in a trap from userspace immediately after
+ * SYSRET.
+ */
+ if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
+ return false;
+
+ /* Use SYSRET to exit to userspace */
+ return true;
+}
--
2.48.1
Powered by blists - more mailing lists