[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <0e04f8d34cb52320110057021184d8facec7e1a6.1616004689.git.luto@kernel.org>
Date: Wed, 17 Mar 2021 11:12:43 -0700
From: Andy Lutomirski <luto@...nel.org>
To: x86@...nel.org
Cc: LKML <linux-kernel@...r.kernel.org>,
Mark Rutland <mark.rutland@....com>,
Brian Gerst <brgerst@...il.com>,
Andy Lutomirski <luto@...nel.org>
Subject: [PATCH v4 4/9] kentry: Simplify the common syscall API
The new common syscall API had a large and confusing API surface. Simplify
it. Now there is exactly one way to use it. It's a bit more verbose than
the old way for the simple x86_64 native case, but it's much easier to use
right, and the diffstat should speak for itself.
Acked-by: Mark Rutland <mark.rutland@....com>
Signed-off-by: Andy Lutomirski <luto@...nel.org>
---
arch/x86/entry/common.c | 57 +++++++++++++++---------
include/linux/entry-common.h | 86 ++++++------------------------------
kernel/entry/common.c | 57 +++---------------------
3 files changed, 54 insertions(+), 146 deletions(-)
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index ef1c65938a6b..8710b2300b8d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -38,9 +38,12 @@
#ifdef CONFIG_X86_64
__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
- nr = syscall_enter_from_user_mode(regs, nr);
-
+ kentry_enter_from_user_mode(regs);
+ local_irq_enable();
instrumentation_begin();
+
+ nr = kentry_syscall_begin(regs, nr);
+
if (likely(nr < NR_syscalls)) {
nr = array_index_nospec(nr, NR_syscalls);
regs->ax = sys_call_table[nr](regs);
@@ -52,8 +55,12 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
regs->ax = x32_sys_call_table[nr](regs);
#endif
}
+
+ kentry_syscall_end(regs);
+
+ local_irq_disable();
instrumentation_end();
- syscall_exit_to_user_mode(regs);
+ kentry_exit_to_user_mode(regs);
}
#endif
@@ -83,33 +90,34 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
unsigned int nr = syscall_32_enter(regs);
+ kentry_enter_from_user_mode(regs);
+ local_irq_enable();
+ instrumentation_begin();
+
/*
* Subtlety here: if ptrace pokes something larger than 2^32-1 into
* orig_ax, the unsigned int return value truncates it. This may
* or may not be necessary, but it matches the old asm behavior.
*/
- nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
- instrumentation_begin();
-
+ nr = (unsigned int)kentry_syscall_begin(regs, nr);
do_syscall_32_irqs_on(regs, nr);
+ kentry_syscall_end(regs);
+ local_irq_disable();
instrumentation_end();
- syscall_exit_to_user_mode(regs);
+ kentry_exit_to_user_mode(regs);
}
static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
{
unsigned int nr = syscall_32_enter(regs);
+ bool ret;
int res;
- /*
- * This cannot use syscall_enter_from_user_mode() as it has to
- * fetch EBP before invoking any of the syscall entry work
- * functions.
- */
- syscall_enter_from_user_mode_prepare(regs);
-
+ kentry_enter_from_user_mode(regs);
+ local_irq_enable();
instrumentation_begin();
+
/* Fetch EBP from where the vDSO stashed it. */
if (IS_ENABLED(CONFIG_X86_64)) {
/*
@@ -126,21 +134,23 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
if (res) {
/* User code screwed up. */
regs->ax = -EFAULT;
-
- instrumentation_end();
- local_irq_disable();
- irqentry_exit_to_user_mode(regs);
- return false;
+ ret = false;
+ goto out;
}
/* The case truncates any ptrace induced syscall nr > 2^32 -1 */
- nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
+ nr = (unsigned int)kentry_syscall_begin(regs, nr);
/* Now this is just like a normal syscall. */
do_syscall_32_irqs_on(regs, nr);
+ kentry_syscall_end(regs);
+ ret = true;
+
+out:
+ local_irq_disable();
instrumentation_end();
- syscall_exit_to_user_mode(regs);
+ kentry_exit_to_user_mode(regs);
return true;
}
@@ -233,8 +243,11 @@ __visible void noinstr ret_from_fork(struct task_struct *prev,
user_regs->ax = 0;
}
+ kentry_syscall_end(user_regs);
+
+ local_irq_disable();
instrumentation_end();
- syscall_exit_to_user_mode(user_regs);
+ kentry_exit_to_user_mode(user_regs);
}
#ifdef CONFIG_XEN_PV
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index fd2d7c35670a..5287c6c15a66 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -119,31 +119,12 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
void enter_from_user_mode(struct pt_regs *regs);
/**
- * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
- * @regs: Pointer to currents pt_regs
- *
- * Invoked from architecture specific syscall entry code with interrupts
- * disabled. The calling code has to be non-instrumentable. When the
- * function returns all state is correct, interrupts are enabled and the
- * subsequent functions can be instrumented.
- *
- * This handles lockdep, RCU (context tracking) and tracing state, i.e.
- * the functionality provided by enter_from_user_mode().
- *
- * This is invoked when there is extra architecture specific functionality
- * to be done between establishing state and handling user mode entry work.
- */
-void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
-
-/**
- * syscall_enter_from_user_mode_work - Check and handle work before invoking
- * a syscall
+ * kentry_syscall_begin - Prepare to invoke a syscall handler
* @regs: Pointer to currents pt_regs
* @syscall: The syscall number
*
* Invoked from architecture specific syscall entry code with interrupts
- * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
- * architecture specific work.
+ * enabled after kentry_enter_from_usermode or a similar function.
*
* Returns: The original or a modified syscall number
*
@@ -152,32 +133,16 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
* syscall_set_return_value() first. If neither of those are called and -1
* is returned, then the syscall will fail with ENOSYS.
*
+ * After calling kentry_syscall_begin(), regardless of the return value,
+ * the caller must call kentry_syscall_end().
+ *
* It handles the following work items:
*
* 1) syscall_work flag dependent invocations of
* arch_syscall_enter_tracehook(), __secure_computing(), trace_sys_enter()
* 2) Invocation of audit_syscall_entry()
*/
-long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
-
-/**
- * syscall_enter_from_user_mode - Establish state and check and handle work
- * before invoking a syscall
- * @regs: Pointer to currents pt_regs
- * @syscall: The syscall number
- *
- * Invoked from architecture specific syscall entry code with interrupts
- * disabled. The calling code has to be non-instrumentable. When the
- * function returns all state is correct, interrupts are enabled and the
- * subsequent functions can be instrumented.
- *
- * This is combination of syscall_enter_from_user_mode_prepare() and
- * syscall_enter_from_user_mode_work().
- *
- * Returns: The original or a modified syscall number. See
- * syscall_enter_from_user_mode_work() for further explanation.
- */
-long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
+long kentry_syscall_begin(struct pt_regs *regs, long syscall);
/**
* local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
@@ -317,28 +282,16 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step)
void exit_to_user_mode(void);
/**
- * syscall_exit_to_user_mode_work - Handle work before returning to user mode
+ * kentry_syscall_end - Finish syscall processing
* @regs: Pointer to currents pt_regs
*
- * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
- * exit_to_user_mode() to perform the final transition to user mode.
*
- * Calling convention is the same as for syscall_exit_to_user_mode() and it
- * returns with all work handled and interrupts disabled. The caller must
- * invoke exit_to_user_mode() before actually switching to user mode to
- * make the final state transitions. Interrupts must stay disabled between
- * return from this function and the invocation of exit_to_user_mode().
- */
-void syscall_exit_to_user_mode_work(struct pt_regs *regs);
-
-/**
- * syscall_exit_to_user_mode - Handle work before returning to user mode
- * @regs: Pointer to currents pt_regs
+ * This must be called after arch code calls kentry_syscall_begin()
+ * and invoking a syscall handler, if any. This must also be called when
+ * returning from fork() to user mode, since return-from-fork is considered
+ * to be a syscall return.
*
- * Invoked with interrupts enabled and fully valid regs. Returns with all
- * work handled, interrupts disabled such that the caller can immediately
- * switch to user mode. Called from architecture specific syscall and ret
- * from fork code.
+ * Called with IRQs on. Returns with IRQs still on.
*
* The call order is:
* 1) One-time syscall exit work:
@@ -346,21 +299,8 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs);
* - audit
* - syscall tracing
* - tracehook (single stepping)
- *
- * 2) Preparatory work
- * - Exit to user mode loop (common TIF handling). Invokes
- * arch_exit_to_user_mode_work() for architecture specific TIF work
- * - Architecture specific one time work arch_exit_to_user_mode_prepare()
- * - Address limit and lockdep checks
- *
- * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
- * functionality in exit_to_user_mode().
- *
- * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
- * exit_to_user_mode(). This function is preferred unless there is a
- * compelling architectural reason to use the seperate functions.
*/
-void syscall_exit_to_user_mode(struct pt_regs *regs);
+void kentry_syscall_end(struct pt_regs *regs);
/**
* kentry_enter_from_user_mode - Establish state before invoking the irq handler
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 269766a8f981..800ad406431b 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -80,44 +80,19 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
return ret ? : syscall;
}
-static __always_inline long
-__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
+long kentry_syscall_begin(struct pt_regs *regs, long syscall)
{
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
+ CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
+ lockdep_assert_irqs_enabled();
+
if (work & SYSCALL_WORK_ENTER)
syscall = syscall_trace_enter(regs, syscall, work);
return syscall;
}
-long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
-{
- return __syscall_enter_from_user_work(regs, syscall);
-}
-
-noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
-{
- long ret;
-
- __enter_from_user_mode(regs);
-
- instrumentation_begin();
- local_irq_enable();
- ret = __syscall_enter_from_user_work(regs, syscall);
- instrumentation_end();
-
- return ret;
-}
-
-noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
-{
- __enter_from_user_mode(regs);
- instrumentation_begin();
- local_irq_enable();
- instrumentation_end();
-}
-
/* See comment for exit_to_user_mode() in entry-common.h */
static __always_inline void __exit_to_user_mode(void)
{
@@ -218,7 +193,7 @@ static inline bool report_single_step(unsigned long work)
/*
* If SYSCALL_EMU is set, then the only reason to report is when
* TIF_SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
- * instruction has been already reported in syscall_enter_from_user_mode().
+ * instruction has been already reported in kentry_syscall_begin().
*/
static inline bool report_single_step(unsigned long work)
{
@@ -261,7 +236,7 @@ static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
* Syscall specific exit to user mode preparation. Runs with interrupts
* enabled.
*/
-static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
+void kentry_syscall_end(struct pt_regs *regs)
{
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
unsigned long nr = syscall_get_nr(current, regs);
@@ -284,26 +259,6 @@ static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
syscall_exit_work(regs, work);
}
-static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
-{
- syscall_exit_to_user_mode_prepare(regs);
- local_irq_disable_exit_to_user();
- exit_to_user_mode_prepare(regs);
-}
-
-void syscall_exit_to_user_mode_work(struct pt_regs *regs)
-{
- __syscall_exit_to_user_mode_work(regs);
-}
-
-__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
-{
- instrumentation_begin();
- __syscall_exit_to_user_mode_work(regs);
- instrumentation_end();
- __exit_to_user_mode();
-}
-
noinstr void kentry_enter_from_user_mode(struct pt_regs *regs)
{
__enter_from_user_mode(regs);
--
2.30.2
Powered by blists - more mailing lists