lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240617084516.1484390-2-xin@zytor.com>
Date: Mon, 17 Jun 2024 01:45:13 -0700
From: "Xin Li (Intel)" <xin@...or.com>
To: linux-kernel@...r.kernel.org
Cc: luto@...nel.org, tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
        dave.hansen@...ux.intel.com, x86@...nel.org, hpa@...or.com,
        peterz@...radead.org, brgerst@...il.com
Subject: [PATCH v1 1/3] x86/fred: Allow variable-sized event frame

A FRED event frame could contain different amounts of information for
different event types, or perhaps even for different instances of the
same event type. Thus the size of an event frame pushed by a FRED CPU
is not fixed, and the address of the pt_regs structure that is used to
save the user level context of the current task is not at a fixed
offset from the top of the current task's kernel stack.

Add a new field named 'user_pt_regs' to the thread_info structure to
save the address of the user level context pt_regs structure. This
eliminates the need for any advance knowledge of the event frame size
and allows a FRED CPU to push variable-sized event frames.

For IDT user level event delivery, a pt_regs structure is _always_
pushed by hardware and software at a fixed offset from the top of the
current task's kernel stack, so simply initialize user_pt_regs to point
to that pt_regs structure regardless of whether one has been pushed yet.

For FRED user level event delivery, by contrast, user_pt_regs is updated
with the pt_regs structure pointer generated in asm_fred_entrypoint_user().

Suggested-by: H. Peter Anvin (Intel) <hpa@...or.com>
Signed-off-by: Xin Li (Intel) <xin@...or.com>
---
 arch/x86/entry/entry_fred.c        | 22 ++++++++++++++++++++++
 arch/x86/include/asm/processor.h   | 18 ++++++++++++------
 arch/x86/include/asm/thread_info.h |  9 ++++++---
 arch/x86/kernel/process.c          | 22 ++++++++++++++++++++++
 include/linux/thread_info.h        |  1 +
 kernel/fork.c                      |  6 ++++++
 6 files changed, 69 insertions(+), 9 deletions(-)

diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index f004a4dc74c2..1d54d451acb6 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -228,6 +228,28 @@ __visible noinstr void fred_entry_from_user(struct pt_regs *regs)
 	/* Invalidate orig_ax so that syscall_get_nr() works correctly */
 	regs->orig_ax = -1;
 
+	/*
+	 * A FRED event frame could contain different amounts of information
+	 * for different event types, or perhaps even for different instances
+	 * of the same event type. Thus the size of an event frame pushed by
+	 * a FRED CPU is not fixed, and the address of the pt_regs structure
+	 * that is used to save the user level context of the current task is
+	 * not at a fixed offset from the top of the current task's stack.
+	 *
+	 * Save the address of the pt_regs structure, which is generated in and
+	 * passed from the caller asm_fred_entrypoint_user(), in thread_info so
+	 * that task_pt_regs() can be used to access the pt_regs structure
+	 * containing the user level context after this point.
+	 *
+	 * What if another event happens before this point?
+	 *
+	 * Actually, another kernel event could happen earlier, even before the
+	 * pt_regs structure for saving user level context is completely saved.
+	 * It is guaranteed that the handler of the new event will NOT access
+	 * the pt_regs structure of the previous user level event.
+	 */
+	current->thread_info.user_pt_regs = regs;
+
 	switch (regs->fred_ss.type) {
 	case EVENT_TYPE_EXTINT:
 		return fred_extint(regs);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index bd0621210f63..ea7733e7bf1d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -634,12 +634,18 @@ static __always_inline void prefetchw(const void *x)
 
 #define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
 
-#define task_pt_regs(task) \
-({									\
-	unsigned long __ptr = (unsigned long)task_stack_page(task);	\
-	__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;		\
-	((struct pt_regs *)__ptr) - 1;					\
-})
+/*
+ * task_pt_regs() no longer converts a fixed offset from the top of a
+ * task's kernel stack to a pt_regs structure pointer, but rather returns
+ * whatever is in the thread_info.user_pt_regs field, which contains the
+ * address of the pt_regs structure used to save the user level context
+ * of the task.
+ *
+ * Note, this can't be converted to an inline function as this header
+ * file defines 'struct thread_struct' which is used in the task_struct
+ * structure definition.
+ */
+#define task_pt_regs(task) ((task)->thread_info.user_pt_regs)
 
 #ifdef CONFIG_X86_32
 #define INIT_THREAD  {							  \
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 12da7dfd5ef1..326268d440cf 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -56,6 +56,7 @@
  */
 #ifndef __ASSEMBLY__
 struct task_struct;
+struct pt_regs;
 #include <asm/cpufeature.h>
 #include <linux/atomic.h>
 
@@ -66,11 +67,13 @@ struct thread_info {
 #ifdef CONFIG_SMP
 	u32			cpu;		/* current CPU */
 #endif
+	struct pt_regs		*user_pt_regs;
 };
 
-#define INIT_THREAD_INFO(tsk)			\
-{						\
-	.flags		= 0,			\
+#define INIT_THREAD_INFO(tsk)						\
+{									\
+	.flags		= 0,						\
+	.user_pt_regs	= (struct pt_regs *)TOP_OF_INIT_STACK - 1,	\
 }
 
 #else /* !__ASSEMBLY__ */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0c63035d8164..787a402e4ead 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -100,6 +100,28 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return 0;
 }
 
+/*
+ * Initialize thread_info.user_pt_regs for IDT event delivery.
+ *
+ * For IDT user level event delivery, a pt_regs structure is pushed by both
+ * hardware and software and always resides at a fixed offset from the top
+ * of the task's kernel stack, thus thread_info.user_pt_regs is a per-task
+ * constant and NEVER changes after initialization.
+ *
+ * For FRED user level event delivery, by contrast, user_pt_regs is updated
+ * in fred_entry_from_user() immediately after user level event delivery.
+ *
+ * Note: thread_info.user_pt_regs of the init task is initialized at build
+ * time.
+ */
+void arch_init_user_pt_regs(struct task_struct *tsk)
+{
+	unsigned long top_of_stack = (unsigned long)task_stack_page(tsk) + THREAD_SIZE;
+
+	top_of_stack -= TOP_OF_KERNEL_STACK_PADDING;
+	tsk->thread_info.user_pt_regs = (struct pt_regs *)top_of_stack - 1;
+}
+
 #ifdef CONFIG_X86_64
 void arch_release_task_struct(struct task_struct *tsk)
 {
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 9ea0b28068f4..5b2a75a19a07 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -260,6 +260,7 @@ void arch_task_cache_init(void); /* for CONFIG_SH */
 void arch_release_task_struct(struct task_struct *tsk);
 int arch_dup_task_struct(struct task_struct *dst,
 				struct task_struct *src);
+void arch_init_user_pt_regs(struct task_struct *tsk);
 
 #endif	/* __KERNEL__ */
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 99076dbe27d8..c4198599a7d4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1089,6 +1089,10 @@ int __weak arch_dup_task_struct(struct task_struct *dst,
 	return 0;
 }
 
+void __weak arch_init_user_pt_regs(struct task_struct *tsk)
+{
+}
+
 void set_task_stack_end_magic(struct task_struct *tsk)
 {
 	unsigned long *stackend;
@@ -1116,6 +1120,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (err)
 		goto free_tsk;
 
+	arch_init_user_pt_regs(tsk);
+
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	refcount_set(&tsk->stack_refcount, 1);
 #endif
-- 
2.45.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ