lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240214113516.2307-6-petrtesarik@huaweicloud.com>
Date: Wed, 14 Feb 2024 12:35:13 +0100
From: Petr Tesarik <petrtesarik@...weicloud.com>
To: Jonathan Corbet <corbet@....net>,
	Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...hat.com>,
	Borislav Petkov <bp@...en8.de>,
	Dave Hansen <dave.hansen@...ux.intel.com>,
	x86@...nel.org (maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)),
	"H. Peter Anvin" <hpa@...or.com>,
	Andy Lutomirski <luto@...nel.org>,
	Oleg Nesterov <oleg@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Xin Li <xin3.li@...el.com>,
	Arnd Bergmann <arnd@...db.de>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Rick Edgecombe <rick.p.edgecombe@...el.com>,
	Kees Cook <keescook@...omium.org>,
	"Masami Hiramatsu (Google)" <mhiramat@...nel.org>,
	Pengfei Xu <pengfei.xu@...el.com>,
	Josh Poimboeuf <jpoimboe@...nel.org>,
	Ze Gao <zegao2021@...il.com>,
	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
	Kai Huang <kai.huang@...el.com>,
	David Woodhouse <dwmw@...zon.co.uk>,
	Brian Gerst <brgerst@...il.com>,
	Jason Gunthorpe <jgg@...pe.ca>,
	Joerg Roedel <jroedel@...e.de>,
	"Mike Rapoport (IBM)" <rppt@...nel.org>,
	Tina Zhang <tina.zhang@...el.com>,
	Jacob Pan <jacob.jun.pan@...ux.intel.com>,
	linux-doc@...r.kernel.org (open list:DOCUMENTATION),
	linux-kernel@...r.kernel.org (open list)
Cc: Roberto Sassu <roberto.sassu@...weicloud.com>,
	petr@...arici.cz,
	Petr Tesarik <petr.tesarik1@...wei-partners.com>
Subject: [PATCH v1 5/8] sbm: x86: handle sandbox mode faults

From: Petr Tesarik <petr.tesarik1@...wei-partners.com>

Provide a fault handler for sandbox mode. Set the sandbox mode instance
error code, abort the sandbox and return to the caller. To allow graceful
return from a fatal fault, save all callee-saved registers (including the
stack pointer) just before passing control to the target function.

Modify the handlers for #PF, #GP and #DF CPU exceptions to call this
handler if coming from sandbox mode. The check is based on the saved CS
register, which should be modified in the entry path to a value that is
otherwise not possible (__SBM_CS).

For the page fault handler, make sure that the sandbox mode check is placed
before do_kern_addr_fault(). That function calls spurious_kernel_fault(),
which implements lazy TLB invalidation of kernel pages and assumes that
the faulting instruction ran with kernel-mode page tables; it would produce
false positives for sandbox mode.

Signed-off-by: Petr Tesarik <petr.tesarik1@...wei-partners.com>
---
 arch/x86/include/asm/ptrace.h  | 21 +++++++++++++++++++++
 arch/x86/include/asm/sbm.h     | 24 ++++++++++++++++++++++++
 arch/x86/include/asm/segment.h |  7 +++++++
 arch/x86/kernel/asm-offsets.c  |  5 +++++
 arch/x86/kernel/sbm/call_64.S  | 21 +++++++++++++++++++++
 arch/x86/kernel/sbm/core.c     | 26 ++++++++++++++++++++++++++
 arch/x86/kernel/traps.c        | 11 +++++++++++
 arch/x86/mm/fault.c            |  6 ++++++
 8 files changed, 121 insertions(+)

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index f4db78b09c8f..f66f16f037b0 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -164,6 +164,27 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 #endif
 }
 
+/*
+ * sandbox_mode() - did a register set come from SandBox Mode?
+ * @regs:  register set
+ */
+static inline bool sandbox_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_SANDBOX_MODE
+	/*
+	 * SandBox Mode always runs in 64-bit and it is not implemented
+	 * on paravirt systems, so this is the only possible value.
+	 */
+	return regs->cs == __SBM_CS;
+#else /* !CONFIG_SANDBOX_MODE */
+	return false;
+#endif
+#else /* !CONFIG_X86_64 */
+	return false;
+#endif
+}
+
 /*
  * Determine whether the register set came from any context that is running in
  * 64-bit mode.
diff --git a/arch/x86/include/asm/sbm.h b/arch/x86/include/asm/sbm.h
index ca4741b449e8..229b1ac3bbd4 100644
--- a/arch/x86/include/asm/sbm.h
+++ b/arch/x86/include/asm/sbm.h
@@ -11,23 +11,29 @@
 
 #include <asm/processor.h>
 
+struct pt_regs;
+
 #if defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE)
 
 #include <asm/pgtable_types.h>
 
 /**
  * struct x86_sbm_state - Run-time state of the environment.
+ * @sbm:         Link back to the SBM instance.
  * @pgd:         Sandbox mode page global directory.
  * @stack:       Sandbox mode stack.
  * @exc_stack:   Exception and IRQ stack.
+ * @return_sp:   Stack pointer for returning to kernel mode.
  *
  * One instance of this union is allocated for each sandbox and stored as SBM
  * instance private data.
  */
 struct x86_sbm_state {
+	struct sbm *sbm;
 	pgd_t *pgd;
 	unsigned long stack;
 	unsigned long exc_stack;
+	unsigned long return_sp;
 };
 
 /**
@@ -43,6 +49,18 @@ static inline unsigned long top_of_intr_stack(void)
 	return current_top_of_stack();
 }
 
+/**
+ * handle_sbm_fault() - Handle a CPU fault in sandbox mode.
+ * @regs:       Saved registers at fault.
+ * @error_code: CPU error code.
+ * @address:    Fault address (CR2 register).
+ *
+ * Handle a sandbox mode fault. The caller should use sandbox_mode() to
+ * check that @regs came from sandbox mode before calling this function.
+ */
+void handle_sbm_fault(struct pt_regs *regs, unsigned long error_code,
+		      unsigned long address);
+
 #else /* defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE) */
 
 static inline unsigned long top_of_intr_stack(void)
@@ -50,6 +68,12 @@ static inline unsigned long top_of_intr_stack(void)
 	return current_top_of_stack();
 }
 
+static inline void handle_sbm_fault(struct pt_regs *regs,
+				    unsigned long error_code,
+				    unsigned long address)
+{
+}
+
 #endif /* defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE) */
 
 #endif /* __ASM_SBM_H */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 9d6411c65920..966831385d18 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -217,6 +217,13 @@
 #define __USER_CS			(GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
 #define __CPUNODE_SEG			(GDT_ENTRY_CPUNODE*8 + 3)
 
+/*
+ * Sandbox runs with __USER_CS, but the interrupt entry code sets the RPL
+ * in the saved selector to zero to avoid user-mode processing (FPU, signal
+ * delivery, etc.). This is the resulting pseudo-CS.
+ */
+#define __SBM_CS			(GDT_ENTRY_DEFAULT_USER_CS*8)
+
 #endif
 
 #define IDT_ENTRIES			256
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 6913b372ccf7..44d4f0a0cb19 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -20,6 +20,7 @@
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
 #include <asm/tdx.h>
+#include <asm/sbm.h>
 
 #ifdef CONFIG_XEN
 #include <xen/interface/xen.h>
@@ -120,4 +121,8 @@ static void __used common(void)
 	OFFSET(ARIA_CTX_rounds, aria_ctx, rounds);
 #endif
 
+#if defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE)
+	COMMENT("SandBox Mode");
+	OFFSET(SBM_return_sp, x86_sbm_state, return_sp);
+#endif
 }
diff --git a/arch/x86/kernel/sbm/call_64.S b/arch/x86/kernel/sbm/call_64.S
index 1b232c8d15b7..6a615b4f6047 100644
--- a/arch/x86/kernel/sbm/call_64.S
+++ b/arch/x86/kernel/sbm/call_64.S
@@ -22,6 +22,17 @@
  * rcx  .. top of sandbox stack
  */
 SYM_FUNC_START(x86_sbm_exec)
+	/* save all callee-saved registers */
+	push	%rbp
+	push	%rbx
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	/* to be used by sandbox abort */
+	mov	%rsp, SBM_return_sp(%rdi)
+
 	/*
 	 * Set up the sandbox stack:
 	 * 1. Store the old stack pointer at the top of the sandbox stack,
@@ -37,5 +48,15 @@ SYM_FUNC_START(x86_sbm_exec)
 
 	pop	%rsp
 
+SYM_INNER_LABEL(x86_sbm_return, SYM_L_GLOBAL)
+	ANNOTATE_NOENDBR	// IRET target via handle_sbm_fault()
+
+	/* restore callee-saved registers and return */
+	pop	%r15
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbx
+	pop	%rbp
 	RET
 SYM_FUNC_END(x86_sbm_exec)
diff --git a/arch/x86/kernel/sbm/core.c b/arch/x86/kernel/sbm/core.c
index 81f1b0093537..d4c378847e93 100644
--- a/arch/x86/kernel/sbm/core.c
+++ b/arch/x86/kernel/sbm/core.c
@@ -13,6 +13,8 @@
 #include <asm/page.h>
 #include <asm/sbm.h>
 #include <asm/sections.h>
+#include <asm/segment.h>
+#include <asm/trap_pf.h>
 #include <linux/cpumask.h>
 #include <linux/mm.h>
 #include <linux/sbm.h>
@@ -23,6 +25,7 @@
 
 asmlinkage int x86_sbm_exec(struct x86_sbm_state *state, sbm_func func,
 			    void *args, unsigned long sbm_tos);
+extern char x86_sbm_return[];
 
 static inline phys_addr_t page_to_ptval(struct page *page)
 {
@@ -343,6 +346,8 @@ int arch_sbm_exec(struct sbm *sbm, sbm_func func, void *args)
 	struct x86_sbm_state *state = sbm->private;
 	int err;
 
+	state->sbm = sbm;
+
 	/* let interrupt handlers use the sandbox state page */
 	barrier();
 	WRITE_ONCE(current_thread_info()->sbm_state, state);
@@ -354,3 +359,24 @@ int arch_sbm_exec(struct sbm *sbm, sbm_func func, void *args)
 
 	return err;
 }
+
+void handle_sbm_fault(struct pt_regs *regs, unsigned long error_code,
+		      unsigned long address)
+{
+	struct x86_sbm_state *state = current_thread_info()->sbm_state;
+
+	/*
+	 * Force -EFAULT unless the fault was due to a user-mode instruction
+	 * fetch from the designated return address.
+	 */
+	if (error_code != (X86_PF_PROT | X86_PF_USER | X86_PF_INSTR) ||
+	    address != (unsigned long)x86_sbm_return)
+		state->sbm->error = -EFAULT;
+
+	/* modify IRET frame to exit from sandbox */
+	regs->ip = (unsigned long)x86_sbm_return;
+	regs->cs = __KERNEL_CS;
+	regs->flags = X86_EFLAGS_IF;
+	regs->sp = state->return_sp;
+	regs->ss = __KERNEL_DS;
+}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9c9c74314e7..8fc5b17b8fb4 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -416,6 +416,12 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
 
 	irqentry_nmi_enter(regs);
 	instrumentation_begin();
+
+	if (sandbox_mode(regs)) {
+		handle_sbm_fault(regs, error_code, 0);
+		return;
+	}
+
 	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
 
 	tsk->thread.error_code = error_code;
@@ -675,6 +681,11 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 		goto exit;
 	}
 
+	if (sandbox_mode(regs)) {
+		handle_sbm_fault(regs, error_code, 0);	/* no fault address for #GP */
+		goto exit;	/* leave via the common exit path, like the branches around it */
+	}
+
 	if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc, 0))
 		goto exit;
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 679b09cfe241..f223b258e53f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -34,6 +34,7 @@
 #include <asm/kvm_para.h>		/* kvm_handle_async_pf		*/
 #include <asm/vdso.h>			/* fixup_vdso_exception()	*/
 #include <asm/irq_stack.h>
+#include <asm/sbm.h>
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
@@ -1500,6 +1501,11 @@ handle_page_fault(struct pt_regs *regs, unsigned long error_code,
 	if (unlikely(kmmio_fault(regs, address)))
 		return;
 
+	if (sandbox_mode(regs)) {
+		handle_sbm_fault(regs, error_code, address);
+		return;
+	}
+
 	/* Was the fault on kernel-controlled part of the address space? */
 	if (unlikely(fault_in_kernel_space(address))) {
 		do_kern_addr_fault(regs, error_code, address);
-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ