lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 12 Dec 2022 11:53:43 -0700
From:   "Jason A. Donenfeld" <Jason@...c4.com>
To:     linux-kernel@...r.kernel.org, patches@...ts.linux.dev,
        tglx@...utronix.de
Cc:     "Jason A. Donenfeld" <Jason@...c4.com>,
        linux-crypto@...r.kernel.org, linux-api@...r.kernel.org,
        x86@...nel.org, Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Adhemerval Zanella Netto <adhemerval.zanella@...aro.org>,
        Carlos O'Donell <carlos@...hat.com>,
        Florian Weimer <fweimer@...hat.com>,
        Arnd Bergmann <arnd@...db.de>, Jann Horn <jannh@...gle.com>,
        Christian Brauner <brauner@...nel.org>, linux-mm@...ck.org
Subject: [PATCH RFC v12 2/6] x86: mm: Skip faulting instruction for VM_DROPPABLE faults

The prior commit introduced VM_DROPPABLE, but in a limited form where
the faulting instruction was retried instead of skipped. Finish that up
with the platform-specific aspect of skipping the actual instruction.

This works by copying userspace's %rip to a stack buffer of size
MAX_INSN_SIZE, decoding it, and then adding the length of the decoded
instruction to userspace's %rip. In the event any of these fail, just
fallback to not advancing %rip and trying again.

Cc: linux-mm@...ck.org
Signed-off-by: Jason A. Donenfeld <Jason@...c4.com>
---
 arch/x86/mm/fault.c      | 19 +++++++++++++++++++
 include/linux/mm_types.h |  5 ++++-
 mm/memory.c              |  4 +++-
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7b0d4ab894c8..e5328073f8e0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -33,6 +33,8 @@
 #include <asm/kvm_para.h>		/* kvm_handle_async_pf		*/
 #include <asm/vdso.h>			/* fixup_vdso_exception()	*/
 #include <asm/irq_stack.h>
+#include <asm/insn.h>			/* insn_decode()		*/
+#include <asm/compat.h>			/* in_32bit_syscall()		*/
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
@@ -1454,6 +1456,23 @@ void do_user_addr_fault(struct pt_regs *regs,
 	}
 
 	mmap_read_unlock(mm);
+
+	if (fault & VM_FAULT_SKIP_INSN) {
+		u8 insn_buf[MAX_INSN_SIZE];
+		struct insn insn;
+		size_t len;
+
+		len = sizeof(insn_buf) - copy_from_user(insn_buf, (void *)regs->ip, sizeof(insn_buf));
+		if (!len)
+			return;
+
+		if (insn_decode(&insn, insn_buf, len, in_32bit_syscall() ? INSN_MODE_32 : INSN_MODE_64) < 0)
+			return;
+
+		regs->ip += insn.length;
+		return;
+	}
+
 	if (likely(!(fault & VM_FAULT_ERROR)))
 		return;
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 500e536796ca..4def1051499b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -861,6 +861,7 @@ typedef __bitwise unsigned int vm_fault_t;
  *				fsync() to complete (for synchronous page faults
  *				in DAX)
  * @VM_FAULT_COMPLETED:		->fault completed, meanwhile mmap lock released
+ * @VM_FAULT_SKIP_INSN:		->handle the fault by skipping faulting instruction
  * @VM_FAULT_HINDEX_MASK:	mask HINDEX value
  *
  */
@@ -879,6 +880,7 @@ enum vm_fault_reason {
 	VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
 	VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
 	VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
+	VM_FAULT_SKIP_INSN      = (__force vm_fault_t)0x008000,
 	VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
 };
 
@@ -903,7 +905,8 @@ enum vm_fault_reason {
 	{ VM_FAULT_RETRY,               "RETRY" },	\
 	{ VM_FAULT_FALLBACK,            "FALLBACK" },	\
 	{ VM_FAULT_DONE_COW,            "DONE_COW" },	\
-	{ VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" }
+	{ VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" },	\
+	{ VM_FAULT_SKIP_INSN,		"SKIP_INSN" }
 
 struct vm_special_mapping {
 	const char *name;	/* The name, e.g. "[vdso]". */
diff --git a/mm/memory.c b/mm/memory.c
index 72403585e1a5..8834a7c1580f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5220,8 +5220,10 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	lru_gen_exit_fault();
 
 	/* If the mapping is droppable, then errors due to OOM aren't fatal. */
-	if (vma->vm_flags & VM_DROPPABLE)
+	if ((ret & VM_FAULT_OOM) && (vma->vm_flags & VM_DROPPABLE)) {
 		ret &= ~VM_FAULT_OOM;
+		ret |= VM_FAULT_SKIP_INSN;
+	}
 
 	if (flags & FAULT_FLAG_USER) {
 		mem_cgroup_exit_user_fault();
-- 
2.38.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ