Message-Id: <20181017223332.11964-7-linux@rasmusvillemoes.dk>
Date:   Thu, 18 Oct 2018 00:33:27 +0200
From:   Rasmus Villemoes <linux@...musvillemoes.dk>
To:     linux-kernel@...r.kernel.org
Cc:     x86@...nel.org, "H . Peter Anvin" <hpa@...or.com>,
        Ingo Molnar <mingo@...nel.org>,
        "Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>,
        Rasmus Villemoes <linux@...musvillemoes.dk>
Subject: [POC 07/12] x86-64: rai: implement _rai_load

This implements the simplest of the rai_* operations: loading a
value. For a load of an 8-byte value, I believe we do need to keep room
for a movabs, since there's no guarantee the final value can be loaded
as an imm32 or using a %rip-relative leaq.

It wouldn't hurt to add some sanity checking in rai_patch_one, e.g. at
least check that the immediate we are replacing is the dummy 0x12345678
we used in the .rai_templ section.
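
For illustration, a minimal sketch of what such a check could look
like; rai_check_dummy is a hypothetical helper, not part of this
patch, and it reuses the names from rai_patch_one below:

static bool rai_check_dummy(const struct rai_entry *r, const u8 *templ)
{
	u32 dummy32 = 0x12345678;
	u64 dummy64 = 0x1234567812345678ULL;

	switch (r->type) {
	case RAI_LOAD_4:
		/* the last 4 bytes of the template must still be the dummy */
		return !memcmp(templ + r->templ_len - sizeof(dummy32),
			       &dummy32, sizeof(dummy32));
	case RAI_LOAD_8:
		/* the last 8 bytes of the template must still be the dummy */
		return !memcmp(templ + r->templ_len - sizeof(dummy64),
			       &dummy64, sizeof(dummy64));
	default:
		return false;
	}
}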

That the patching works can be seen in a quick virtme session.  gdb on
vmlinux and /proc/kcore shows

(gdb) x/16i rai_proc_show
   0xffffffff8108c120 <rai_proc_show>:  mov    $0xffffffff81fd9ad4,%rsi
   0xffffffff8108c127 <rai_proc_show+7>:        jmpq   0xffffffff819652e9
   0xffffffff8108c12c <rai_proc_show+12>:       nop
   0xffffffff8108c12d <rai_proc_show+13>:       nop
   0xffffffff8108c12e <rai_proc_show+14>:       nop
   0xffffffff8108c12f <rai_proc_show+15>:       nop
   0xffffffff8108c130 <rai_proc_show+16>:       nop
   0xffffffff8108c131 <rai_proc_show+17>:       jmpq   0xffffffff819652f5
   0xffffffff8108c136 <rai_proc_show+22>:       jmpq   0xffffffff81965300
   0xffffffff8108c13b <rai_proc_show+27>:       callq  0xffffffff81238bb0 <seq_printf>
   0xffffffff8108c140 <rai_proc_show+32>:       mov    $0xffffffffffffffff,%rax
   0xffffffff8108c147 <rai_proc_show+39>:       mov    %rax,0x17b228a(%rip)        # 0xffffffff8283e3d8 <three>
   0xffffffff8108c14e <rai_proc_show+46>:       mov    %eax,0x17b228c(%rip)        # 0xffffffff8283e3e0 <two>
   0xffffffff8108c154 <rai_proc_show+52>:       mov    %eax,0x17b228a(%rip)        # 0xffffffff8283e3e4 <one>
   0xffffffff8108c15a <rai_proc_show+58>:       xor    %eax,%eax
   0xffffffff8108c15c <rai_proc_show+60>:       retq
(gdb) x/16i 0xffffffff96e8c120
   0xffffffff96e8c120:  mov    $0xffffffff97dd9ad4,%rsi
   0xffffffff96e8c127:  movabs $0x3,%r8
   0xffffffff96e8c131:  mov    $0x2,%ecx
   0xffffffff96e8c136:  mov    $0x1,%edx
   0xffffffff96e8c13b:  callq  0xffffffff97038bb0
   0xffffffff96e8c140:  mov    $0xffffffffffffffff,%rax
   0xffffffff96e8c147:  mov    %rax,0x17b228a(%rip)        # 0xffffffff9863e3d8
   0xffffffff96e8c14e:  mov    %eax,0x17b228c(%rip)        # 0xffffffff9863e3e0
   0xffffffff96e8c154:  mov    %eax,0x17b228a(%rip)        # 0xffffffff9863e3e4
   0xffffffff96e8c15a:  xor    %eax,%eax
   0xffffffff96e8c15c:  retq
   0xffffffff96e8c15d:  nopl   (%rax)
   0xffffffff96e8c160:  push   %rbx
   0xffffffff96e8c161:  mov    $0xffffffff9804c240,%rdi
   0xffffffff96e8c168:  mov    $0xffffffff97e9fccc,%rbx
   0xffffffff96e8c16f:  callq  0xffffffff9776b230

where we also see that gcc chooses the destination registers rather
intelligently. As expected, repeated "cat /proc/rai" continues to print
"one: 1, two: 2, three: 3".

Signed-off-by: Rasmus Villemoes <linux@...musvillemoes.dk>
---
 arch/x86/include/asm/rai.S | 42 +++++++++++++++++++++++++++++++++++++-
 arch/x86/include/asm/rai.h | 30 ++++++++++++++++++++++++++-
 arch/x86/kernel/rai.c      | 18 ++++++++++++++++
 3 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/rai.S b/arch/x86/include/asm/rai.S
index 253d27453416..f42cdd8db876 100644
--- a/arch/x86/include/asm/rai.S
+++ b/arch/x86/include/asm/rai.S
@@ -8,11 +8,51 @@
 	.long \templ_end - \templ
 	.long \thunk - .
 .endm
-	
+
 .macro rai_entry_pad start end
 	.ifgt STRUCT_RAI_ENTRY_SIZE-(\end-\start)
 	.skip STRUCT_RAI_ENTRY_SIZE-(\end-\start), 0x00
 	.endif
 .endm
 
+.macro rai_load dst, var, type
+	.pushsection .rai_templ, "aw"
+10:
+	.ifeq \type - RAI_LOAD_8
+	movabs $0x1234567812345678, \dst
+	.else
+	mov $0x12345678, \dst
+	.endif
+11:
+	.popsection
+
+	/* Even if the mov \var, \dst is short enough to fit in the
+	 * space we reserve in .text, we still need the thunk for when
+	 * we do the immediate patching. */
+	.pushsection .text.rai_thunk, "ax"
+20:
+	mov \var(%rip), \dst
+	jmp 32f
+21:
+	.popsection
+
+	/* The part that goes into .text */
+30:
+	/* silence objtool by actually using the thunk for now */
+	jmp 20b
+	/* mov \var(%rip), \dst */
+31:
+	.skip -(((11b - 10b)-(31b - 30b)) > 0)*((11b - 10b)-(31b - 30b)), 0x90
+32:
+
+	.pushsection .rai_data, "a"
+40:
+	rai_entry \type 30b 32b 10b 11b 20b
+	.quad \var   /* .load.addr */
+41:
+	rai_entry_pad 40b 41b
+	.popsection
+.endm /* rai_load */
+
+
 #endif
diff --git a/arch/x86/include/asm/rai.h b/arch/x86/include/asm/rai.h
index 269d696255b0..b57494c98d0f 100644
--- a/arch/x86/include/asm/rai.h
+++ b/arch/x86/include/asm/rai.h
@@ -1,7 +1,10 @@
 #ifndef _ASM_X86_RAI_H
 #define _ASM_X86_RAI_H
 
-#define STRUCT_RAI_ENTRY_SIZE 24
+#define RAI_LOAD_4 0
+#define RAI_LOAD_8 1
+
+#define STRUCT_RAI_ENTRY_SIZE 32
 
 /* Put the asm macros in a separate file for easier editing. */
 #include <asm/rai.S>
@@ -16,10 +19,35 @@ struct rai_entry {
 	s32 templ_len;    /* length of template */
 	s32 thunk_offset; /* member-relative offset to ool thunk */
 	/* type-specific data follows */
+	union {
+		struct {
+			void *addr;
+		} load;
+	};
 };
 _Static_assert(sizeof(struct rai_entry) == STRUCT_RAI_ENTRY_SIZE,
 	       "please update STRUCT_RAI_ENTRY_SIZE");
 
+#define _rai_load(var) ({						\
+		typeof(var) ret__;					\
+		switch(sizeof(var)) {					\
+		case 4:							\
+			asm("rai_load %0, %c1, %c2"			\
+			    : "=r" (ret__)				\
+			    : "i" (&(var)), "i" (RAI_LOAD_4));		\
+			break;						\
+		case 8:							\
+			asm("rai_load %0, %c1, %c2"			\
+			    : "=r" (ret__)				\
+			    : "i" (&(var)), "i" (RAI_LOAD_8));		\
+			break;						\
+		default:						\
+			ret__ = _rai_load_fallback(var);		\
+			break;						\
+		}							\
+		ret__;							\
+	})
+
 #endif /* !__ASSEMBLY */
 
 #endif /* _ASM_X86_RAI_H */
diff --git a/arch/x86/kernel/rai.c b/arch/x86/kernel/rai.c
index 819d03a025e3..e55e85f11a2e 100644
--- a/arch/x86/kernel/rai.c
+++ b/arch/x86/kernel/rai.c
@@ -14,6 +14,24 @@ rai_patch_one(const struct rai_entry *r)
 	u8 *thunk = (u8*)&r->thunk_offset + r->thunk_offset;
 
 	switch (r->type) {
+	case RAI_LOAD_4: {
+		const u32 *imm = r->load.addr;
+		/*
+		 * The immediate is the last 4 bytes of the template,
+		 * regardless of the operand encoding.
+		 */
+		memcpy(templ + r->templ_len - sizeof(*imm), imm, sizeof(*imm));
+		break;
+	}
+	case RAI_LOAD_8: {
+		const u64 *imm = r->load.addr;
+		/*
+		 * The immediate is the last 8 bytes of the template,
+		 * regardless of the operand encoding.
+		 */
+		memcpy(templ + r->templ_len - sizeof(*imm), imm, sizeof(*imm));
+		break;
+	}
 	default:
 		WARN_ONCE(1, "unhandled RAI type %d\n", r->type);
 		return;
-- 
2.19.1.6.gbde171bbf5
