lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87tu7euska.ffs@tglx>
Date:   Mon, 18 Jul 2022 21:30:45 +0200
From:   Thomas Gleixner <tglx@...utronix.de>
To:     LKML <linux-kernel@...r.kernel.org>
Cc:     x86@...nel.org, Linus Torvalds <torvalds@...ux-foundation.org>,
        Tim Chen <tim.c.chen@...ux.intel.com>,
        Josh Poimboeuf <jpoimboe@...nel.org>,
        Andrew Cooper <Andrew.Cooper3@...rix.com>,
        Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>,
        Johannes Wikner <kwikner@...z.ch>,
        Alyssa Milburn <alyssa.milburn@...ux.intel.com>,
        Jann Horn <jannh@...gle.com>, "H.J. Lu" <hjl.tools@...il.com>,
        Joao Moreira <joao.moreira@...el.com>,
        Joseph Nuzman <joseph.nuzman@...el.com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Juergen Gross <jgross@...e.com>,
        "Peter Zijlstra (Intel)" <peterz@...radead.org>,
        Masami Hiramatsu <mhiramat@...nel.org>,
        Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>
Subject: Re: [patch 00/38] x86/retbleed: Call depth tracking mitigation

On Mon, Jul 18 2022 at 21:29, Thomas Gleixner wrote:
>> The implementation falls back to the allocated thunks when padding is not
>> available. I'll send out the GCC patch and the required kernel patch as a
>> reply to this series after polishing it a bit.
>
> Here it goes. GCC hackery first.

And the kernel counterpart.

---
Subject: x86/callthunks: Put thunks into compiler provided padding area
From: Thomas Gleixner <tglx@...utronix.de>
Date: Fri, 15 Jul 2022 16:12:47 +0200

      - NOT FOR INCLUSION -

Let the compiler add a 16 byte padding in front of each function entry
point and put the call depth accounting there. That avoids calling out
into the module area and reduces ITLB pressure.

Not-Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
---
 arch/x86/Kconfig             |   14 ++++++
 arch/x86/Makefile            |    4 +
 arch/x86/kernel/callthunks.c |   99 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 115 insertions(+), 2 deletions(-)

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2440,6 +2440,9 @@ config CC_HAS_SLS
 config CC_HAS_RETURN_THUNK
 	def_bool $(cc-option,-mfunction-return=thunk-extern)
 
+config CC_HAS_PADDING
+	def_bool $(cc-option,-mforce-function-padding)
+
 config HAVE_CALL_THUNKS
 	def_bool y
 	depends on RETHUNK && OBJTOOL
@@ -2512,6 +2515,17 @@ config CALL_DEPTH_TRACKING
 	  of this option is marginal as the call depth tracking is using
 	  run-time generated call thunks and call patching.
 
+config CALL_THUNKS_IN_PADDING
+	bool "Put call depth into padding area before function"
+	depends on CALL_DEPTH_TRACKING && CC_HAS_PADDING
+	default n
+	help
+	  Put the call depth accounting into a padding area before the
+	  function entry. This padding area is generated by the
+	  compiler. This increases text size by ~5%. For non-affected
+	  systems this space is unused. On affected SKL systems this
+	  results in a significant performance gain.
+
 config CALL_THUNKS_DEBUG
 	bool "Enable call thunks and call depth tracking debugging"
 	depends on CALL_DEPTH_TRACKING
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -197,6 +197,10 @@ ifdef CONFIG_SLS
   KBUILD_CFLAGS += -mharden-sls=all
 endif
 
+ifdef CONFIG_CALL_THUNKS_IN_PADDING
+  KBUILD_CFLAGS += -mforce-function-padding
+endif
+
 KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
 
 ifdef CONFIG_LTO_CLANG
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -92,6 +92,7 @@ struct thunk_mem {
 
 struct thunk_mem_area {
 	struct thunk_mem	*tmem;
+	unsigned long		*dests;	/* padding thunk dests, modules only */
 	unsigned long		start;
 	unsigned long		nthunks;
 };
@@ -181,6 +182,16 @@ static __init_or_module void callthunk_f
 	      tmem->base + area->start * callthunk_desc.thunk_size,
 	      area->start, area->nthunks);
 
+	/* Remove thunks in the padding area */
+	for (i = 0; area->dests && i < area->nthunks; i++) {
+		void *dest = (void *)area->dests[i];
+
+		if (!dest)
+			continue;
+		pr_info("Remove %px at index %u\n", dest, i);
+		btree_remove64(&call_thunks, (unsigned long)dest);
+	}
+
 	/* Jump starts right after the template */
 	thunk = tmem->base + area->start * callthunk_desc.thunk_size;
 	tp = thunk + callthunk_desc.template_size;
@@ -204,6 +215,7 @@ static __init_or_module void callthunk_f
 		size = area->nthunks * callthunk_desc.thunk_size;
 		text_poke_set_locked(thunk, 0xcc, size);
 	}
+	vfree(area->dests);
 	kfree(area);
 }
 
@@ -289,7 +301,8 @@ patch_paravirt_call_sites(struct paravir
 		patch_call(p->instr, layout);
 }
 
-static struct thunk_mem_area *callthunks_alloc(unsigned int nthunks)
+static struct thunk_mem_area *callthunks_alloc(unsigned int nthunks,
+					       bool module)
 {
 	struct thunk_mem_area *area;
 	unsigned int size, mapsize;
@@ -299,6 +312,13 @@ static struct thunk_mem_area *callthunks
 	if (!area)
 		return NULL;
 
+	if (module) {
+		area->dests = vzalloc(nthunks * sizeof(unsigned long));
+		if (!area->dests)
+			goto free_area;
+		pr_info("Allocated dests array: %px\n", area->dests);
+	}
+
 	list_for_each_entry(tmem, &thunk_mem_list, list) {
 		unsigned long start;
 
@@ -340,6 +360,7 @@ static struct thunk_mem_area *callthunks
 free_tmem:
 	kfree(tmem);
 free_area:
+	vfree(area->dests);
 	kfree(area);
 	return NULL;
 }
@@ -372,6 +393,73 @@ static __init_or_module int callthunk_se
 	return 0;
 }
 
+/*
+ * Put call thunks into the 16 byte 0xcc-filled padding in front of each
+ * destination in [start, end). Returns 0 or the btree_insert64() error.
+ */
+int setup_padding_thunks(s32 *start, s32 *end, struct thunk_mem_area *area,
+			 struct module_layout *layout)
+{
+	int nthunks = 0, idx = 0;
+	s32 *s;
+
+	/* The template and the 6 byte NOP behind it must fit into the padding */
+	if (callthunk_desc.template_size + 6 > 16)
+		return 0;
+
+	for (s = start; s < end; s++) {
+		void *thunk, *tp, *dest = (void *)s + *s;
+		unsigned long key = (unsigned long)dest;
+		int fail = 0, i;
+		u8 opcode;
+
+		/* Skip only this symbol, the remaining ones still need thunks */
+		if (is_inittext(layout, dest)) {
+			prdbg("Ignoring init dest: %pS %px\n", dest, dest);
+			continue;
+		}
+
+		/* Multiple symbols can have the same location. */
+		if (btree_lookup64(&call_thunks, key)) {
+			prdbg("Ignoring duplicate dest: %pS %px\n", dest, dest);
+			continue;
+		}
+
+		thunk = tp = dest - 16;
+		prdbg("Probing dest: %pS %px at %px\n", dest, dest, tp);
+		pagefault_disable();
+		for (i = 0; !fail && i < 16; i++) {
+			if (get_kernel_nofault(opcode, tp + i))
+				fail = 1;
+			else if (opcode != 0xcc)
+				fail = 2;
+		}
+		pagefault_enable();
+		if (fail) {
+			if (fail == 1) {
+				prdbg("Faulted for dest: %pS %px\n", dest, dest);
+			} else {
+				prdbg("No padding for dest: %pS %px\n", dest, dest);
+			}
+			nthunks++;
+			continue;
+		}
+
+		prdbg("Thunk for dest: %pS %px at %px\n", dest, dest, tp);
+		memcpy(tp, callthunk_desc.template, callthunk_desc.template_size);
+		memcpy(tp + callthunk_desc.template_size, x86_nops[6], 6);
+		if (area->dests) {
+			pr_info("Insert %px at index %d\n", dest, idx);
+			area->dests[idx++] = key;
+		}
+		fail = btree_insert64(&call_thunks, key, (void *)thunk, GFP_KERNEL);
+		if (fail)
+			return fail;
+	}
+	prdbg("%d external thunks required\n", nthunks);
+	return 0;
+}
+
 static __init_or_module int callthunks_setup(struct callthunk_sites *cs,
 					     struct module_layout *layout)
 {
@@ -394,7 +482,7 @@ static __init_or_module int callthunks_s
 	if (!nthunks)
 		goto patch;
 
-	area = callthunks_alloc(nthunks);
+	area = callthunks_alloc(nthunks, !!layout->mtn.mod);
 	if (!area)
 		return -ENOMEM;
 
@@ -420,6 +508,13 @@ static __init_or_module int callthunks_s
 		prdbg("Using thunk vbuf %px\n", vbuf);
 	}
 
+	if (IS_ENABLED(CONFIG_CALL_THUNKS_IN_PADDING)) {
+		ret = setup_padding_thunks(cs->syms_start, cs->syms_end,
+					   area, layout);
+		if (ret < 0)
+			goto fail;
+	}
+
 	for (s = cs->syms_start; s < cs->syms_end; s++) {
 		void *dest = (void *)s + *s;
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ