Message-Id: <20250210104034.146273-1-dongml2@chinatelecom.cn>
Date: Mon, 10 Feb 2025 18:40:34 +0800
From: Menglong Dong <menglong8.dong@...il.com>
To: alexei.starovoitov@...il.com,
	x86@...nel.org
Cc: tglx@...utronix.de,
	mingo@...hat.com,
	bp@...en8.de,
	dave.hansen@...ux.intel.com,
	hpa@...or.com,
	rostedt@...dmis.org,
	mhiramat@...nel.org,
	mathieu.desnoyers@...icios.com,
	dongml2@...natelecom.cn,
	linux-kernel@...r.kernel.org,
	linux-trace-kernel@...r.kernel.org,
	bpf@...r.kernel.org
Subject: [RFC PATCH] x86: add function metadata support

With CONFIG_CALL_PADDING enabled, there are 16 bytes (or more) of padding
space before every kernel function, and some kernel features already make
use of it, such as MITIGATION_CALL_DEPTH_TRACKING, CFI_CLANG, FINEIBT,
etc.

This commit adds support for storing per-function metadata in the
function padding; previous discussion can be found in [1]. Generally
speaking, there are two ways to implement this feature:

1. create a global function metadata array, and prepend a 5-byte
instruction "mov %eax, 0x12345678" in the function padding, where
0x12345678 is the index of the function metadata in the array. This way,
5 bytes of the function padding are consumed.

2. prepend a 10-byte instruction "mov %rax, 0x12345678aabbccdd" in the
function padding, where 0x12345678aabbccdd is the address of the
function metadata.

Compared with way 2, way 1 consumes less padding space, but we need to
do more work to manage the global function metadata array. This commit
implements way 1.
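
To make way 1 concrete, here is a minimal userspace sketch (not part of
the patch) of how the 5-byte instruction encodes the index and how the
index can be read back; opcode 0xB8 is "mov imm32, %eax", and the helper
names are only illustrative:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define META_INSN_SIZE	5

/* build the 5-byte "mov $index, %eax" that is written into the padding */
static void encode_meta_insn(uint8_t insn[META_INSN_SIZE], uint32_t index)
{
	insn[0] = 0xB8;					/* mov imm32, %eax */
	memcpy(insn + 1, &index, sizeof(index));	/* metadata array index */
}

/* recover the metadata array index from the instruction's immediate */
static uint32_t decode_meta_index(const uint8_t insn[META_INSN_SIZE])
{
	uint32_t index;

	memcpy(&index, insn + 1, sizeof(index));
	return index;
}

int main(void)
{
	uint8_t insn[META_INSN_SIZE];

	encode_meta_insn(insn, 0x12345678);
	printf("index = 0x%x\n", decode_meta_index(insn));	/* 0x12345678 */
	return 0;
}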

From my research, MITIGATION_CALL_DEPTH_TRACKING consumes the tail
9 bytes of the function padding, and FINEIBT+CFI_CLANG consume the head
7 bytes. So there is no space left for us when
MITIGATION_CALL_DEPTH_TRACKING and CFI_CLANG are both enabled. Therefore,
the following logic is used:
1. use the head 5 bytes if CFI_CLANG is not enabled
2. use the tail 5 bytes if MITIGATION_CALL_DEPTH_TRACKING is not enabled
3. compile the kernel with 5 extra bytes of padding if
   MITIGATION_CALL_DEPTH_TRACKING and CFI_CLANG are both enabled.

In the third case, the kernel is compiled with 21 bytes of function
padding, which means the real function entry is no longer 16-byte
aligned. In [2] I tested the kernel with different padding sizes, and
the extra 5 bytes don't seem to have any impact on performance. However,
breaking the 16-byte alignment of kernel functions is a big change, and
I'm not sure whether it has any other side effects. Another choice is to
compile the kernel with 32-byte alignment when there is no space
available for us in the function padding, but that increases the text
size by ~5%. (I'm not sure which method to use.)
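
To make the three cases concrete, the padding layout I have in mind is
roughly the following (offsets are relative to the function entry "ip",
assuming CONFIG_FUNCTION_PADDING_BYTES=16; this is only a sketch of my
understanding of the existing users):

  case 1, CFI_CLANG disabled (padding = 16 bytes):
    [ip-16, ip-12]  metadata insn (head 5 bytes FineIBT/kCFI would use)
    [ip-9,  ip-1 ]  call depth tracking (tail 9 bytes)

  case 2, MITIGATION_CALL_DEPTH_TRACKING disabled (padding = 16 bytes):
    [ip-16, ip-10]  FineIBT/kCFI (head 7 bytes)
    [ip-5,  ip-1 ]  metadata insn (tail 5 bytes call depth tracking
                    would use)

  case 3, both enabled (padding = 21 bytes):
    [ip-21, ip-17]  metadata insn (the extra 5 bytes)
    [ip-16, ip-10]  FineIBT/kCFI
    [ip-9,  ip-1 ]  call depth tracking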

The beneficiaries of this feature can be BPF and ftrace. For BPF, we can
implement a "dynamic trampoline" and add tracing multi-link support based
on this feature. For ftrace, we can use it to optimize its performance.
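
As a rough illustration of the intended use, a consumer such as a BPF
"dynamic trampoline" or ftrace could pair the two helpers like this. The
caller names are made up for the example; only func_meta_get(),
func_meta_put() and struct func_meta come from this patch, and there is
no private-data field in struct func_meta yet, so this only shows the
reference/lifetime handling:

#include <linux/func_meta.h>

/* hypothetical: called when a tracer attaches to the function at @ip */
static struct func_meta *tracer_attach(void *ip)
{
	struct func_meta *meta;

	/*
	 * Allocates (or reuses) an entry in func_metas and pokes the
	 * "mov $index, %eax" instruction into the padding of @ip.
	 */
	meta = func_meta_get(ip);
	if (!meta)
		return NULL;

	return meta;
}

/* hypothetical: called when a tracer detaches from the function at @ip */
static void tracer_detach(void *ip, struct func_meta *meta)
{
	/*
	 * Drops the reference; the padding is restored to NOPs once the
	 * last user is gone.
	 */
	func_meta_put(ip, meta);
}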

This commit is not complete, as synchronization of the func_metas array
is not handled yet :/

Link: https://lore.kernel.org/bpf/CADxym3anLzM6cAkn_z71GDd_VeKiqqk1ts=xuiP7pr4PO6USPA@mail.gmail.com/ [1]
Link: https://lore.kernel.org/bpf/CADxym3af+CU5Mx8myB8UowdXSc3wJOqWyH4oyq+eXKahXBTXyg@mail.gmail.com/ [2]
Signed-off-by: Menglong Dong <dongml2@...natelecom.cn>
---
 arch/x86/Kconfig          |  15 ++++
 arch/x86/Makefile         |  17 ++--
 include/linux/func_meta.h |  17 ++++
 kernel/trace/Makefile     |   1 +
 kernel/trace/func_meta.c  | 184 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 228 insertions(+), 6 deletions(-)
 create mode 100644 include/linux/func_meta.h
 create mode 100644 kernel/trace/func_meta.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6df7779ed6da..0ff3cb74cfc0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2510,6 +2510,21 @@ config PREFIX_SYMBOLS
 	def_bool y
 	depends on CALL_PADDING && !CFI_CLANG
 
+config FUNCTION_METADATA
+	bool "Enable function metadata support"
+	select CALL_PADDING
+	default y
+	help
+	  This feature allows us to set metadata for kernel functions, and
+	  to look up the metadata of a function by its address at no extra
+	  cost.
+
+	  The index of the metadata is stored in the function padding and
+	  consumes 5 bytes. With CALL_PADDING and FUNCTION_ALIGNMENT_16B,
+	  the spare space in the padding is enough for us as long as
+	  CALL_THUNKS and CFI_CLANG are not both enabled. Otherwise, 5 extra
+	  padding bytes are needed, which increases the text size by ~1%.
+
 menuconfig CPU_MITIGATIONS
 	bool "Mitigations for CPU vulnerabilities"
 	default y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5b773b34768d..05689c9a8942 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -240,13 +240,18 @@ ifdef CONFIG_MITIGATION_SLS
 endif
 
 ifdef CONFIG_CALL_PADDING
-PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
-KBUILD_CFLAGS += $(PADDING_CFLAGS)
-export PADDING_CFLAGS
+  __padding_bytes := $(CONFIG_FUNCTION_PADDING_BYTES)
+  ifneq ($(and $(CONFIG_FUNCTION_METADATA),$(CONFIG_CALL_THUNKS),$(CONFIG_CFI_CLANG)),)
+    __padding_bytes := $(shell echo $(__padding_bytes) + 5 | bc)
+  endif
+
+  PADDING_CFLAGS := -fpatchable-function-entry=$(__padding_bytes),$(__padding_bytes)
+  KBUILD_CFLAGS += $(PADDING_CFLAGS)
+  export PADDING_CFLAGS
 
-PADDING_RUSTFLAGS := -Zpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
-KBUILD_RUSTFLAGS += $(PADDING_RUSTFLAGS)
-export PADDING_RUSTFLAGS
+  PADDING_RUSTFLAGS := -Zpatchable-function-entry=$(__padding_bytes),$(__padding_bytes)
+  KBUILD_RUSTFLAGS += $(PADDING_RUSTFLAGS)
+  export PADDING_RUSTFLAGS
 endif
 
 KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
diff --git a/include/linux/func_meta.h b/include/linux/func_meta.h
new file mode 100644
index 000000000000..840cbd858c47
--- /dev/null
+++ b/include/linux/func_meta.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FUNC_META_H
+#define _LINUX_FUNC_META_H
+
+#include <linux/kernel.h>
+
+struct func_meta {
+	int users;
+	void *func;
+};
+
+extern struct func_meta *func_metas;
+
+struct func_meta *func_meta_get(void *ip);
+void func_meta_put(void *ip, struct func_meta *meta);
+
+#endif
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 057cd975d014..4042c168dcfc 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -106,6 +106,7 @@ obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
 obj-$(CONFIG_FPROBE) += fprobe.o
 obj-$(CONFIG_RETHOOK) += rethook.o
 obj-$(CONFIG_FPROBE_EVENTS) += trace_fprobe.o
+obj-$(CONFIG_FUNCTION_METADATA) += func_meta.o
 
 obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
 obj-$(CONFIG_RV) += rv/
diff --git a/kernel/trace/func_meta.c b/kernel/trace/func_meta.c
new file mode 100644
index 000000000000..9ce77da81e71
--- /dev/null
+++ b/kernel/trace/func_meta.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/slab.h>
+#include <linux/memory.h>
+#include <linux/func_meta.h>
+#include <linux/text-patching.h>
+
+#define FUNC_META_INSN_SIZE	5
+
+#ifdef CONFIG_CFI_CLANG
+  #ifdef CONFIG_CALL_THUNKS
+    /* use the extra 5 bytes that we reserve */
+    #define FUNC_META_INSN_OFFSET	(CONFIG_FUNCTION_PADDING_BYTES + 5)
+    #define FUNC_META_DATA_OFFSET	(CONFIG_FUNCTION_PADDING_BYTES + 4)
+  #else
+    /* use the space that CALL_THUNKS is supposed to use */
+    #define FUNC_META_INSN_OFFSET	(5)
+    #define FUNC_META_DATA_OFFSET	(4)
+  #endif
+#else
+  /* use the space that CFI_CLANG is supposed to use */
+  #define FUNC_META_INSN_OFFSET	(CONFIG_FUNCTION_PADDING_BYTES)
+  #define FUNC_META_DATA_OFFSET	(CONFIG_FUNCTION_PADDING_BYTES - 1)
+#endif
+
+static u32 func_meta_count = 32, func_meta_used;
+struct func_meta *func_metas;
+EXPORT_SYMBOL_GPL(func_metas);
+
+static DEFINE_MUTEX(func_meta_lock);
+
+static u32 func_meta_get_index(void *ip)
+{
+	return *(u32 *)(ip - FUNC_META_DATA_OFFSET);
+}
+
+static bool func_meta_exist(void *ip)
+{
+	return *(u8 *)(ip - FUNC_META_INSN_OFFSET) == 0xB8;
+}
+
+static void func_meta_init(struct func_meta *metas, u32 start, u32 end)
+{
+	u32 i;
+
+	for (i = start; i < end; i++)
+		metas[i].users = 0;
+}
+
+/* get next usable function metadata */
+static struct func_meta *func_meta_get_next(u32 *index)
+{
+	struct func_meta *tmp;
+	u32 i;
+
+	if (func_metas == NULL) {
+		func_metas = kmalloc_array(func_meta_count, sizeof(*tmp),
+					   GFP_KERNEL);
+		if (!func_metas)
+			return NULL;
+		func_meta_init(func_metas, 0, func_meta_count);
+	}
+
+	/* maybe we can manage the used function metadata entries with a
+	 * bitmap?
+	 */
+	for (i = 0; i < func_meta_count; i++) {
+		if (!func_metas[i].users) {
+			func_meta_used++;
+			*index = i;
+			func_metas[i].users++;
+			return func_metas + i;
+		}
+	}
+
+	tmp = kmalloc_array(func_meta_count * 2, sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return NULL;
+
+	memcpy(tmp, func_metas, func_meta_count * sizeof(*tmp));
+	kfree(func_metas);
+
+	/* TODO: we need a way to update func_metas synchronously, RCU ? */
+	func_metas = tmp;
+	func_meta_init(func_metas, func_meta_count, func_meta_count * 2);
+
+	tmp += func_meta_count;
+	*index = func_meta_count;
+	func_meta_count <<= 1;
+	func_meta_used++;
+	tmp->users++;
+
+	return tmp;
+}
+
+static int func_meta_text_poke(void *ip, u32 index, bool nop)
+{
+	const u8 nop_insn[FUNC_META_INSN_SIZE] = { BYTES_NOP1, BYTES_NOP1,
+						   BYTES_NOP1, BYTES_NOP1,
+						   BYTES_NOP1 };
+	u8 insn[FUNC_META_INSN_SIZE] = { 0xB8, };
+	const u8 *prog;
+	void *target;
+	int ret = 0;
+
+	target = ip - FUNC_META_INSN_OFFSET;
+	mutex_lock(&text_mutex);
+	if (nop) {
+		if (!memcmp(target, nop_insn, FUNC_META_INSN_SIZE))
+			goto out;
+		prog = nop_insn;
+	} else {
+		*(u32 *)(insn + 1) = index;
+		if (!memcmp(target, insn, FUNC_META_INSN_SIZE))
+			goto out;
+
+		if (memcmp(target, nop_insn, FUNC_META_INSN_SIZE)) {
+			ret = -EBUSY;
+			goto out;
+		}
+		prog = insn;
+	}
+	text_poke(target, prog, FUNC_META_INSN_SIZE);
+	text_poke_sync();
+out:
+	mutex_unlock(&text_mutex);
+	return ret;
+}
+
+static void __func_meta_put(void *ip, struct func_meta *meta)
+{
+	if (WARN_ON_ONCE(meta->users <= 0))
+		return;
+
+	meta->users--;
+	if (meta->users > 0)
+		return;
+
+	/* TODO: we need a way to shrink the array "func_metas" */
+	func_meta_used--;
+	if (!func_meta_exist(ip))
+		return;
+
+	func_meta_text_poke(ip, 0, true);
+}
+
+void func_meta_put(void *ip, struct func_meta *meta)
+{
+	mutex_lock(&func_meta_lock);
+	__func_meta_put(ip, meta);
+	mutex_unlock(&func_meta_lock);
+}
+EXPORT_SYMBOL_GPL(func_meta_put);
+
+struct func_meta *func_meta_get(void *ip)
+{
+	struct func_meta *tmp = NULL;
+	u32 index;
+
+	mutex_lock(&func_meta_lock);
+	if (func_meta_exist(ip)) {
+		index = func_meta_get_index(ip);
+		if (WARN_ON_ONCE(index >= func_meta_count))
+			goto out;
+
+		tmp = &func_metas[index];
+		tmp->users++;
+		goto out;
+	}
+
+	tmp = func_meta_get_next(&index);
+	if (!tmp)
+		goto out;
+
+	if (func_meta_text_poke(ip, index, false)) {
+		__func_meta_put(ip, tmp);
+		goto out;
+	}
+	tmp->func = ip;
+out:
+	mutex_unlock(&func_meta_lock);
+	return tmp;
+}
+EXPORT_SYMBOL_GPL(func_meta_get);
-- 
2.39.5

