lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250729022640.3134066-5-yuzhuo@google.com>
Date: Mon, 28 Jul 2025 19:26:37 -0700
From: Yuzhuo Jing <yuzhuo@...gle.com>
To: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>, 
	Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>, 
	Mark Rutland <mark.rutland@....com>, 
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>, 
	Ian Rogers <irogers@...gle.com>, Adrian Hunter <adrian.hunter@...el.com>, 
	Liang Kan <kan.liang@...ux.intel.com>, Yuzhuo Jing <yzj@...ch.edu>, 
	Yuzhuo Jing <yuzhuo@...gle.com>, Andrea Parri <parri.andrea@...il.com>, 
	Palmer Dabbelt <palmer@...osinc.com>, Charlie Jenkins <charlie@...osinc.com>, 
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>, Kumar Kartikeya Dwivedi <memxor@...il.com>, 
	Alexei Starovoitov <ast@...nel.org>, Barret Rhoden <brho@...gle.com>, 
	Alexandre Ghiti <alexghiti@...osinc.com>, Guo Ren <guoren@...nel.org>, linux-kernel@...r.kernel.org, 
	linux-perf-users@...r.kernel.org
Subject: [PATCH v1 4/7] tools: Implement userspace per-cpu

Implement userspace per-cpu for imported kernel code.  Compared with a
simple thread-local definition, the kernel per-cpu provides 1) a
guarantee of static lifetime even when a thread exits, and 2) the ability
to access other CPUs' per-cpu data.

This patch adds an alternative implementation and interface for
userspace per-cpu.  The kernel implementation uses special ELF sections
and offset calculation.  For simplicity, this version defines a
PERCPU_MAX length global array for each per-cpu data, and uses a
thread-local cpu id for indexing.

Signed-off-by: Yuzhuo Jing <yuzhuo@...gle.com>
---
 tools/include/linux/compiler_types.h  |   3 +
 tools/include/linux/percpu-simulate.h | 128 ++++++++++++++++++++++++++
 2 files changed, 131 insertions(+)
 create mode 100644 tools/include/linux/percpu-simulate.h

diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h
index 9a2a2f8d7b6c..46550c500b8c 100644
--- a/tools/include/linux/compiler_types.h
+++ b/tools/include/linux/compiler_types.h
@@ -31,6 +31,9 @@
 # define __cond_lock(x,c) (c)
 #endif /* __CHECKER__ */
 
+/* __percpu is a checker-only annotation; in userspace it expands to nothing */
+#define __percpu
+
 /*
  * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving
  *			       non-scalar types unchanged.
diff --git a/tools/include/linux/percpu-simulate.h b/tools/include/linux/percpu-simulate.h
new file mode 100644
index 000000000000..a6af2f2211eb
--- /dev/null
+++ b/tools/include/linux/percpu-simulate.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Userspace implementation of per_cpu_ptr for adapted kernel code.
+ *
+ * Userspace code does not have and does not need a per-cpu concept, but
+ * instead can declare variables as thread-local.  However, the kernel per-cpu
+ * further provides 1) the guarantee of static lifetime when a thread exits,
+ * and 2) the ability to access other CPUs' per-cpu data.  This file provides
+ * a simple implementation of such functionality, but with slightly different
+ * APIs and without linker script changes.
+ *
+ * 2025  Yuzhuo Jing <yuzhuo@...gle.com>
+ */
+#ifndef __PERCPU_SIMULATE_H__
+#define __PERCPU_SIMULATE_H__
+
+#include <assert.h>
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/*
+ * Upper bound (exclusive) on cpu ids.  Every per-cpu variable is defined as
+ * a PERCPU_MAX-element array, indexed by a thread-local cpu id.
+ */
+#define PERCPU_MAX 4096
+
+#ifdef ASSERT_PERCPU	/* opt-in bounds checking of cpu ids */
+#define __check_cpu_id(cpu)						\
+({									\
+	u32 __checked_cpu_id = (cpu);					\
+	assert(__checked_cpu_id < PERCPU_MAX);				\
+	__checked_cpu_id;						\
+})
+#else			/* unchecked: the id is passed through as-is */
+#define __check_cpu_id(cpu)	(cpu)
+#endif
+
+/*
+ * Thread-local cpu id used to index per-cpu arrays.  Defined as a weak
+ * symbol so the variable only exists if a perf tool includes this header.
+ */
+_Thread_local u32 __thread_per_cpu_id __weak;
+
+static inline u32 get_this_cpu_id(void)
+{
+	return __thread_per_cpu_id;	/* calling thread's current cpu id */
+}
+
+/*
+ * Assign the calling thread's cpu id.  Every thread that uses per-cpu data
+ * structures must call this first.  Any id may be chosen, but no two threads
+ * may share one, since the id selects the per-cpu array slot that is used.
+ *
+ * Safety: asserts CPU id smaller than PERCPU_MAX if ASSERT_PERCPU is defined.
+ */
+static inline void set_this_cpu_id(u32 id)
+{
+	__thread_per_cpu_id = __check_cpu_id(id);
+}
+
+/*
+ * Declare a per-cpu data structure.  This only declares the data type and
+ * array length.  Different per-cpu data are told apart by a key (identifier).
+ *
+ * Different from the kernel version, this API must be called before the actual
+ * definition (i.e. DEFINE_PER_CPU_ALIGNED).
+ *
+ * NOTE(review): despite the name, no alignment attribute is applied here, and
+ * a static qualifier or an initializer cannot be combined with this macro.
+ */
+#define DECLARE_PER_CPU_ALIGNED(key, type, data) \
+	extern struct __percpu_type_##key { \
+		type data; \
+	} __percpu_data_##key[PERCPU_MAX]
+
+/*
+ * Define the per-cpu data storage for a given key.  The element type must
+ * already have been declared by DECLARE_PER_CPU_ALIGNED with the same key.
+ *
+ * Different from the kernel version, this API only accepts a key name.
+ */
+#define DEFINE_PER_CPU_ALIGNED(key) \
+	struct __percpu_type_##key __percpu_data_##key[PERCPU_MAX]
+
+#define __raw_per_cpu_value(key, field, cpu) /* unchecked slot access */ \
+	((__percpu_data_##key)[(cpu)].field)
+
+/*
+ * Get a pointer to the per-cpu data of a given key.
+ *
+ * Different from the kernel version, users of this API don't need to pass the
+ * address of the base variable (through `&varname').
+ *
+ * Safety: per_cpu_ptr asserts cpu < PERCPU_MAX if ASSERT_PERCPU is defined.
+ */
+#define per_cpu_ptr(key, field, cpu) (&per_cpu_value(key, field, cpu))
+#define this_cpu_ptr(key, field) (&this_cpu_value(key, field))
+
+/*
+ * Additional APIs for direct lvalue access.  Effectively, `*per_cpu_ptr(...)'.
+ *
+ * Safety: per_cpu_value asserts cpu < PERCPU_MAX if ASSERT_PERCPU is defined.
+ */
+#define per_cpu_value(key, field, cpu) \
+	(__raw_per_cpu_value(key, field, __check_cpu_id(cpu)))
+#define this_cpu_value(key, field) \
+	(__raw_per_cpu_value(key, field, __thread_per_cpu_id))
+
+/*
+ * Helper macros for simple per-cpu operations.
+ *
+ * The kernel version differentiates __this_cpu_* from this_cpu_* for
+ * preemption/interrupt-safe contexts, but the userspace version defines them
+ * as the same.
+ */
+
+#define __this_cpu_add(key, field, val)	(this_cpu_value(key, field) += (val))
+#define __this_cpu_sub(key, field, val)	(this_cpu_value(key, field) -= (val))
+#define __this_cpu_inc(key, field)	(++this_cpu_value(key, field))
+#define __this_cpu_dec(key, field)	(--this_cpu_value(key, field))
+
+#define this_cpu_add	__this_cpu_add
+#define this_cpu_sub	__this_cpu_sub
+#define this_cpu_inc	__this_cpu_inc
+#define this_cpu_dec	__this_cpu_dec
+
+#endif /* __PERCPU_SIMULATE_H__ */
-- 
2.50.1.487.gc89ff58d15-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ