Message-Id: <20240823185946.418340-7-mathieu.desnoyers@efficios.com>
Date: Fri, 23 Aug 2024 14:59:46 -0400
From: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>
Cc: linux-kernel@...r.kernel.org,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Valentin Schneider <vschneid@...hat.com>,
Mel Gorman <mgorman@...e.de>,
Steven Rostedt <rostedt@...dmis.org>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Ben Segall <bsegall@...gle.com>,
Yury Norov <yury.norov@...il.com>,
Rasmus Villemoes <linux@...musvillemoes.dk>,
Shuah Khan <skhan@...uxfoundation.org>,
linux-kselftest@...r.kernel.org
Subject: [RFC PATCH v1 6/6] selftests/rseq: Implement NUMA node id vs mm_cid invariant test

This test validates that the mapping between an mm_cid and a NUMA node id
remains invariant for the lifetime of a process whose number of threads is
>= the number of allowed CPUs. In other words, it validates that if any
thread within the process observes NUMA node id M while running on behalf
of mm_cid N, then all threads within this process will observe that same
NUMA node id M whenever they run on behalf of mm_cid N.
This characteristic is important for NUMA locality.
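
The per-mm_cid check the test performs can be condensed into the
following sketch (check_mm_cid_node() is a hypothetical helper name used
only for illustration; cpu_numa_id[] mirrors the per-mm_cid cache kept by
the test below, and the usual <sched.h>, <stdint.h> and <stdlib.h>
includes are assumed):

	/*
	 * Remember the first NUMA node id observed for a given mm_cid, and
	 * require every later observation made on behalf of that same
	 * mm_cid to report the same node id.
	 */
	static int cpu_numa_id[CPU_SETSIZE];	/* reset to -1 before the test */

	static void check_mm_cid_node(uint32_t mm_cid, uint32_t node)
	{
		int cached = cpu_numa_id[mm_cid];

		if (cached == -1)
			cpu_numa_id[mm_cid] = node;	/* first observation */
		else if ((uint32_t)cached != node)
			abort();			/* invariant broken */
	}
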
On all architectures except Power, the NUMA topology is never
reconfigured during the system lifetime once a CPU has been associated
with a NUMA node. Even on Power, NUMA topology reconfiguration can be
assumed to happen rarely, so we do not expect it to happen while the
NUMA test is running.

As a result, the NUMA node id associated with an mm_cid should be
invariant as long as:
- A process has a number of threads >= number of allowed CPUs (ensured
  by the test as sketched after this list),
- The allowed CPUs mask is unchanged, and
- The NUMA configuration is unchanged.
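
How the allowed-CPU count is derived can be sketched as follows (this
matches get_affinity_weight() in the patch below, with the perror() call
elided); the test then spawns that many threads:

	cpu_set_t allowed_cpus;

	if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus))
		abort();
	/* Spawn as many test threads as there are allowed CPUs. */
	nr_threads = CPU_COUNT(&allowed_cpus);
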
This test is skipped on architectures that do not implement
rseq_load_u32_u32.
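
For reference, the test samples the (mm_cid, node_id) pair with
rseq_load_u32_u32(), retrying until both rseq ABI fields are loaded
within a single rseq critical section so the snapshot is consistent:

	uint32_t mm_cid, node;

	while (rseq_load_u32_u32(RSEQ_MO_RELAXED, &mm_cid,
				 &rseq_get_abi()->mm_cid,
				 &node, &rseq_get_abi()->node_id) != 0) {
		/* Retry on abort of the critical section. */
	}
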
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Reviewed-by: Shuah Khan <skhan@...uxfoundation.org>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Shuah Khan <skhan@...uxfoundation.org>
Cc: linux-kselftest@...r.kernel.org
---
tools/testing/selftests/rseq/.gitignore | 1 +
tools/testing/selftests/rseq/Makefile | 2 +-
.../testing/selftests/rseq/basic_numa_test.c | 144 ++++++++++++++++++
3 files changed, 146 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/rseq/basic_numa_test.c
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
index 16496de5f6ce..8a8d163cbb9f 100644
--- a/tools/testing/selftests/rseq/.gitignore
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
+basic_numa_test
basic_percpu_ops_test
basic_percpu_ops_mm_cid_test
basic_test
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index 5a3432fceb58..9ef1c949114a 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -14,7 +14,7 @@ LDLIBS += -lpthread -ldl
# still track changes to header files and depend on shared object.
OVERRIDE_TARGETS = 1
-TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \
+TEST_GEN_PROGS = basic_test basic_numa_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \
param_test_benchmark param_test_compare_twice param_test_mm_cid \
param_test_mm_cid_benchmark param_test_mm_cid_compare_twice
diff --git a/tools/testing/selftests/rseq/basic_numa_test.c b/tools/testing/selftests/rseq/basic_numa_test.c
new file mode 100644
index 000000000000..8e51c662057d
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_numa_test.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Basic rseq NUMA test. Validate that (mm_cid, numa_node_id) pairs
+ * remain invariant for the process lifetime as long as the following
+ * preconditions are respected:
+ *
+ * - A process has a number of threads >= number of allowed CPUs,
+ * - The allowed CPUs mask is unchanged, and
+ * - The NUMA configuration is unchanged.
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <poll.h>
+#include <sys/time.h>
+
+#include "rseq.h"
+
+#define NR_LOOPS 100
+
+static int nr_threads, nr_active_threads, test_go, test_stop;
+
+#ifdef RSEQ_ARCH_HAS_LOAD_U32_U32
+
+static int cpu_numa_id[CPU_SETSIZE];
+
+static int get_affinity_weight(void)
+{
+ cpu_set_t allowed_cpus;
+
+ if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus)) {
+ perror("sched_getaffinity");
+ abort();
+ }
+ return CPU_COUNT(&allowed_cpus);
+}
+
+static void numa_id_init(void)
+{
+ int i;
+
+ for (i = 0; i < CPU_SETSIZE; i++)
+ cpu_numa_id[i] = -1;
+}
+
+static void *test_thread(void *arg)
+{
+ int i;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ /*
+ * Rendez-vous across all threads to make sure the number of
+ * threads >= number of allowed CPUs for the entire test duration.
+ */
+ if (__atomic_add_fetch(&nr_active_threads, 1, __ATOMIC_RELAXED) == nr_threads)
+ __atomic_store_n(&test_go, 1, __ATOMIC_RELAXED);
+ while (!__atomic_load_n(&test_go, __ATOMIC_RELAXED))
+ rseq_barrier();
+
+ for (i = 0; i < NR_LOOPS; i++) {
+ uint32_t mm_cid, node;
+ int cached_node_id;
+
+ while (rseq_load_u32_u32(RSEQ_MO_RELAXED, &mm_cid,
+ &rseq_get_abi()->mm_cid,
+ &node, &rseq_get_abi()->node_id) != 0) {
+ /* Retry. */
+ }
+ cached_node_id = RSEQ_READ_ONCE(cpu_numa_id[mm_cid]);
+ if (cached_node_id == -1) {
+ RSEQ_WRITE_ONCE(cpu_numa_id[mm_cid], node);
+ } else {
+ if (node != cached_node_id) {
+ fprintf(stderr, "Error: NUMA node id discrepancy: mm_cid %u cached node id %d node id %u.\n",
+ mm_cid, cached_node_id, node);
+ fprintf(stderr, "This is likely a kernel bug, or caused by a concurrent NUMA topology reconfiguration.\n");
+ abort();
+ }
+ }
+ (void) poll(NULL, 0, 10); /* wait 10ms */
+ }
+ /*
+ * Rendez-vous before exiting all threads to make sure the
+ * number of threads >= number of allowed CPUs for the entire
+ * test duration.
+ */
+ if (__atomic_sub_fetch(&nr_active_threads, 1, __ATOMIC_RELAXED) == 0)
+ __atomic_store_n(&test_stop, 1, __ATOMIC_RELAXED);
+ while (!__atomic_load_n(&test_stop, __ATOMIC_RELAXED))
+ rseq_barrier();
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ return NULL;
+}
+
+static int test_numa(void)
+{
+ pthread_t tid[nr_threads];
+ int err, i;
+ void *tret;
+
+ numa_id_init();
+
+ printf("testing rseq (mm_cid, numa_node_id) invariant, multi-threaded (%d threads)\n",
+ nr_threads);
+
+ for (i = 0; i < nr_threads; i++) {
+ err = pthread_create(&tid[i], NULL, test_thread, NULL);
+ if (err != 0)
+ abort();
+ }
+
+ for (i = 0; i < nr_threads; i++) {
+ err = pthread_join(tid[i], &tret);
+ if (err != 0)
+ abort();
+ }
+
+ return 0;
+}
+#else
+static int test_numa(void)
+{
+ fprintf(stderr, "rseq_load_u32_u32 is not implemented on this architecture. Skipping numa test.\n");
+ return 0;
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ nr_threads = get_affinity_weight();
+ return test_numa();
+}
--
2.39.2