lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241028121921.1264150-3-bigeasy@linutronix.de>
Date: Mon, 28 Oct 2024 13:13:56 +0100
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-kernel@...r.kernel.org
Cc: André Almeida <andrealmeid@...lia.com>,
	Darren Hart <dvhart@...radead.org>,
	Davidlohr Bueso <dave@...olabs.net>,
	Ingo Molnar <mingo@...hat.com>,
	Juri Lelli <juri.lelli@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Valentin Schneider <vschneid@...hat.com>,
	Waiman Long <longman@...hat.com>,
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Subject: [RFC PATCH v2 2/4] futex: Add basic infrastructure for task local hash.

The futex hashmap is system wide and shared by random tasks. Each slot
is hashed based on its address and VMA. Due to randomized VMAs the same
logical lock (pointer) can end up in a different hash bucket on each
invocation of the application. This in turn means that different
applications may share a hash bucket on each invocation and it is not
always clear which applications will be involved. This can result in
high latencies to acquire the futex_hash_bucket::lock, especially if the
lock owner is limited to a CPU and cannot be effectively PI boosted.

Introduce a task local hash map. The hashmap can be allocated via
	prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_ALLOCATE, 0)

The `0' argument allocates a default number of 4 slots, a higher number
can be specified if desired. The current upper limit is 16.
The allocated hashmap is used by all threads within a process.
A thread can check if the private map has been allocated via
	prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_IS_SHARED);

This returns the current number of slots, or 0 if no private hash map
has been allocated.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
 include/linux/futex.h        |  7 ++++
 include/linux/sched/signal.h |  4 +++
 include/uapi/linux/prctl.h   |  5 +++
 kernel/fork.c                |  1 +
 kernel/futex/core.c          | 65 ++++++++++++++++++++++++++++++++++++
 kernel/sys.c                 |  4 +++
 6 files changed, 86 insertions(+)

diff --git a/include/linux/futex.h b/include/linux/futex.h
index b70df27d7e85c..dad50173f70c4 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -77,6 +77,8 @@ void futex_exec_release(struct task_struct *tsk);
 
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3);
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3,
+		     unsigned long arg4, unsigned long arg5);
 #else
 static inline void futex_init_task(struct task_struct *tsk) { }
 static inline void futex_exit_recursive(struct task_struct *tsk) { }
@@ -88,6 +90,11 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
 {
 	return -EINVAL;
 }
+static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3,
+				   unsigned long arg4, unsigned long arg5)
+{
+	return -EINVAL;
+}
 #endif
 
 #endif
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index c8ed09ac29ac5..3b8c8975cd493 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -14,6 +14,8 @@
 #include <linux/mm_types.h>
 #include <asm/ptrace.h>
 
+struct futex_hash_bucket;
+
 /*
  * Types defining task->signal and task->sighand and APIs using them:
  */
@@ -246,6 +248,8 @@ struct signal_struct {
 						 * and may have inconsistent
 						 * permissions.
 						 */
+	unsigned int			futex_hash_mask;
+	struct futex_hash_bucket	*futex_hash_bucket;
 } __randomize_layout;
 
 /*
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 35791791a879b..e912ce82de41f 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -328,4 +328,9 @@ struct prctl_mm_map {
 # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC	0x10 /* Clear the aspect on exec */
 # define PR_PPC_DEXCR_CTRL_MASK		0x1f
 
+/* FUTEX hash management */
+#define PR_FUTEX_HASH			74
+# define PR_FUTEX_HASH_ALLOCATE		1
+# define PR_FUTEX_HASH_IS_SHARED	2
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 89ceb4a68af25..0d2b0a5299bbc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -949,6 +949,7 @@ static inline void free_signal_struct(struct signal_struct *sig)
 {
 	taskstats_tgid_free(sig);
 	sched_autogroup_exit(sig);
+	kfree(sig->futex_hash_bucket);
 	/*
 	 * __mmdrop is not safe to call from softirq context on x86 due to
 	 * pgd_dtor so postpone it to the async context
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index de6d7f71961eb..14e4cb5ccd722 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -39,6 +39,7 @@
 #include <linux/memblock.h>
 #include <linux/fault-inject.h>
 #include <linux/slab.h>
+#include <linux/prctl.h>
 
 #include "futex.h"
 #include "../locking/rtmutex_common.h"
@@ -1153,6 +1154,70 @@ static void futex_hash_bucket_init(struct futex_hash_bucket *fhb)
 	spin_lock_init(&fhb->lock);
 }
 
+static int futex_hash_allocate(unsigned long arg3, unsigned long arg4,
+			       unsigned long arg5)
+{
+	unsigned int hash_slots = arg3;
+	struct futex_hash_bucket *fhb;
+	int i;
+
+	if (!thread_group_leader(current))
+		return -EINVAL;
+
+	if (current->signal->futex_hash_bucket)
+		return -EALREADY;
+
+	if (hash_slots == 0)
+		hash_slots = 4;
+	if (hash_slots < 2)
+		hash_slots = 2;
+	if (hash_slots > 16)
+		hash_slots = 16;
+	if (!is_power_of_2(hash_slots))
+		hash_slots = rounddown_pow_of_two(hash_slots);
+
+	fhb = kmalloc_array(hash_slots, sizeof(struct futex_hash_bucket), GFP_KERNEL);
+	if (!fhb)
+		return -ENOMEM;
+
+	current->signal->futex_hash_mask = hash_slots - 1;
+
+	for (i = 0; i < hash_slots; i++)
+		futex_hash_bucket_init(&fhb[i]);
+
+	current->signal->futex_hash_bucket = fhb;
+	return 0;
+}
+
+static int futex_hash_is_shared(unsigned long arg3, unsigned long arg4,
+				unsigned long arg5)
+{
+	if (current->signal->futex_hash_bucket)
+		return current->signal->futex_hash_mask + 1;
+	return 0;
+}
+
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3,
+		     unsigned long arg4, unsigned long arg5)
+{
+	int ret;
+
+	switch (arg2) {
+	case PR_FUTEX_HASH_ALLOCATE:
+		ret = futex_hash_allocate(arg3, arg4, arg5);
+		break;
+
+	case PR_FUTEX_HASH_IS_SHARED:
+		ret = futex_hash_is_shared(arg3, arg4, arg5);
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
 static int __init futex_init(void)
 {
 	unsigned int futex_shift;
diff --git a/kernel/sys.c b/kernel/sys.c
index 4da31f28fda81..0dcbb8ce9f19d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -52,6 +52,7 @@
 #include <linux/user_namespace.h>
 #include <linux/time_namespace.h>
 #include <linux/binfmts.h>
+#include <linux/futex.h>
 
 #include <linux/sched.h>
 #include <linux/sched/autogroup.h>
@@ -2784,6 +2785,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_RISCV_SET_ICACHE_FLUSH_CTX:
 		error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3);
 		break;
+	case PR_FUTEX_HASH:
+		error = futex_hash_prctl(arg2, arg3, arg4, arg5);
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
2.45.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ