lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241026224306.982896-3-bigeasy@linutronix.de>
Date: Sun, 27 Oct 2024 00:34:51 +0200
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-kernel@...r.kernel.org
Cc: André Almeida <andrealmeid@...lia.com>,
	Darren Hart <dvhart@...radead.org>,
	Davidlohr Bueso <dave@...olabs.net>,
	Ingo Molnar <mingo@...hat.com>,
	Juri Lelli <juri.lelli@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Valentin Schneider <vschneid@...hat.com>,
	Waiman Long <longman@...hat.com>,
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Subject: [RFC PATCH 2/3] futex: Add basic infrastructure for local task local hash.

The futex hashmap is system wide and shared by random tasks. Each slot
is hashed based on its address and VMA. Due to randomized VMAs the same
logical lock (pointer) can end up in a different hash bucket on each
invocation of the application. This in turn means that different
applications may share a hash bucket on each invocation and it is not
always clear which applications will be involved. This can result in
high latencies when acquiring the futex_hash_bucket::lock, especially if
the lock owner is limited to a CPU and cannot be effectively PI boosted.

Introduce a task local hash map. The hashmap can be allocated via
	prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_ALLOCATE, 0)

The `0' argument allocates a default number of 4 slots; a higher number
can be specified if desired. The current upper limit is 16.
The hashmap can be shared with other threads within an application via
	prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SHARE);

Once the shared hashmap is enabled, all threads must enable it.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
 include/linux/futex.h      |   8 +++
 include/linux/sched.h      |   2 +
 include/uapi/linux/prctl.h |   5 ++
 kernel/futex/core.c        | 125 +++++++++++++++++++++++++++++++++++++
 kernel/sys.c               |   4 ++
 5 files changed, 144 insertions(+)

diff --git a/include/linux/futex.h b/include/linux/futex.h
index b70df27d7e85c..e92cbea336e8e 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -69,6 +69,7 @@ static inline void futex_init_task(struct task_struct *tsk)
 	tsk->pi_state_cache = NULL;
 	tsk->futex_state = FUTEX_STATE_OK;
 	mutex_init(&tsk->futex_exit_mutex);
+	rcu_assign_pointer(tsk->futex_hash_table, NULL);
 }
 
 void futex_exit_recursive(struct task_struct *tsk);
@@ -77,6 +78,8 @@ void futex_exec_release(struct task_struct *tsk);
 
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3);
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3,
+		     unsigned long arg4, unsigned long arg5);
 #else
 static inline void futex_init_task(struct task_struct *tsk) { }
 static inline void futex_exit_recursive(struct task_struct *tsk) { }
@@ -88,6 +91,11 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
 {
 	return -EINVAL;
 }
+static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3,
+				   unsigned long arg4, unsigned long arg5)
+{
+	return -EINVAL;
+}
 #endif
 
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ade6417609002..8854c6029a9b4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -58,6 +58,7 @@ struct bpf_net_context;
 struct capture_control;
 struct cfs_rq;
 struct fs_struct;
+struct futex_hash_table;
 struct futex_pi_state;
 struct io_context;
 struct io_uring_task;
@@ -1281,6 +1282,7 @@ struct task_struct {
 #endif
 #ifdef CONFIG_FUTEX
 	struct robust_list_head __user	*robust_list;
+	struct futex_hash_table		*futex_hash_table;
 #ifdef CONFIG_COMPAT
 	struct compat_robust_list_head __user *compat_robust_list;
 #endif
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 35791791a879b..2475b128ba85d 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -328,4 +328,9 @@ struct prctl_mm_map {
 # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC	0x10 /* Clear the aspect on exec */
 # define PR_PPC_DEXCR_CTRL_MASK		0x1f
 
+/* FUTEX hash management */
+#define PR_FUTEX_HASH			74
+# define PR_FUTEX_HASH_ALLOCATE		1
+# define PR_FUTEX_HASH_SHARE		2
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index de6d7f71961eb..7c97fc96f84a3 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -39,6 +39,7 @@
 #include <linux/memblock.h>
 #include <linux/fault-inject.h>
 #include <linux/slab.h>
+#include <linux/prctl.h>
 
 #include "futex.h"
 #include "../locking/rtmutex_common.h"
@@ -55,6 +56,12 @@ static struct {
 #define futex_queues   (__futex_data.queues)
 #define futex_hashsize (__futex_data.hashsize)
 
+struct futex_hash_table {
+	unsigned int			slots;
+	int				users;
+	spinlock_t			lock;
+	struct  futex_hash_bucket	queues[];
+};
 
 /*
  * Fault injections for futexes.
@@ -1040,6 +1047,9 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
 
 static void futex_cleanup(struct task_struct *tsk)
 {
+	struct futex_hash_table *fht;
+	bool need_free = false;
+
 	if (unlikely(tsk->robust_list)) {
 		exit_robust_list(tsk);
 		tsk->robust_list = NULL;
@@ -1054,6 +1064,23 @@ static void futex_cleanup(struct task_struct *tsk)
 
 	if (unlikely(!list_empty(&tsk->pi_state_list)))
 		exit_pi_state_list(tsk);
+
+	rcu_read_lock();
+	fht = rcu_dereference(current->futex_hash_table);
+	if (fht) {
+
+		spin_lock(&fht->lock);
+		fht->users--;
+		WARN_ON_ONCE(fht->users < 0);
+		if (fht->users == 0)
+			need_free = true;
+		spin_unlock(&fht->lock);
+		rcu_assign_pointer(current->futex_hash_table, NULL);
+	}
+	rcu_read_unlock();
+
+	if (need_free)
+		kfree_rcu_mightsleep(fht);
 }
 
 /**
@@ -1153,6 +1180,104 @@ static void futex_hash_bucket_init(struct futex_hash_bucket *fhb)
 	spin_lock_init(&fhb->lock);
 }
 
+static int futex_hash_allocate(unsigned long arg3, unsigned long arg4,
+			       unsigned long arg5)
+{
+	unsigned int hash_slots = arg3;
+	struct futex_hash_table *fht;
+	size_t struct_size;
+	int i;
+
+	if (hash_slots == 0)
+		hash_slots = 4;
+	if (hash_slots < 2)
+		hash_slots = 2;
+	if (hash_slots > 16)
+		hash_slots = 16;
+	if (!is_power_of_2(hash_slots))
+		hash_slots = rounddown_pow_of_two(hash_slots);
+
+	if (current->futex_hash_table)
+		return -EALREADY;
+
+	struct_size = hash_slots * sizeof(struct futex_hash_bucket);
+	struct_size += sizeof(struct futex_hash_table);
+	fht = kmalloc(struct_size, GFP_KERNEL);
+	if (!fht)
+		return -ENOMEM;
+
+	fht->slots = hash_slots;
+	fht->users = 1;
+	spin_lock_init(&fht->lock);
+
+	for (i = 0; i < hash_slots; i++)
+		futex_hash_bucket_init(&fht->queues[i]);
+
+	rcu_assign_pointer(current->futex_hash_table, fht);
+	return 0;
+}
+
+static int futex_hash_share(unsigned long arg3, unsigned long arg4,
+			    unsigned long arg5)
+{
+	struct futex_hash_table *fht;
+	struct task_struct *task;
+	pid_t task_pid;
+	int ret;
+
+	rcu_read_lock();
+	/* XXX maybe auto attach on fork() */
+	task_pid = task_tgid_vnr(current);
+	task = find_task_by_vpid(task_pid);
+	if (!task) {
+		ret = -ESRCH;
+		goto out;
+	}
+
+	fht = rcu_dereference(task->futex_hash_table);
+	if (!fht) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	spin_lock(&fht->lock);
+	if (fht->users <= 0) {
+		ret  = -EINVAL;
+		goto unlock_out;
+	}
+	fht->users++;
+
+	rcu_assign_pointer(current->futex_hash_table, fht);
+	ret = 0;
+
+unlock_out:
+	spin_unlock(&fht->lock);
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3,
+		     unsigned long arg4, unsigned long arg5)
+{
+	int ret;
+
+	switch (arg2) {
+	case PR_FUTEX_HASH_ALLOCATE:
+		ret = futex_hash_allocate(arg3, arg4, arg5);
+		break;
+
+	case PR_FUTEX_HASH_SHARE:
+		ret = futex_hash_share(arg3, arg4, arg5);
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
 static int __init futex_init(void)
 {
 	unsigned int futex_shift;
diff --git a/kernel/sys.c b/kernel/sys.c
index 4da31f28fda81..0dcbb8ce9f19d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -52,6 +52,7 @@
 #include <linux/user_namespace.h>
 #include <linux/time_namespace.h>
 #include <linux/binfmts.h>
+#include <linux/futex.h>
 
 #include <linux/sched.h>
 #include <linux/sched/autogroup.h>
@@ -2784,6 +2785,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_RISCV_SET_ICACHE_FLUSH_CTX:
 		error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3);
 		break;
+	case PR_FUTEX_HASH:
+		error = futex_hash_prctl(arg2, arg3, arg4, arg5);
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
2.45.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ