[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241028121921.1264150-3-bigeasy@linutronix.de>
Date: Mon, 28 Oct 2024 13:13:56 +0100
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-kernel@...r.kernel.org
Cc: André Almeida <andrealmeid@...lia.com>,
Darren Hart <dvhart@...radead.org>,
Davidlohr Bueso <dave@...olabs.net>,
Ingo Molnar <mingo@...hat.com>,
Juri Lelli <juri.lelli@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Valentin Schneider <vschneid@...hat.com>,
Waiman Long <longman@...hat.com>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Subject: [RFC PATCH v2 2/4] futex: Add basic infrastructure for local task local hash.
The futex hashmap is system wide and shared by random tasks. Each slot
is hashed based on its address and VMA. Due to randomized VMAs the same
logical lock (pointer) can end up in a different hash bucket on each
invocation of the application. This in turn means that different
applications may share a hash bucket on each invocation and it is not
always clear which applications will be involved. This can result in
high latencies when acquiring futex_hash_bucket::lock, especially if the
lock owner is limited to a CPU and cannot be effectively PI boosted.
Introduce a task local hash map. The hashmap can be allocated via
prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_ALLOCATE, 0)
The `0' argument allocates a default number of 4 slots, a higher number
can be specified if desired. The current upper limit is 16.
The allocated hashmap is used by all threads within a process.
A thread can check if the private map has been allocated via
prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_IS_SHARED);
which returns the current number of slots, or 0 if no private map has been allocated.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
include/linux/futex.h | 7 ++++
include/linux/sched/signal.h | 4 +++
include/uapi/linux/prctl.h | 5 +++
kernel/fork.c | 1 +
kernel/futex/core.c | 65 ++++++++++++++++++++++++++++++++++++
kernel/sys.c | 4 +++
6 files changed, 86 insertions(+)
diff --git a/include/linux/futex.h b/include/linux/futex.h
index b70df27d7e85c..dad50173f70c4 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -77,6 +77,8 @@ void futex_exec_release(struct task_struct *tsk);
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3, /* PR_FUTEX_HASH prctl entry point; arg2..arg5 are the raw prctl() arguments */
+ unsigned long arg4, unsigned long arg5);
#else
static inline void futex_init_task(struct task_struct *tsk) { }
static inline void futex_exit_recursive(struct task_struct *tsk) { }
@@ -88,6 +90,11 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
{
return -EINVAL;
}
+static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, /* stub for the #else branch: arguments are ignored */
+ unsigned long arg4, unsigned long arg5)
+{
+ return -EINVAL; /* every PR_FUTEX_HASH request fails in this configuration */
+}
#endif
#endif
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index c8ed09ac29ac5..3b8c8975cd493 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -14,6 +14,8 @@
#include <linux/mm_types.h>
#include <asm/ptrace.h>
+struct futex_hash_bucket;
+
/*
* Types defining task->signal and task->sighand and APIs using them:
*/
@@ -246,6 +248,8 @@ struct signal_struct {
* and may have inconsistent
* permissions.
*/
+ unsigned int futex_hash_mask;
+ struct futex_hash_bucket *futex_hash_bucket;
} __randomize_layout;
/*
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 35791791a879b..e912ce82de41f 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -328,4 +328,9 @@ struct prctl_mm_map {
# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */
# define PR_PPC_DEXCR_CTRL_MASK 0x1f
+/* FUTEX hash management */
+#define PR_FUTEX_HASH 74 /* prctl() option; arg2 selects one of the operations below */
+# define PR_FUTEX_HASH_ALLOCATE 1 /* allocate the hash; arg3 is the slot count, 0 picks the default */
+# define PR_FUTEX_HASH_IS_SHARED 2 /* returns the number of allocated slots, 0 if none are allocated */
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 89ceb4a68af25..0d2b0a5299bbc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -949,6 +949,7 @@ static inline void free_signal_struct(struct signal_struct *sig)
{
taskstats_tgid_free(sig);
sched_autogroup_exit(sig);
+ kfree(sig->futex_hash_bucket);
/*
* __mmdrop is not safe to call from softirq context on x86 due to
* pgd_dtor so postpone it to the async context
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index de6d7f71961eb..14e4cb5ccd722 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -39,6 +39,7 @@
#include <linux/memblock.h>
#include <linux/fault-inject.h>
#include <linux/slab.h>
+#include <linux/prctl.h>
#include "futex.h"
#include "../locking/rtmutex_common.h"
@@ -1153,6 +1154,70 @@ static void futex_hash_bucket_init(struct futex_hash_bucket *fhb)
spin_lock_init(&fhb->lock);
}
+static int futex_hash_allocate(unsigned long arg3, unsigned long arg4, /* allocate the bucket array kept in current->signal; arg3 is the requested slot count */
+ unsigned long arg5) /* arg4 and arg5 are not read here */
+{
+ unsigned int hash_slots = arg3;
+ struct futex_hash_bucket *fhb;
+ int i;
+
+ if (!thread_group_leader(current)) /* only the thread group leader may allocate */
+ return -EINVAL;
+
+ if (current->signal->futex_hash_bucket) /* an array is already installed; NOTE(review): no lock is visible around this check - confirm callers cannot race */
+ return -EALREADY;
+
+ if (hash_slots == 0) /* 0 selects the default of 4 slots */
+ hash_slots = 4;
+ if (hash_slots < 2) /* out-of-range requests are clamped to 2..16 rather than rejected */
+ hash_slots = 2;
+ if (hash_slots > 16)
+ hash_slots = 16;
+ if (!is_power_of_2(hash_slots)) /* a power of two lets "slots - 1" be stored as a mask below */
+ hash_slots = rounddown_pow_of_two(hash_slots);
+
+ fhb = kmalloc_array(hash_slots, sizeof(struct futex_hash_bucket), GFP_KERNEL);
+ if (!fhb)
+ return -ENOMEM;
+
+ current->signal->futex_hash_mask = hash_slots - 1; /* the mask is written before the bucket pointer is stored */
+
+ for (i = 0; i < hash_slots; i++) /* initialise every bucket before the array is installed */
+ futex_hash_bucket_init(&fhb[i]);
+
+ current->signal->futex_hash_bucket = fhb; /* NOTE(review): plain store, no barrier visible - confirm ordering against readers */
+ return 0; /* success; -EINVAL, -EALREADY or -ENOMEM on the failure paths above */
+}
+
+static int futex_hash_is_shared(unsigned long arg3, unsigned long arg4, /* report the slot count of the array in current->signal; no argument is read */
+ unsigned long arg5)
+{
+ if (current->signal->futex_hash_bucket) /* an array has been installed */
+ return current->signal->futex_hash_mask + 1; /* the mask holds slots - 1, so this is the slot count */
+ return 0; /* nothing allocated */
+}
+
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3, /* dispatch a PR_FUTEX_HASH request; arg2 selects the operation */
+ unsigned long arg4, unsigned long arg5) /* arg3..arg5 are forwarded unchanged to the handler */
+{
+ int ret;
+
+ switch (arg2) {
+ case PR_FUTEX_HASH_ALLOCATE: /* allocate the bucket array; returns 0 or a negative errno */
+ ret = futex_hash_allocate(arg3, arg4, arg5);
+ break;
+
+ case PR_FUTEX_HASH_IS_SHARED: /* returns the slot count, 0 when no array is allocated */
+ ret = futex_hash_is_shared(arg3, arg4, arg5);
+ break;
+
+ default: /* unknown operation */
+ ret = -EINVAL;
+ break;
+ }
+ return ret; /* handler result, passed back to the caller unchanged */
+}
+
static int __init futex_init(void)
{
unsigned int futex_shift;
diff --git a/kernel/sys.c b/kernel/sys.c
index 4da31f28fda81..0dcbb8ce9f19d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -52,6 +52,7 @@
#include <linux/user_namespace.h>
#include <linux/time_namespace.h>
#include <linux/binfmts.h>
+#include <linux/futex.h>
#include <linux/sched.h>
#include <linux/sched/autogroup.h>
@@ -2784,6 +2785,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_RISCV_SET_ICACHE_FLUSH_CTX:
error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3);
break;
+ case PR_FUTEX_HASH:
+ error = futex_hash_prctl(arg2, arg3, arg4, arg5);
+ break;
default:
error = -EINVAL;
break;
--
2.45.2
Powered by blists - more mailing lists