Message-ID: <20250604092815.UtG-oi0v@linutronix.de>
Date: Wed, 4 Jun 2025 11:28:15 +0200
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: Chris Mason <clm@...a.com>
Cc: Peter Zijlstra <peterz@...radead.org>, linux-kernel@...r.kernel.org
Subject: Re: futex performance regression from "futex: Allow automatic
allocation of process wide futex hash"
On 2025-06-03 15:00:43 [-0400], Chris Mason wrote:
> Hi everyone,
Hi,
> While testing Peter's latest scheduler patches against current Linus
> git, I found a pretty big performance regression with schbench:
>
> https://github.com/masoncl/schbench
>
> The command line I was using:
>
> schbench -L -m 4 -M auto -t 256 -n 0 -r 60 -s 0
>
…
> schbench uses one futex per thread, and the command line ends up
> allocating 1024 threads, so the default bucket size used by this commit
> is just too small. Using 2048 buckets makes the problem go away.
There is also the pthread_mutex_t in the mix, but yeah.
> On my big turin system, this commit slows down RPS by 36%. But even a
> VM on a skylake machine sees a 29% difference.
>
> schbench is a microbenchmark, so grain of salt on all of this, but I
> think our defaults are probably too low.
We could do:
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index abbd97c2fcba8..9046f3d9693e7 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -1687,7 +1687,7 @@ int futex_hash_allocate_default(void)
scoped_guard(rcu) {
threads = min_t(unsigned int,
get_nr_threads(current),
- num_online_cpus());
+ num_online_cpus() * 2);
fph = rcu_dereference(current->mm->futex_phash);
if (fph) {
That would increase it to 2048, as Chris asks for.
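For illustration, the sizing then works out like this; a standalone sketch of
the arithmetic only, not the kernel code itself (256 online CPUs and the 1024
threads from schbench's command line assumed, the clamp against
futex_hashmask + 1 left out):

#include <stdio.h>

/* Sketch of futex_hash_allocate_default()'s sizing with the hunk above
 * applied.
 */
static unsigned int roundup_pow_of_two(unsigned int n)
{
	unsigned int r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned int nr_threads = 1024;		/* -m 4 -t 256 */
	unsigned int online_cpus = 256;		/* assumed machine size */
	unsigned int threads, buckets;

	threads = nr_threads < online_cpus * 2 ? nr_threads : online_cpus * 2;
	buckets = roundup_pow_of_two(4 * threads);

	/* prints 2048; without the "* 2" above it would be 1024 */
	printf("buckets: %u\n", buckets);
	return 0;
}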
Then there is also the possibility of:
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index abbd97c2fcba8..a19a96cc09c9e 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -1680,6 +1680,8 @@ int futex_hash_allocate_default(void)
{
unsigned int threads, buckets, current_buckets = 0;
struct futex_private_hash *fph;
+ bool current_immutable = false;
+ unsigned int flags = 0;
if (!current->mm)
return 0;
@@ -1695,6 +1697,7 @@ int futex_hash_allocate_default(void)
return 0;
current_buckets = fph->hash_mask + 1;
+ current_immutable = fph->immutable;
}
}
@@ -1705,10 +1708,13 @@ int futex_hash_allocate_default(void)
buckets = roundup_pow_of_two(4 * threads);
buckets = clamp(buckets, 16, futex_hashmask + 1);
- if (current_buckets >= buckets)
- return 0;
+ if (current_buckets >= buckets) {
+ if (current_immutable)
+ return 0;
+ flags = FH_IMMUTABLE;
+ }
- return futex_hash_allocate(buckets, 0);
+ return futex_hash_allocate(buckets, flags);
}
static int futex_hash_get_slots(void)
to make the hash immutable once the upper limit has been reached. There will
be no more auto-resizing after that point. One could argue that if the user
has not touched it by then, he is probably not going to do it at all. This
would avoid the reference accounting.
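An application that does care about the hash size can still pick one itself
and pin it via prctl(), along the lines of what the schbench patch further
down does. A minimal userspace sketch, assuming the PR_FUTEX_HASH interface
from this series and 2048 buckets as an example size:

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

#ifndef PR_FUTEX_HASH
#define PR_FUTEX_HASH			78
# define PR_FUTEX_HASH_SET_SLOTS	1
# define PR_FUTEX_HASH_GET_SLOTS	2
# define FH_FLAG_IMMUTABLE		(1ULL << 0)
#endif

int main(void)
{
	/* Request 2048 private buckets and make them immutable so the
	 * kernel never resizes the hash and can skip the reference
	 * accounting on each futex operation.
	 */
	if (prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, 2048,
		  FH_FLAG_IMMUTABLE) < 0)
		perror("PR_FUTEX_HASH_SET_SLOTS");

	printf("slots: %d\n", (int)prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS));
	return 0;
}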
Some testing:
256 cores, 2xNUMA:
| average rps: 1 701 947.02 Futex HBs: 0 immutable: 1
| average rps: 785 446.07 Futex HBs: 1024 immutable: 0
| average rps: 1 586 755.62 Futex HBs: 1024 immutable: 1
| average rps: 736 769.77 Futex HBs: 2048 immutable: 0
| average rps: 1 555 182.52 Futex HBs: 2048 immutable: 1
144 cores, 4xNUMA:
| average rps: 2 691 912.55 Futex HBs: 0 immutable: 1
| average rps: 1 306 443.68 Futex HBs: 1024 immutable: 0
| average rps: 2 471 382.28 Futex HBs: 1024 immutable: 1
| average rps: 1 269 503.90 Futex HBs: 2048 immutable: 0
| average rps: 2 656 228.67 Futex HBs: 2048 immutable: 1
Tested with this on top:
diff --git a/schbench.c b/schbench.c
index 1be3e280f5c38..40a5f0091111e 100644
--- a/schbench.c
+++ b/schbench.c
@@ -19,6 +19,8 @@
#include <unistd.h>
#include <errno.h>
#include <getopt.h>
+#include <linux/prctl.h>
+#include <sys/prctl.h>
#include <sys/time.h>
#include <time.h>
#include <string.h>
@@ -42,6 +44,9 @@
#define USEC_PER_SEC (1000000)
+static int futex_size = -1;
+static int futex_flags;
+
/* -m number of message threads */
static int message_threads = 1;
/* -t number of workers per message thread */
@@ -127,7 +132,7 @@ enum {
HELP_LONG_OPT = 1,
};
-char *option_string = "p:m:M:W:t:Cr:R:w:i:z:A:n:F:Lj:s:J:";
+char *option_string = "p:m:M:W:t:Cr:R:w:i:z:A:n:F:Lj:s:J:H:I";
static struct option long_options[] = {
{"pipe", required_argument, 0, 'p'},
{"message-threads", required_argument, 0, 'm'},
@@ -176,6 +181,29 @@ static void print_usage(void)
exit(1);
}
+#ifndef PR_FUTEX_HASH
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define FH_FLAG_IMMUTABLE (1ULL << 0)
+# define PR_FUTEX_HASH_GET_SLOTS 2
+# define PR_FUTEX_HASH_GET_IMMUTABLE 3
+#endif
+
+static int futex_hash_slots_set(unsigned int slots, int flags)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags);
+}
+
+static int futex_hash_slots_get(void)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
+}
+
+static int futex_hash_immutable_get(void)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE);
+}
+
/*
* returns 0 if we fail to parse and 1 if we succeed
*/
@@ -347,6 +375,13 @@ static void parse_options(int ac, char **av)
exit(1);
}
break;
+ case 'H':
+ futex_size = atoi(optarg);
+ break;
+ case 'I':
+ futex_flags = FH_FLAG_IMMUTABLE;
+ break;
+
case '?':
case HELP_LONG_OPT:
print_usage();
@@ -1811,6 +1846,9 @@ int main(int ac, char **av)
}
}
+ if (futex_size >= 0)
+ futex_hash_slots_set(futex_size, futex_flags);
+
requests_per_sec /= message_threads;
loops_per_sec = 0;
stopping = 0;
@@ -1920,6 +1958,8 @@ int main(int ac, char **av)
}
free(message_threads_mem);
+ fprintf(stderr, "Futex HBs: %d immutable: %d\n", futex_hash_slots_get(),
+ futex_hash_immutable_get());
return 0;
}
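With that applied, the rows above can presumably be reproduced by adding the
new switches to the command line Chris quoted, e.g.:

./schbench -L -m 4 -M auto -t 256 -n 0 -r 60 -s 0 -H 2048	# 2048 private buckets
./schbench -L -m 4 -M auto -t 256 -n 0 -r 60 -s 0 -H 2048 -I	# same, immutable (no reference accounting)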
Comments?
> -chris
Sebastian