[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <tip-d2f3f5d2e9cae6e73f9642a5ddc8c8a07c35e79b@git.kernel.org>
Date: Tue, 21 Jul 2015 02:36:17 -0700
From: tip-bot for Davidlohr Bueso <tipbot@...or.com>
To: linux-tip-commits@...r.kernel.org
Cc: acme@...hat.com, linux-kernel@...r.kernel.org, tglx@...utronix.de,
dbueso@...e.de, hpa@...or.com, mingo@...nel.org, dave@...olabs.net,
mgorman@...e.de
Subject: [tip:perf/core] perf bench futex: Add lock_pi stresser
Commit-ID: d2f3f5d2e9cae6e73f9642a5ddc8c8a07c35e79b
Gitweb: http://git.kernel.org/tip/d2f3f5d2e9cae6e73f9642a5ddc8c8a07c35e79b
Author: Davidlohr Bueso <dave@...olabs.net>
AuthorDate: Tue, 7 Jul 2015 01:55:53 -0700
Committer: Arnaldo Carvalho de Melo <acme@...hat.com>
CommitDate: Mon, 20 Jul 2015 17:49:51 -0300
perf bench futex: Add lock_pi stresser
Provides a way of measuring the low-level kernel implementation of
FUTEX_LOCK_PI and FUTEX_UNLOCK_PI.
The program comes in two flavors:
(i) single futex (default), all threads contend on the same uaddr. For the
sake of the benchmark, we call into kernel space even when the lock is
uncontended. The kernel will set it to TID; any waiters that come in and
contend for the pi futex will be handled accordingly by the kernel.
(ii) -M option for multiple futexes: each thread deals with its own futex. This
is a trivial scenario and only measures the kernel's handling of the 0->TID transition.
Signed-off-by: Davidlohr Bueso <dbueso@...e.de>
Cc: Mel Gorman <mgorman@...e.de>
Link: http://lkml.kernel.org/r/1436259353.12255.78.camel@stgolabs.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@...hat.com>
---
tools/perf/Documentation/perf-bench.txt | 4 +
tools/perf/bench/Build | 1 +
tools/perf/bench/bench.h | 2 +
tools/perf/bench/{futex-hash.c => futex-lock-pi.c} | 202 +++++++++++----------
tools/perf/bench/futex.h | 20 ++
tools/perf/builtin-bench.c | 2 +
6 files changed, 132 insertions(+), 99 deletions(-)
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index bf3d064..ab632d9 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -216,6 +216,10 @@ Suite for evaluating parallel wake calls.
*requeue*::
Suite for evaluating requeue calls.
+*lock-pi*::
+Suite for evaluating futex lock_pi calls.
+
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index c3ab760..573e288 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -5,6 +5,7 @@ perf-y += futex-hash.o
perf-y += futex-wake.o
perf-y += futex-wake-parallel.o
perf-y += futex-requeue.o
+perf-y += futex-lock-pi.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 70b2f71..a50df86 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -36,6 +36,8 @@ extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
extern int bench_futex_wake_parallel(int argc, const char **argv,
const char *prefix);
extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
+/* pi futexes */
+extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-lock-pi.c
similarity index 53%
copy from tools/perf/bench/futex-hash.c
copy to tools/perf/bench/futex-lock-pi.c
index fc9bebd..bc6a16a 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -1,11 +1,5 @@
/*
- * Copyright (C) 2013 Davidlohr Bueso <davidlohr@...com>
- *
- * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing.
- *
- * This program is particularly useful for measuring the kernel's futex hash
- * table/function implementation. In order for it to make sense, use with as
- * many threads and futexes as possible.
+ * Copyright (C) 2015 Davidlohr Bueso.
*/
#include "../perf.h"
@@ -21,19 +15,6 @@
#include <sys/time.h>
#include <pthread.h>
-static unsigned int nthreads = 0;
-static unsigned int nsecs = 10;
-/* amount of futexes per thread */
-static unsigned int nfutexes = 1024;
-static bool fshared = false, done = false, silent = false;
-static int futex_flag = 0;
-
-struct timeval start, end, runtime;
-static pthread_mutex_t thread_lock;
-static unsigned int threads_starting;
-static struct stats throughput_stats;
-static pthread_cond_t thread_parent, thread_worker;
-
struct worker {
int tid;
u_int32_t *futex;
@@ -41,24 +22,55 @@ struct worker {
unsigned long ops;
};
+static u_int32_t global_futex = 0;
+static struct worker *worker;
+static unsigned int nsecs = 10;
+static bool silent = false, multi = false;
+static bool done = false, fshared = false;
+static unsigned int ncpus, nthreads = 0;
+static int futex_flag = 0;
+struct timeval start, end, runtime;
+static pthread_mutex_t thread_lock;
+static unsigned int threads_starting;
+static struct stats throughput_stats;
+static pthread_cond_t thread_parent, thread_worker;
+
static const struct option options[] = {
- OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
- OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
- OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
- OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
- OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
+ OPT_BOOLEAN( 'M', "multi", &multi, "Use multiple futexes"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
OPT_END()
};
-static const char * const bench_futex_hash_usage[] = {
- "perf bench futex hash <options>",
+static const char * const bench_futex_lock_pi_usage[] = {
+ "perf bench futex requeue <options>",
NULL
};
+static void print_summary(void)
+{
+ unsigned long avg = avg_stats(&throughput_stats);
+ double stddev = stddev_stats(&throughput_stats);
+
+ printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
+ !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
+ (int) runtime.tv_sec);
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ /* inform all threads that we're done for the day */
+ done = true;
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+}
+
static void *workerfn(void *arg)
{
- int ret;
- unsigned int i;
struct worker *w = (struct worker *) arg;
pthread_mutex_lock(&thread_lock);
@@ -69,58 +81,70 @@ static void *workerfn(void *arg)
pthread_mutex_unlock(&thread_lock);
do {
- for (i = 0; i < nfutexes; i++, w->ops++) {
- /*
- * We want the futex calls to fail in order to stress
- * the hashing of uaddr and not measure other steps,
- * such as internal waitqueue handling, thus enlarging
- * the critical region protected by hb->lock.
- */
- ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag);
- if (!silent &&
- (!ret || errno != EAGAIN || errno != EWOULDBLOCK))
- warn("Non-expected futex return call");
+ int ret;
+ again:
+ ret = futex_lock_pi(w->futex, NULL, 0, futex_flag);
+
+ if (ret) { /* handle lock acquisition */
+ if (!silent)
+ warn("thread %d: Could not lock pi-lock for %p (%d)",
+ w->tid, w->futex, ret);
+ if (done)
+ break;
+
+ goto again;
}
+
+ usleep(1);
+ ret = futex_unlock_pi(w->futex, futex_flag);
+ if (ret && !silent)
+ warn("thread %d: Could not unlock pi-lock for %p (%d)",
+ w->tid, w->futex, ret);
+ w->ops++; /* account for thread's share of work */
} while (!done);
return NULL;
}
-static void toggle_done(int sig __maybe_unused,
- siginfo_t *info __maybe_unused,
- void *uc __maybe_unused)
+static void create_threads(struct worker *w, pthread_attr_t thread_attr)
{
- /* inform all threads that we're done for the day */
- done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
-}
+ cpu_set_t cpu;
+ unsigned int i;
-static void print_summary(void)
-{
- unsigned long avg = avg_stats(&throughput_stats);
- double stddev = stddev_stats(&throughput_stats);
+ threads_starting = nthreads;
- printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
- !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
- (int) runtime.tv_sec);
+ for (i = 0; i < nthreads; i++) {
+ worker[i].tid = i;
+
+ if (multi) {
+ worker[i].futex = calloc(1, sizeof(u_int32_t));
+ if (!worker[i].futex)
+ err(EXIT_FAILURE, "calloc");
+ } else
+ worker[i].futex = &global_futex;
+
+ CPU_ZERO(&cpu);
+ CPU_SET(i % ncpus, &cpu);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
+ err(EXIT_FAILURE, "pthread_create");
+ }
}
-int bench_futex_hash(int argc, const char **argv,
- const char *prefix __maybe_unused)
+int bench_futex_lock_pi(int argc, const char **argv,
+ const char *prefix __maybe_unused)
{
int ret = 0;
- cpu_set_t cpu;
+ unsigned int i;
struct sigaction act;
- unsigned int i, ncpus;
pthread_attr_t thread_attr;
- struct worker *worker = NULL;
- argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
- if (argc) {
- usage_with_options(bench_futex_hash_usage, options);
- exit(EXIT_FAILURE);
- }
+ argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
+ if (argc)
+ goto err;
ncpus = sysconf(_SC_NPROCESSORS_ONLN);
@@ -128,18 +152,18 @@ int bench_futex_hash(int argc, const char **argv,
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
- if (!nthreads) /* default to the number of CPUs */
+ if (!nthreads)
nthreads = ncpus;
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
- goto errmem;
+ err(EXIT_FAILURE, "calloc");
if (!fshared)
futex_flag = FUTEX_PRIVATE_FLAG;
- printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
- getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
+ printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n",
+ getpid(), nthreads, nsecs);
init_stats(&throughput_stats);
pthread_mutex_init(&thread_lock, NULL);
@@ -149,25 +173,8 @@ int bench_futex_hash(int argc, const char **argv,
threads_starting = nthreads;
pthread_attr_init(&thread_attr);
gettimeofday(&start, NULL);
- for (i = 0; i < nthreads; i++) {
- worker[i].tid = i;
- worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
- if (!worker[i].futex)
- goto errmem;
-
- CPU_ZERO(&cpu);
- CPU_SET(i % ncpus, &cpu);
-
- ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);
- if (ret)
- err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
-
- ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
- (void *)(struct worker *) &worker[i]);
- if (ret)
- err(EXIT_FAILURE, "pthread_create");
- }
+ create_threads(worker, thread_attr);
pthread_attr_destroy(&thread_attr);
pthread_mutex_lock(&thread_lock);
@@ -192,24 +199,21 @@ int bench_futex_hash(int argc, const char **argv,
for (i = 0; i < nthreads; i++) {
unsigned long t = worker[i].ops/runtime.tv_sec;
+
update_stats(&throughput_stats, t);
- if (!silent) {
- if (nfutexes == 1)
- printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
- worker[i].tid, &worker[i].futex[0], t);
- else
- printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n",
- worker[i].tid, &worker[i].futex[0],
- &worker[i].futex[nfutexes-1], t);
- }
+ if (!silent)
+ printf("[thread %3d] futex: %p [ %ld ops/sec ]\n",
+ worker[i].tid, worker[i].futex, t);
- free(worker[i].futex);
+ if (multi)
+ free(worker[i].futex);
}
print_summary();
free(worker);
return ret;
-errmem:
- err(EXIT_FAILURE, "calloc");
+err:
+ usage_with_options(bench_futex_lock_pi_usage, options);
+ exit(EXIT_FAILURE);
}
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index 7ed22ff..d44de9f 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -56,6 +56,26 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
}
/**
+ * futex_lock_pi() - block on uaddr as a PI mutex
+ * @detect: whether (1) or not (0) to perform deadlock detection
+ */
+static inline int
+futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect,
+ int opflags)
+{
+ return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
+ */
+static inline int
+futex_unlock_pi(u_int32_t *uaddr, int opflags)
+{
+ return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
+}
+
+/**
* futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
* @nr_wake: wake up to this many tasks
* @nr_requeue: requeue up to this many tasks
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index b5314e4..f67934d 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -60,6 +60,8 @@ static struct bench futex_benchmarks[] = {
{ "wake", "Benchmark for futex wake calls", bench_futex_wake },
{ "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel },
{ "requeue", "Benchmark for futex requeue calls", bench_futex_requeue },
+ /* pi-futexes */
+ { "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi },
{ "all", "Test all futex benchmarks", NULL },
{ NULL, NULL, NULL }
};
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists