Message-ID: <77297f66-29b5-de51-5fd0-54ff31444afe@linux.intel.com>
Date:   Wed, 27 Sep 2017 18:25:26 +0300
From:   Alexey Budankov <alexey.budankov@...ux.intel.com>
To:     Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>
Cc:     Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Andi Kleen <ak@...ux.intel.com>,
        Kan Liang <kan.liang@...el.com>,
        Dmitri Prokhorov <Dmitry.Prohorov@...el.com>,
        Valery Cherepennikov <valery.cherepennikov@...el.com>,
        linux-kernel <linux-kernel@...r.kernel.org>
Subject: [RFC][PATCH v1] perf stat: FD based pause/resume for counting mode

Hi,

Here is an FD-based concept for perf stat pause/resume functionality.

The patch implements asynchronous pause/resume commands for the configured
counters through file descriptors passed via the new cmd-fd and cmd-ack-fd options.

Commands are sent over the descriptor passed via the cmd-fd option; the
descriptor passed via the cmd-ack-fd option is used to confirm command completion.

Handling of signals and of the interval timer is also implemented through
file descriptors (signalfd and timerfd).

Thus all sources of asynchronous events are switched to file descriptors and
multiplexed in a single place using the poll() system call.

The initial_delay option is changed to a signed int; a negative value (-1)
means start paused, so that the counting state is then controlled entirely
through the cmd-fd and cmd-ack-fd file descriptors.

I tested both launch mode, where the perf tool starts the workload itself,
and attach mode via the -p option. See the attached logs and straces for
more details.

The attached pause_test.cpp demonstrates one of the usage models for this
functionality; a minimal controller sketch also follows the component
overview below.

Signed-off-by: Alexey Budankov <alexey.budankov@...ux.intel.com>
---
 tools/perf/builtin-stat.c | 434 +++++++++++++++++++++++++++++++++-------------
 tools/perf/util/evlist.c  |   8 +-
 2 files changed, 321 insertions(+), 121 deletions(-)

 Sources of async events:
	enum perf_async_event

 Pause/resume protocol (perf_async_cmd_done is sent back on completion):
	enum perf_async_cmd

 Global state specific to this change:
	struct perf_async

 Initialization:
	int perf_async_init(const int interval)

 Finalization:
	int perf_async_fini(void)

 poll() loop calling the handlers below:
	int perf_async_handle_events(void *param)

 For signals:
	int perf_async_signal_handler(union perf_async_data *data, void *param)

 For timer intervals:
	int perf_async_timer_handler(union perf_async_data *data, void *param)

 For pause/resume commands:
	int perf_async_cmd_handler(union perf_async_data *data, void *param)
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 866da7a..a03e30e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -80,6 +80,9 @@
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <unistd.h>
+#include <poll.h>
+#include <sys/signalfd.h>
+#include <sys/timerfd.h>
 
 #include "sane_ctype.h"
 
@@ -156,7 +159,7 @@ static bool			group				= false;
 static const char		*pre_cmd			= NULL;
 static const char		*post_cmd			= NULL;
 static bool			sync_run			= false;
-static unsigned int		initial_delay			= 0;
+static int			initial_delay			= 0;
 static unsigned int		unit_width			= 4; /* strlen("unit") */
 static bool			forever				= false;
 static bool			metric_only			= false;
@@ -185,13 +188,49 @@ struct perf_stat {
 static struct perf_stat		perf_stat;
 #define STAT_RECORD		perf_stat.record
 
-static volatile int done = 0;
-
 static struct perf_stat_config stat_config = {
 	.aggr_mode	= AGGR_GLOBAL,
 	.scale		= true,
 };
 
+enum perf_async_event
+{
+	perf_async_signal = 0,
+	perf_async_timer,
+	perf_async_cmd,
+	perf_async_cmd_ack,
+	perf_async_event_eof
+};
+
+enum perf_async_cmd
+{
+	perf_async_cmd_pause = 0,
+	perf_async_cmd_resume,
+	perf_async_cmd_done,
+	perf_async_cmd_eof
+};
+
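+/* Payload read from an event fd; the active member depends on the source. */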
+union perf_async_data {
+	struct signalfd_siginfo siginfo;
+	uint64_t timer_expired;
+	enum perf_async_cmd cmd;
+};
+
+struct perf_async_handler_data {
+	int argc;
+	const char **argv;
+	int *status;
+};
+
+typedef int (*perf_async_handler_t)(union perf_async_data*, void *);
+
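+/* Global async state: one pollfd and one handler per event source. */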
+struct {
+	struct pollfd fds[perf_async_event_eof];
+	perf_async_handler_t handlers[perf_async_event_eof];
+	struct itimerspec timer_settings;
+	bool paused;
+} perf_async = {
+	/* mark all fds unused until set via options or perf_async_init() */
+	.fds = {
+		[perf_async_signal]  = { .fd = -1 },
+		[perf_async_timer]   = { .fd = -1 },
+		[perf_async_cmd]     = { .fd = -1 },
+		[perf_async_cmd_ack] = { .fd = -1 },
+	},
+};
+
 static inline void diff_timespec(struct timespec *r, struct timespec *a,
 				 struct timespec *b)
 {
@@ -410,31 +449,6 @@ static void process_interval(void)
 	print_counters(&rs, 0, NULL);
 }
 
-static void enable_counters(void)
-{
-	if (initial_delay)
-		usleep(initial_delay * USEC_PER_MSEC);
-
-	/*
-	 * We need to enable counters only if:
-	 * - we don't have tracee (attaching to task or cpu)
-	 * - we have initial delay configured
-	 */
-	if (!target__none(&target) || initial_delay)
-		perf_evlist__enable(evsel_list);
-}
-
-static void disable_counters(void)
-{
-	/*
-	 * If we don't have tracee (attaching to task or cpu), counters may
-	 * still be running. To get accurate group ratios, we must stop groups
-	 * from counting before reading their constituent counters.
-	 */
-	if (!target__none(&target))
-		perf_evlist__disable(evsel_list);
-}
-
 static volatile int workload_exec_errno;
 
 /*
@@ -582,26 +596,257 @@ static bool perf_evsel__should_store_id(struct perf_evsel *counter)
 	return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
 }
 
+static int perf_async_signal_handler(union perf_async_data *data, void *param)
+{
+	uint32_t signo = data->siginfo.ssi_signo;
+	int32_t code = data->siginfo.ssi_code;
+	struct perf_async_handler_data *handler_data = param;
+	char msg[BUFSIZ];
+
+	if (verbose > 1) {
+		fprintf(stat_config.output,
+			"%s: signo %d, code %d\n", __FUNCTION__, signo, code);
+	}
+
+	switch (signo) {
+	case SIGCHLD:
+		if (code == CLD_EXITED || code == CLD_KILLED || code == CLD_DUMPED) {
+			*handler_data->status = data->siginfo.ssi_status;
+
+			if (workload_exec_errno) {
+				const char *emsg =
+					str_error_r(workload_exec_errno, msg, sizeof(msg));
+				pr_err("Workload failed: %s\n", emsg);
+				return 1;
+			}
+
+			if (WIFSIGNALED(*handler_data->status))
+				psignal(WTERMSIG(*handler_data->status),
+					handler_data->argv[0]);
+
+			if (verbose > 1) {
+				fprintf(stat_config.output,
+					"%s: workload exited: status %d\n",
+					__FUNCTION__, *handler_data->status);
+			}
+
+			return 1;
+		}
+		break;
+	case SIGINT:
+		if (forever) {
+			forever = !forever;
+			return 1;
+		}
+		if (!(handler_data->argc > 0))
+			return 1;
+	case SIGABRT:
+	case SIGALRM:
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int perf_async_timer_handler(union perf_async_data *data,
+				    void *param __maybe_unused)
+{
+	if (verbose > 1) {
+		fprintf(stat_config.output,
+			"%s: timer_expired: %"PRIu64"\n",
+			__FUNCTION__, data->timer_expired);
+	}
+
+	process_interval();
+
+	return 0;
+}
+
+static int perf_async_cmd_handler(union perf_async_data *data,
+				   void *param __maybe_unused)
+{
+	if (verbose > 1) {
+		fprintf(stat_config.output, "%s: cmd %d\n",
+			__FUNCTION__, data->cmd);
+	}
+
+	if (data->cmd == perf_async_cmd_resume && perf_async.paused) {
+
+		perf_evlist__enable(evsel_list);
+
+		if (perf_async.fds[perf_async_timer].fd != -1 &&
+		    (perf_async.timer_settings.it_value.tv_sec ||
+		     perf_async.timer_settings.it_value.tv_nsec))
+			timerfd_settime(perf_async.fds[perf_async_timer].fd,
+				0, &perf_async.timer_settings, NULL);
+
+		perf_async.paused = !perf_async.paused;
+		if (verbose > 1) {
+			fprintf(stat_config.output,
+				"%s: events resumed\n", __FUNCTION__);
+		}
+	} else if (data->cmd == perf_async_cmd_pause && !perf_async.paused) {
+
+		struct itimerspec timer_settings = {0};
+
+		if (perf_async.fds[perf_async_timer].fd != -1 &&
+		    (perf_async.timer_settings.it_value.tv_sec ||
+		     perf_async.timer_settings.it_value.tv_nsec))
+			timerfd_settime(perf_async.fds[perf_async_timer].fd,
+				0, &timer_settings, NULL);
+
+		perf_evlist__disable(evsel_list);
+
+		perf_async.paused = !perf_async.paused;
+		if (verbose > 1) {
+			fprintf(stat_config.output,
+				"%s: events paused\n", __FUNCTION__);
+		}
+	}
+
+	return 0;
+}
+
+static int perf_async_init(const int interval)
+{
+	int i = 0;
+	sigset_t mask;
+
+	sigemptyset(&mask);
+
+	for (i = 0; i < perf_async_event_eof; i++) {
+		/* .fd is preset (initializer or cmd-fd/cmd-ack-fd options),
+		 * so do not reset it here */
+		perf_async.fds[i].events = POLLIN;
+		perf_async.fds[i].revents = 0;
+		perf_async.handlers[i] = NULL;
+	}
+
+	memset(&perf_async.timer_settings, 0,
+		sizeof(perf_async.timer_settings));
+
+	sigaddset(&mask, SIGCHLD);
+	sigaddset(&mask, SIGINT);
+	sigaddset(&mask, SIGALRM);
+	sigaddset(&mask, SIGABRT);
+
+	if (sigprocmask(SIG_BLOCK, &mask, NULL))
+		return 0;
+
+	perf_async.fds[perf_async_signal].fd = signalfd(-1, &mask, SFD_CLOEXEC);
+	if (perf_async.fds[perf_async_signal].fd == -1)
+		return 0;
+
+	perf_async.handlers[perf_async_signal] = perf_async_signal_handler;
+
+	if (interval) {
+		perf_async.timer_settings.it_interval.tv_sec =
+			interval / USEC_PER_MSEC;
+		perf_async.timer_settings.it_interval.tv_nsec =
+			(interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
+		perf_async.timer_settings.it_value =
+			perf_async.timer_settings.it_interval;
+
+		perf_async.fds[perf_async_timer].fd =
+				timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
+		if (perf_async.fds[perf_async_timer].fd == -1)
+			return 0;
+
+		perf_async.handlers[perf_async_timer] =
+				perf_async_timer_handler;
+	}
+
+	perf_async.paused = true;
+	perf_async.handlers[perf_async_cmd] = perf_async_cmd_handler;
+	perf_async.fds[perf_async_cmd_ack].events = POLLOUT;
+
+	return 1;
+}
+
+static int perf_async_fini(void)
+{
+	int i = 0;
+	sigset_t mask;
+
+	sigemptyset(&mask);
+
+	sigaddset(&mask, SIGCHLD);
+	sigaddset(&mask, SIGINT);
+	sigaddset(&mask, SIGALRM);
+	sigaddset(&mask, SIGABRT);
+
+	sigprocmask(SIG_UNBLOCK, &mask, NULL);
+
+	if (perf_async.fds[perf_async_signal].fd != -1)
+		close(perf_async.fds[perf_async_signal].fd);
+
+	if (perf_async.fds[perf_async_timer].fd != -1)
+		close(perf_async.fds[perf_async_timer].fd);
+
+	if (perf_async.fds[perf_async_cmd].fd != -1)
+		close(perf_async.fds[perf_async_cmd].fd);
+
+	if (perf_async.fds[perf_async_cmd_ack].fd != -1)
+		close(perf_async.fds[perf_async_cmd_ack].fd);
+
+	for (i = 0; i < perf_async_event_eof; i++) {
+		perf_async.fds[i].fd = -1;
+		perf_async.fds[i].events = 0;
+		perf_async.fds[i].revents = 0;
+		perf_async.handlers[i] = NULL;
+	}
+
+	return 0;
+}
+
+static int perf_async_handle_events(void *param)
+{
+	union perf_async_data data;
+	int ret, i = 0;
+	bool stop = false;
+
+	while (!stop) {
+
+		ret = poll(perf_async.fds, perf_async_event_eof, -1);
+		if (!(ret > 0))
+			break;
+
+		for (i = 0; i < perf_async_event_eof; i++) {
+			if (perf_async.fds[i].revents & POLLIN) {
+				memset(&data, 0, sizeof(data));
+				ret = read(perf_async.fds[i].fd, &data, sizeof(data));
+				if (!(ret > 0))
+					continue;
+				ret = perf_async.handlers[i](&data, param);
+				if (ret)
+					stop = true;
+			} else if (perf_async.fds[i].revents & POLLOUT) {
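+				/* ack descriptor writable: send perf_async_cmd_done back */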
+				enum perf_async_cmd cmd = perf_async_cmd_done;
+				ret = write(perf_async.fds[i].fd, &cmd, sizeof(cmd));
+				if (!(ret > 0))
+					continue;
+			}
+		}
+	}
+
+	return 0;
+}
+
 static int __run_perf_stat(int argc, const char **argv)
 {
-	int interval = stat_config.interval;
 	char msg[BUFSIZ];
 	unsigned long long t0, t1;
 	struct perf_evsel *counter;
-	struct timespec ts;
 	size_t l;
 	int status = 0;
 	const bool forks = (argc > 0);
 	bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
 	struct perf_evsel_config_term *err_term;
-
-	if (interval) {
-		ts.tv_sec  = interval / USEC_PER_MSEC;
-		ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
-	} else {
-		ts.tv_sec  = 1;
-		ts.tv_nsec = 0;
-	}
+	struct perf_async_handler_data handler_data = { argc, argv, &status };
 
 	if (forks) {
 		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
@@ -697,36 +942,37 @@ static int __run_perf_stat(int argc, const char **argv)
 	t0 = rdclock();
 	clock_gettime(CLOCK_MONOTONIC, &ref_time);
 
-	if (forks) {
+	if (forks)
 		perf_evlist__start_workload(evsel_list);
-		enable_counters();
 
-		if (interval) {
-			while (!waitpid(child_pid, &status, WNOHANG)) {
-				nanosleep(&ts, NULL);
-				process_interval();
-			}
-		}
-		wait(&status);
-
-		if (workload_exec_errno) {
-			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
-			pr_err("Workload failed: %s\n", emsg);
-			return -1;
-		}
+	if (initial_delay > 0)
+		usleep(initial_delay * USEC_PER_MSEC);
 
-		if (WIFSIGNALED(status))
-			psignal(WTERMSIG(status), argv[0]);
-	} else {
-		enable_counters();
-		while (!done) {
-			nanosleep(&ts, NULL);
-			if (interval)
-				process_interval();
+	/*
+	 * We need to enable counters only if:
+	 * - we don't have tracee (attaching to task or cpu)
+	 * - we have initial delay configured
+	 */
+	if (initial_delay >= 0) {
+		if (!target__none(&target) || initial_delay) {
+			union perf_async_data data;
+			data.cmd = perf_async_cmd_resume;
+			perf_async_cmd_handler(&data, NULL);
 		}
 	}
 
-	disable_counters();
+	perf_async_handle_events(&handler_data);
+
+	/*
+	 * If we don't have tracee (attaching to task or cpu), counters may
+	 * still be running. To get accurate group ratios, we must stop groups
+	 * from counting before reading their constituent counters.
+	 */
+	if (!target__none(&target)) {
+		union perf_async_data data;
+		data.cmd = perf_async_cmd_pause;
+		perf_async_cmd_handler(&data, NULL);
+	}
 
 	t1 = rdclock();
 
@@ -1696,49 +1942,6 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 	fflush(stat_config.output);
 }
 
-static volatile int signr = -1;
-
-static void skip_signal(int signo)
-{
-	if ((child_pid == -1) || stat_config.interval)
-		done = 1;
-
-	signr = signo;
-	/*
-	 * render child_pid harmless
-	 * won't send SIGTERM to a random
-	 * process in case of race condition
-	 * and fast PID recycling
-	 */
-	child_pid = -1;
-}
-
-static void sig_atexit(void)
-{
-	sigset_t set, oset;
-
-	/*
-	 * avoid race condition with SIGCHLD handler
-	 * in skip_signal() which is modifying child_pid
-	 * goal is to avoid send SIGTERM to a random
-	 * process
-	 */
-	sigemptyset(&set);
-	sigaddset(&set, SIGCHLD);
-	sigprocmask(SIG_BLOCK, &set, &oset);
-
-	if (child_pid != -1)
-		kill(child_pid, SIGTERM);
-
-	sigprocmask(SIG_SETMASK, &oset, NULL);
-
-	if (signr == -1)
-		return;
-
-	signal(signr, SIG_DFL);
-	kill(getpid(), signr);
-}
-
 static int stat__set_big_num(const struct option *opt __maybe_unused,
 			     const char *s __maybe_unused, int unset)
 {
@@ -1811,8 +2014,10 @@ static const struct option stat_options[] = {
 		     "aggregate counts per physical processor core", AGGR_CORE),
 	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
 		     "aggregate counts per thread", AGGR_THREAD),
-	OPT_UINTEGER('D', "delay", &initial_delay,
-		     "ms to wait before starting measurement after program start"),
+	OPT_INTEGER('D', "delay", &initial_delay,
+		     "ms to wait before starting measurement after program start (-1: start paused)"),
+	OPT_INTEGER(0, "cmd-fd", &(perf_async.fds[perf_async_cmd].fd),
+		     "file descriptor to read pause/resume commands from"),
+	OPT_INTEGER(0, "cmd-ack-fd", &(perf_async.fds[perf_async_cmd_ack].fd),
+		     "file descriptor to write command acknowledgements to"),
 	OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
 			"Only print computed metrics. No raw values", enable_metric_only),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
@@ -2745,18 +2950,7 @@ int cmd_stat(int argc, const char **argv)
 	if (perf_stat_init_aggr_mode())
 		goto out;
 
-	/*
-	 * We dont want to block the signals - that would cause
-	 * child tasks to inherit that and Ctrl-C would not work.
-	 * What we want is for Ctrl-C to work in the exec()-ed
-	 * task, but being ignored by perf stat itself:
-	 */
-	atexit(sig_atexit);
-	if (!forever)
-		signal(SIGINT,  skip_signal);
-	signal(SIGCHLD, skip_signal);
-	signal(SIGALRM, skip_signal);
-	signal(SIGABRT, skip_signal);
+	perf_async_init(interval);
 
 	status = 0;
 	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
@@ -2771,6 +2965,8 @@ int cmd_stat(int argc, const char **argv)
 		}
 	}
 
+	perf_async_fini();
+
 	if (!forever && status != -1 && !interval)
 		print_counters(NULL, argc, argv);
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6a0d7ff..433e095 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1712,12 +1712,16 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *tar
 
 	if (!evlist->workload.pid) {
 		int ret;
+		sigset_t mask;
 
-		if (pipe_output)
-			dup2(2, 1);
+		sigfillset(&mask);
+		sigprocmask(SIG_UNBLOCK, &mask, NULL);
 
 		signal(SIGTERM, SIG_DFL);
 
+		if (pipe_output)
+			dup2(2, 1);
+
 		close(child_ready_pipe[0]);
 		close(go_pipe[1]);
 		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);




Attachments:
  pause_resume_fd.zip (application/x-zip-compressed, 55997 bytes)
  pause_test.cpp (text/plain, 2155 bytes)
