lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20181003212848.412256-3-songliubraving@fb.com>
Date:   Wed, 3 Oct 2018 14:28:48 -0700
From:   Song Liu <songliubraving@...com>
To:     <linux-kernel@...r.kernel.org>
CC:     <kernel-team@...com>, <ravi.bangoria@...ux.ibm.com>,
        <naveen.n.rao@...ux.vnet.ibm.com>, <maddy@...ux.vnet.ibm.com>,
        Song Liu <songliubraving@...com>, Tejun Heo <tj@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Jiri Olsa <jolsa@...nel.org>
Subject: [PATCH/RFC v4 2/2] perf tools: monitoring per task counter with per cgroup event

This is just a prototype.

The previous patch enables sharing a hardware PMU among perf_events within
the same perf_event_context. This sharing comes with the limitation that a
per-CPU event cannot share a hardware PMU with a per-task event. This
limitation becomes a blocker when certain events can only use a specific
PMU, for example, ref-cycles on some Intel CPUs. The following two commands
will not share the PMU (when run in parallel):

   perf stat -e ref-cycles -I 1000
   perf stat -e ref-cycles -I 1000 --pid <pid>

This patch shows a prototype that solves this problem with cgroup events.
With this patch, the following two commands can share the PMU:

   perf stat -e ref-cycles -I 1000
   perf stat -e ref-cycles -I 1000 --pid <pid> --create-cgroup

The second command creates a cgroup for the pid and moves the pid to
that cgroup. Then, a cgroup event (instead of a task event) is created
to monitor the process.

Alternatively, we can also create a mechanism in the kernel that is very
similar to cgroup perf events. I am also open to other suggestions.

Signed-off-by: Song Liu <songliubraving@...com>
Cc: Tejun Heo <tj@...nel.org>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Jiri Olsa <jolsa@...nel.org>
---
 tools/perf/builtin-stat.c | 26 ++++++++++++++
 tools/perf/util/cgroup.c  | 76 +++++++++++++++++++++++++++++++++++++++
 tools/perf/util/cgroup.h  |  5 +++
 tools/perf/util/target.h  |  1 +
 4 files changed, 108 insertions(+)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b86aba1c8028..66a4da2d506e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -646,6 +646,24 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 				    ts, argc, argv);
 }

+/*
+ * Remove the "perf.<pid>" cgroup if --create-cgroup was requested.
+ *
+ * NOTE(review): this is also invoked from skip_signal(), and
+ * cgroup__cleanup() uses stdio, which is not async-signal-safe.
+ */
+static void cleanup(void)
+{
+	/* clean up cgroups */
+	if (target.create_cgroup) {
+		char name[32];
+
+		scnprintf(name, sizeof(name), "perf.%u",
+			  (unsigned int)getpid());
+		cgroup__cleanup(name);
+	}
+}
+
 static volatile int signr = -1;

 static void skip_signal(int signo)
@@ -661,6 +672,7 @@ static void skip_signal(int signo)
 	 * and fast PID recycling
 	 */
 	child_pid = -1;
+	cleanup();
 }

 static void sig_atexit(void)
@@ -725,6 +737,8 @@ static const struct option stat_options[] = {
 		   "stat events on existing process id"),
 	OPT_STRING('t', "tid", &target.tid, "tid",
 		   "stat events on existing thread id"),
+	OPT_BOOLEAN(0, "create-cgroup", &target.create_cgroup,
+		    "create a cgroup for the pid/tid"),
 	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_BOOLEAN('g', "group", &group,
@@ -1607,6 +1621,17 @@ int cmd_stat(int argc, const char **argv)
 	perf_stat__collect_metric_expr(evsel_list);
 	perf_stat__init_shadow_stats();

+	if (target.create_cgroup) {
+		char name[32];
+
+		scnprintf(name, 31, "perf.%u", getpid());
+		cgroup__create(name);
+		cgroup__add_pid(name, strtoul(target.pid, NULL, 0));
+
+		cgroup__add_evlist(name, evsel_list);
+		target.pid = NULL;
+	}
+
 	if (stat_config.csv_sep) {
 		stat_config.csv_output = true;
 		if (!strcmp(stat_config.csv_sep, "\\t"))
@@ -1906,5 +1931,6 @@ int cmd_stat(int argc, const char **argv)

 	runtime_stat_delete(&stat_config);

+	cleanup();
 	return status;
 }
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index ccd02634a616..f3e706f6fa96 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -249,3 +249,110 @@ int parse_cgroups(const struct option *opt, const char *str,
 	}
 	return 0;
 }
+
+/*
+ * Thin wrapper: hand @name and @evlist to add_cgroup() and return its
+ * result.
+ */
+int cgroup__add_evlist(const char *name, struct perf_evlist *evlist)
+{
+	return add_cgroup(evlist, name);
+}
+
+/*
+ * Create the directory <cgroupfs mountpoint>/<name>.
+ *
+ * Returns -1 if no mountpoint is found, otherwise the result of mkdir().
+ */
+int cgroup__create(const char *name)
+{
+	char path[PATH_MAX + 1];
+	char mnt[PATH_MAX + 1];
+
+	if (cgroupfs_find_mountpoint(mnt, sizeof(mnt)))
+		return -1;
+
+	scnprintf(path, sizeof(path), "%s/%s", mnt, name);
+
+	return mkdir(path, 0755);
+}
+
+/*
+ * Move @pid into cgroup @name by writing it to the cgroup.procs file of
+ * that cgroup.
+ *
+ * Returns 0 on success, -1 if the mountpoint cannot be found, the file
+ * cannot be opened, or the write fails.
+ */
+int cgroup__add_pid(const char *name, pid_t pid)
+{
+	char path[PATH_MAX + 1];
+	char mnt[PATH_MAX + 1];
+	char buf[32];
+	int ret = 0;
+	int fd;
+
+	if (cgroupfs_find_mountpoint(mnt, sizeof(mnt)))
+		return -1;
+
+	scnprintf(path, sizeof(path), "%s/%s/cgroup.procs", mnt, name);
+	fd = open(path, O_WRONLY);
+	if (fd < 0)
+		return -1;
+
+	/* pid_t is a signed type, hence %d */
+	scnprintf(buf, sizeof(buf), "%d", pid);
+	if (write(fd, buf, strlen(buf)) < 0) {
+		fprintf(stderr, "Error writing %s to %s\n", buf, path);
+		ret = -1;
+	}
+
+	close(fd);
+	return ret;
+}
+
+/*
+ * Tear down cgroup @name: write every line of its cgroup.procs into the
+ * cgroup.procs file at the mountpoint root, then rmdir() the cgroup
+ * directory.
+ *
+ * Returns -1 if the mountpoint or either cgroup.procs file is unavailable,
+ * otherwise the result of rmdir().
+ */
+int cgroup__cleanup(const char *name)
+{
+	char path[PATH_MAX + 1];
+	char mnt[PATH_MAX + 1];
+	char *line = NULL;	/* getline() allocates when this is NULL */
+	size_t len = 0;
+	FILE *fp;
+	int fd;
+
+	if (cgroupfs_find_mountpoint(mnt, sizeof(mnt)))
+		return -1;
+
+	scnprintf(path, sizeof(path), "%s/%s/cgroup.procs", mnt, name);
+	fp = fopen(path, "r");
+	if (fp == NULL)
+		return -1;
+
+	scnprintf(path, sizeof(path), "%s/cgroup.procs", mnt);
+	fd = open(path, O_WRONLY);
+	if (fd < 0) {
+		fclose(fp);
+		return -1;
+	}
+
+	/* lines are forwarded verbatim, trailing newline included */
+	while (getline(&line, &len, fp) != -1) {
+		if (write(fd, line, strlen(line)) < 0)
+			fprintf(stderr, "Error writing %s to %s\n", line, path);
+	}
+
+	free(line);	/* buffer was allocated by getline() */
+	close(fd);
+	fclose(fp);
+
+	scnprintf(path, sizeof(path), "%s/%s", mnt, name);
+	return rmdir(path);
+}
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index f033a80c1b14..7bdd8d99d130 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -26,4 +26,9 @@ void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgrou

 int parse_cgroups(const struct option *opt, const char *str, int unset);

+int cgroup__create(const char *name);
+int cgroup__cleanup(const char *name);
+int cgroup__add_pid(const char *name, pid_t pid);
+int cgroup__add_evlist(const char *name, struct perf_evlist *evlist);
+
 #endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 6ef01a83b24e..03c9ac06660a 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -15,6 +15,7 @@ struct target {
 	bool	     uses_mmap;
 	bool	     default_per_cpu;
 	bool	     per_thread;
+	bool	     create_cgroup;
 };

 enum target_errno {
--
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ