lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20140711005836.25516.63968.stgit@yuno-kbuild.novalocal>
Date:	Fri, 11 Jul 2014 00:58:36 +0000
From:	Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@...achi.com>
To:	Steven Rostedt <rostedt@...dmis.org>
Cc:	Hidehiro Kawai <hidehiro.kawai.ez@...achi.com>,
	Masami Hiramatsu <masami.hiramatsu.pt@...achi.com>,
	yrl.pp-manager.tt@...achi.com, linux-kernel@...r.kernel.org,
	Aaron Fabbri <aaronx.j.fabbri@...el.com>
Subject: [PATCH V4 5/5] trace-cmd/record: Add --virt option for record mode

Add --virt option for record mode for a virtualization environment.
If we use this option on a guest, we can send trace data with low overhead.
This is because guests can send trace data to a host without copying the data
by using splice(2).

The format is:

   trace-cmd record --virt -e sched*

<Note>
The client using virtio-serial does not wait for the connection message
"tracecmd" from the server. The client sends the connection message
MSG_TCONNECT first.

<Restriction>
This feature can be used from kernel 3.6 onward, which supports splice_read
for ftrace and splice_write for virtio-serial.

Changes in V4: Rebase for current trace-cmd-v2.4
               Add usage of --virt for record in trace-usage.c
               Divide tracecmd_msg_connect_to_server() into two functions
                (tracecmd_msg_connect_to_server() and
                                     tracecmd_msg_send_init_data_virt(fd))
Changes in V3: Change _nw/_NW to _net/_NET

Signed-off-by: Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@...achi.com>
---
 Documentation/trace-cmd-record.1.txt |   11 ++++-
 trace-cmd.h                          |    4 +-
 trace-msg.c                          |   79 +++++++++++++++++++++++++++++++---
 trace-msg.h                          |    4 ++
 trace-record.c                       |   71 ++++++++++++++++++++++++++++---
 trace-usage.c                        |    3 +
 6 files changed, 158 insertions(+), 14 deletions(-)

diff --git a/Documentation/trace-cmd-record.1.txt b/Documentation/trace-cmd-record.1.txt
index 9e63eb4..c0de074 100644
--- a/Documentation/trace-cmd-record.1.txt
+++ b/Documentation/trace-cmd-record.1.txt
@@ -258,6 +258,15 @@ OPTIONS
     timestamp to gettimeofday which will allow wall time output from the
     timestamps reading the created 'trace.dat' file.
 
+*--virt*::
+    This option is used on a guest in a virtualization environment. If a host
+    is running "trace-cmd virt-server", this option is used to have the data
+    sent to the host over virtio-serial, similar to the *-N* option. (see also
+    trace-cmd-virt-server(1))
+
+    Note: This option is not supported with latency tracer plugins:
+      wakeup, wakeup_rt, irqsoff, preemptoff and preemptirqsoff
+
 EXAMPLES
 --------
 
@@ -320,7 +329,7 @@ SEE ALSO
 --------
 trace-cmd(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1),
 trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
-trace-cmd-list(1), trace-cmd-listen(1)
+trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-virt-server(1)
 
 AUTHOR
 ------
diff --git a/trace-cmd.h b/trace-cmd.h
index c4e5beb..1c1b0c3 100644
--- a/trace-cmd.h
+++ b/trace-cmd.h
@@ -250,7 +250,9 @@ void tracecmd_stat_cpu(struct trace_seq *s, int cpu);
 long tracecmd_flush_recording(struct tracecmd_recorder *recorder);
 
 /* for clients */
-int tracecmd_msg_send_init_data(int fd);
+int tracecmd_msg_connect_to_server(int fd);
+int tracecmd_msg_send_init_data_net(int fd);
+int tracecmd_msg_send_init_data_virt(int fd);
 int tracecmd_msg_metadata_send(int fd, char *buf, int size);
 int tracecmd_msg_finish_sending_metadata(int fd);
 void tracecmd_msg_send_close_msg(void);
diff --git a/trace-msg.c b/trace-msg.c
index 0d606dc..7ca31d6 100644
--- a/trace-msg.c
+++ b/trace-msg.c
@@ -30,6 +30,7 @@
 #include <stdio.h>
 #include <unistd.h>
 #include <arpa/inet.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <linux/types.h>
 
@@ -72,6 +73,7 @@ int cpu_count;
 static int psfd;
 unsigned int page_size;
 int *client_ports;
+int *virt_sfds;
 bool send_metadata;
 
 /* for server */
@@ -272,12 +274,20 @@ static int make_rinit(struct tracecmd_msg *msg)
 	return 0;
 }
 
+static int make_error_msg(u32 len, struct tracecmd_msg *msg)
+{
+	bufcpy(msg, TRACECMD_MSG_HDR_LEN, errmsg, len);
+	return 0;
+}
+
 static u32 tracecmd_msg_get_body_length(u32 cmd)
 {
 	struct tracecmd_msg *msg;
 	u32 len = 0;
 
 	switch (cmd) {
+	case MSG_ERROR:
+		return ntohl(errmsg->size);
 	case MSG_RCONNECT:
 		return sizeof(msg->data.rconnect.str.size) + CONNECTION_MSGSIZE;
 	case MSG_TINIT:
@@ -305,6 +315,7 @@ static u32 tracecmd_msg_get_body_length(u32 cmd)
 		       + sizeof(msg->data.rinit.port_array);
 	case MSG_SENDMETA:
 		return TRACECMD_MSG_MAX_LEN - TRACECMD_MSG_HDR_LEN;
+	case MSG_TCONNECT:
 	case MSG_CLOSE:
 	case MSG_FINMETA:
 		break;
@@ -313,15 +324,18 @@ static u32 tracecmd_msg_get_body_length(u32 cmd)
 	return 0;
 }
 
-static int tracecmd_msg_make_body(u32 cmd, struct tracecmd_msg *msg)
+static int tracecmd_msg_make_body(u32 cmd, u32 len, struct tracecmd_msg *msg)
 {
 	switch (cmd) {
+	case MSG_ERROR:
+		return make_error_msg(len, msg);
 	case MSG_RCONNECT:
 		return make_rconnect(CONNECTION_MSG, CONNECTION_MSGSIZE, msg);
 	case MSG_TINIT:
 		return make_tinit(msg);
 	case MSG_RINIT:
 		return make_rinit(msg);
+	case MSG_TCONNECT:
 	case MSG_CLOSE:
 	case MSG_SENDMETA: /* meta data is not stored here. */
 	case MSG_FINMETA:
@@ -346,7 +360,7 @@ static int tracecmd_msg_create(u32 cmd, struct tracecmd_msg **msg)
 	if (ret < 0)
 		return ret;
 
-	ret = tracecmd_msg_make_body(cmd, *msg);
+	ret = tracecmd_msg_make_body(cmd, len, *msg);
 	if (ret < 0)
 		free(*msg);
 
@@ -375,6 +389,12 @@ static int tracecmd_msg_send(int fd, u32 cmd)
 	return ret;
 }
 
+static void tracecmd_msg_send_error(int fd, struct tracecmd_msg *msg)
+{
+	errmsg = msg;
+	tracecmd_msg_send(fd, MSG_ERROR);
+}
+
 static int tracecmd_msg_read_extra(int fd, void *buf, u32 size, int *n)
 {
 	int r = 0;
@@ -499,9 +519,10 @@ static int tracecmd_msg_send_and_wait_for_msg(int fd, u32 cmd, struct tracecmd_m
 	return 0;
 }
 
-int tracecmd_msg_send_init_data(int fd)
+static int tracecmd_msg_send_init_data(int fd, bool net)
 {
 	char buf[TRACECMD_MSG_MAX_LEN];
+	char path[PATH_MAX];
 	struct tracecmd_msg *msg;
 	int i, cpus;
 	int ret;
@@ -512,9 +533,24 @@ int tracecmd_msg_send_init_data(int fd)
 		return ret;
 
 	cpus = ntohl(msg->data.rinit.cpus);
-	client_ports = malloc_or_die(sizeof(int) * cpus);
-	for (i = 0; i < cpus; i++)
-		client_ports[i] = ntohl(msg->data.rinit.port_array[i]);
+	if (net) {
+		client_ports = malloc_or_die(sizeof(int) * cpus);
+		for (i = 0; i < cpus; i++)
+			client_ports[i] =
+					ntohl(msg->data.rinit.port_array[i]);
+	} else {
+		virt_sfds = malloc_or_die(sizeof(int) * cpus);
+
+		/* Open data paths of virtio-serial */
+		for (i = 0; i < cpus; i++) {
+			snprintf(path, PATH_MAX, TRACE_PATH_CPU, i);
+			virt_sfds[i] = open(path, O_WRONLY);
+			if (virt_sfds[i] < 0) {
+				warning("Cannot open %s", TRACE_PATH_CPU, i);
+				return -errno;
+			}
+		}
+	}
 
 	/* Next, send meta data */
 	send_metadata = true;
@@ -522,6 +558,37 @@ int tracecmd_msg_send_init_data(int fd)
 	return 0;
 }
 
+int tracecmd_msg_send_init_data_net(int fd)
+{
+	return tracecmd_msg_send_init_data(fd, true);
+}
+
+int tracecmd_msg_send_init_data_virt(int fd)
+{
+	return tracecmd_msg_send_init_data(fd, false);
+}
+
+int tracecmd_msg_connect_to_server(int fd)
+{
+	char buf[TRACECMD_MSG_MAX_LEN];
+	struct tracecmd_msg *msg;
+	int ret;
+
+	msg = (struct tracecmd_msg *)buf;
+	/* connect to a server */
+	ret = tracecmd_msg_send_and_wait_for_msg(fd, MSG_TCONNECT, msg);
+	if (ret < 0) {
+		if (ret == -EPROTONOSUPPORT)
+			goto error;
+	}
+
+	return ret;
+
+error:
+	tracecmd_msg_send_error(fd, msg);
+	return ret;
+}
+
 static bool process_option(struct tracecmd_msg_opt *opt)
 {
 	/* currently the only option we have is to us TCP */
diff --git a/trace-msg.h b/trace-msg.h
index b23e72b..502c1bf 100644
--- a/trace-msg.h
+++ b/trace-msg.h
@@ -2,6 +2,9 @@
 #define _TRACE_MSG_H_
 
 #include <stdbool.h>
+#define VIRTIO_PORTS	"/dev/virtio-ports/"
+#define AGENT_CTL_PATH	VIRTIO_PORTS "agent-ctl-path"
+#define TRACE_PATH_CPU	VIRTIO_PORTS "trace-path-cpu%d"
 
 #define UDP_MAX_PACKET	(65536 - 20)
 #define V2_MAGIC	"677768\0"
@@ -17,6 +20,7 @@ extern int cpu_count;
 extern unsigned int page_size;
 extern int *client_ports;
 extern bool send_metadata;
+extern int *virt_sfds;
 
 /* for server */
 extern bool done;
diff --git a/trace-record.c b/trace-record.c
index 79ce3a1..e56d294 100644
--- a/trace-record.c
+++ b/trace-record.c
@@ -77,6 +77,9 @@ static struct tracecmd_output *network_handle;
 /* Max size to let a per cpu file get */
 static int max_kb;
 
+struct tracecmd_output *virt_handle;
+static bool virt;
+
 static int do_ptrace;
 
 static int filter_task;
@@ -1787,6 +1790,9 @@ static int create_recorder(struct buffer_instance *instance, int cpu, int extrac
 	if (client_ports) {
 		connect_port(cpu);
 		recorder = tracecmd_create_recorder_fd(client_ports[cpu], cpu, recorder_flags);
+	} else if (virt_sfds) {
+		recorder = tracecmd_create_recorder_fd(virt_sfds[cpu], cpu,
+						       recorder_flags);
 	} else {
 		file = get_temp_file(instance, cpu);
 		recorder = create_recorder_instance(instance, file, cpu);
@@ -1822,7 +1828,7 @@ static void check_first_msg_from_server(int fd)
 		die("server not tracecmd server");
 }
 
-static void communicate_with_listener_v1(int fd)
+static void communicate_with_listener_v1_net(int fd)
 {
 	char buf[BUFSIZ];
 	ssize_t n;
@@ -1885,9 +1891,9 @@ static void communicate_with_listener_v1(int fd)
 	}
 }
 
-static void communicate_with_listener_v2(int fd)
+static void communicate_with_listener_v2_net(int fd)
 {
-	if (tracecmd_msg_send_init_data(fd) < 0)
+	if (tracecmd_msg_send_init_data_net(fd) < 0)
 		die("Cannot communicate with server");
 }
 
@@ -1925,6 +1931,15 @@ static void check_protocol_version(int fd)
 	}
 }
 
+static void communicate_with_listener_virt(int fd)
+{
+	if (tracecmd_msg_connect_to_server(fd) < 0)
+		die("Cannot communicate with server");
+
+	if (tracecmd_msg_send_init_data_virt(fd) < 0)
+		die("Cannot send init data");
+}
+
 static void setup_network(void)
 {
 	struct addrinfo hints;
@@ -1980,11 +1995,11 @@ again:
 			close(sfd);
 			goto again;
 		}
-		communicate_with_listener_v2(sfd);
+		communicate_with_listener_v2_net(sfd);
 	}
 
 	if (proto_ver == V1_PROTOCOL)
-		communicate_with_listener_v1(sfd);
+		communicate_with_listener_v1_net(sfd);
 
 	/* Now create the handle through this socket */
 	network_handle = tracecmd_create_init_fd_glob(sfd, listed_events);
@@ -1995,6 +2010,21 @@ again:
 	/* OK, we are all set, let'r rip! */
 }
 
+static void setup_virtio(void)
+{
+	int fd;
+
+	fd = open(AGENT_CTL_PATH, O_RDWR);
+	if (fd < 0)
+		die("Cannot open %s", AGENT_CTL_PATH);
+
+	communicate_with_listener_virt(fd);
+
+	/* Now create the handle through this socket */
+	virt_handle = tracecmd_create_init_fd_glob(fd, listed_events);
+	tracecmd_msg_finish_sending_metadata(fd);
+}
+
 static void finish_network(void)
 {
 	if (proto_ver == V2_PROTOCOL)
@@ -2003,6 +2033,13 @@ static void finish_network(void)
 	free(host);
 }
 
+static void finish_virt(void)
+{
+	tracecmd_msg_send_close_msg();
+	free(virt_handle);
+	free(virt_sfds);
+}
+
 static void start_threads(void)
 {
 	struct buffer_instance *instance;
@@ -2010,6 +2047,8 @@ static void start_threads(void)
 
 	if (host)
 		setup_network();
+	else if (virt)
+		setup_virtio();
 
 	/* make a thread for every CPU we have */
 	pids = malloc_or_die(sizeof(*pids) * cpu_count * (buffers + 1));
@@ -2079,6 +2118,9 @@ static void record_data(char *date2ts)
 	if (host) {
 		finish_network();
 		return;
+	} else if (virt) {
+		finish_virt();
+		return;
 	}
 
 	if (latency)
@@ -2732,6 +2774,7 @@ static void record_all_events(void)
 }
 
 enum {
+	OPT_virt	= 252,
 	OPT_nosplice	= 253,
 	OPT_funcstack	= 254,
 	OPT_date	= 255,
@@ -2885,6 +2928,7 @@ void trace_record (int argc, char **argv)
 			{"date", no_argument, NULL, OPT_date},
 			{"func-stack", no_argument, NULL, OPT_funcstack},
 			{"nosplice", no_argument, NULL, OPT_nosplice},
+			{"virt", no_argument, NULL, OPT_virt},
 			{"help", no_argument, NULL, '?'},
 			{NULL, 0, NULL, 0}
 		};
@@ -3015,6 +3059,8 @@ void trace_record (int argc, char **argv)
 		case 'o':
 			if (host)
 				die("-o incompatible with -N");
+			if (virt)
+				die("-o incompatible with --virt");
 			if (!record && !extract)
 				die("start does not take output\n"
 				    "Did you mean 'record'?");
@@ -3046,6 +3092,8 @@ void trace_record (int argc, char **argv)
 		case 'N':
 			if (!record)
 				die("-N only available with record");
+			if (virt)
+				die("-N incompatible with --virt");
 			if (output)
 				die("-N incompatible with -o");
 			host = optarg;
@@ -3061,6 +3109,8 @@ void trace_record (int argc, char **argv)
 			instance->cpumask = optarg;
 			break;
 		case 't':
+			if (virt)
+				die("-t incompatible with --virt");
 			use_tcp = 1;
 			break;
 		case 'b':
@@ -3085,6 +3135,17 @@ void trace_record (int argc, char **argv)
 		case OPT_nosplice:
 			recorder_flags |= TRACECMD_RECORD_NOSPLICE;
 			break;
+		case OPT_virt:
+			if (!record)
+				die("--virt only available with record");
+			if (host)
+				die("--virt incompatible with -N");
+			if (output)
+				die("--virt incompatible with -o");
+			if (use_tcp)
+				die("--virt incompatible with -t");
+			virt = true;
+			break;
 		default:
 			usage(argv);
 		}
diff --git a/trace-usage.c b/trace-usage.c
index f96a5ba..45865f0 100644
--- a/trace-usage.c
+++ b/trace-usage.c
@@ -19,7 +19,7 @@ static struct usage_help usage_help[] = {
 		" %s record [-v][-e event [-f filter]][-p plugin][-F][-d][-D][-o file] \\\n"
 		"           [-s usecs][-O option ][-l func][-g func][-n func] \\\n"
 		"           [-P pid][-N host:port][-t][-r prio][-b size][-B buf][command ...]\n"
-		"           [-m max]\n"
+		"           [-m max][--virt]\n"
 		"          -e run command with event enabled\n"
 		"          -f filter for previous -e event\n"
 		"          -R trigger for previous -e event\n"
@@ -48,6 +48,7 @@ static struct usage_help usage_help[] = {
 		"          -i do not fail if an event is not found\n"
 		"          --func-stack perform a stack trace for function tracer\n"
 		"             (use with caution)\n"
+		"	   --virt to connect to virt-server\n"
 	},
 	{
 		"start",

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ