lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 26 Apr 2017 10:01:35 -0400
From:   Steven Rostedt <rostedt@...dmis.org>
To:     Pratyush Anand <panand@...hat.com>
Cc:     linux-kernel@...r.kernel.org, dyoung@...hat.com, xlpang@...hat.com
Subject: Re: [trace-cmd Patch RFC] trace-cmd: top: A new interface to detect
 peak memory


Sorry for the late reply. I finally have time to start looking at
trace-cmd again.

On Wed,  1 Mar 2017 20:32:37 +0530
Pratyush Anand <panand@...hat.com> wrote:

> A new interface "trace-cmd top" has been introduced in this patch to
> know peak memory usage of any task.
> 
> $ trace-cmd top -s
> It will start to gather statistics of all the process which will be
> scheduled after this command execution.
> $ trace-cmd top -p
> It will print peak memory usage(in KB) against each scheduled task.
> $ trace-cmd top -e
> It will print peak memory usage(in KB) against each scheduled task and
> will stop gathering statistics. It will also kill the child process
> initiated by -s option.
> 
> $ ./trace-cmd top -s
> $ ./trace-cmd top -e
> 
> comm            peak memory(in KB)
> NetworkManager          15912
> gnome-shell             114292
> gnome-shell             186356
> firefox                 853244
> bash                    5984
> thunderbird             1896396
> kworker/3:0             0
> kworker/u16:1           0
> trace-cmd               256

This is as nice idea, but it needs some clean ups. I tried it out just
to play with it.

First, I just did:

 trace-cmd top

And nothing happened. Either a help message needs to be printed, or it
starts something. Maybe have it be interactive, that is, print out data
as it comes in?

Next I did:

# trace-cmd top -s
# trace-cmd top -p

comm		peak memory(in KB)
systemd         	9436
rcuc/0          	0
rcuc/1          	0
rcuc/2          	0
rcuc/3          	0
irq/24-ahci[000 	0
jbd2/dm-0-8     	232
systemd-journal 	88704
smartd          	4208
avahi-daemon    	3960
dbus-daemon     	4504
in:imjournal    	51888
gdbus           	15468
irq/26-em1      	0
dhclient        	19328
rs:main Q:Reg   	51888
sendmail        	4272
sendmail        	3792
sshd            	5676
bash            	14796
kworker/2:2     	1448
kworker/u8:0    	904
emacs           	66852
trace-cmd       	20792


But then for some reason it -p stopped doing anything. And -e didn't
stop it either. But after killing it manually and removing the pid
file, it appeared to work normal again. I can't seem to repeat it, so
lets not worry about that now.

More below.

> 
> Signed-off-by: Pratyush Anand <panand@...hat.com>
> ---
>  Makefile       |   2 +-
>  trace-cmd.c    |   4 +
>  trace-local.h  |   3 +
>  trace-record.c |   2 +-
>  trace-top.c    | 620 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  trace-usage.c  |   8 +
>  6 files changed, 637 insertions(+), 2 deletions(-)
>  create mode 100644 trace-top.c
> 
> diff --git a/Makefile b/Makefile
> index 62cb25b6c1bd..5324c141d3d6 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -331,7 +331,7 @@ TRACE_GUI_OBJS = trace-filter.o trace-compat.o trace-filter-hash.o trace-dialog.
>  		trace-xml.o
>  TRACE_CMD_OBJS = trace-cmd.o trace-record.o trace-read.o trace-split.o trace-listen.o \
>  	 trace-stack.o trace-hist.o trace-mem.o trace-snapshot.o trace-stat.o \
> -	 trace-hash.o trace-profile.o trace-stream.o
> +	 trace-hash.o trace-profile.o trace-stream.o trace-top.o
>  TRACE_VIEW_OBJS = trace-view.o trace-view-store.o
>  TRACE_GRAPH_OBJS = trace-graph.o trace-plot.o trace-plot-cpu.o trace-plot-task.o
>  TRACE_VIEW_MAIN_OBJS = trace-view-main.o $(TRACE_VIEW_OBJS) $(TRACE_GUI_OBJS)
> diff --git a/trace-cmd.c b/trace-cmd.c
> index 1a62faaf71ca..66e0f7cd37bc 100644
> --- a/trace-cmd.c
> +++ b/trace-cmd.c
> @@ -495,6 +495,10 @@ int main (int argc, char **argv)
>  		trace_stat(argc, argv);
>  		exit(0);
>  
> +	} else if (strcmp(argv[1], "top") == 0) {
> +		trace_top(argc, argv);
> +		exit(0);
> +
>  	} else if (strcmp(argv[1], "options") == 0) {
>  		show_plugin_options();
>  		exit(0);
> diff --git a/trace-local.h b/trace-local.h
> index 62363d0f9248..c9eac07da79a 100644
> --- a/trace-local.h
> +++ b/trace-local.h
> @@ -74,6 +74,8 @@ void trace_mem(int argc, char **argv);
>  
>  void trace_stat(int argc, char **argv);
>  
> +void trace_top(int argc, char **argv);
> +
>  struct hook_list;
>  
>  int trace_profile_record(struct tracecmd_input *handle,
> @@ -184,5 +186,6 @@ void update_first_instance(struct buffer_instance *instance, int topt);
>  
>  void show_instance_file(struct buffer_instance *instance, const char *name);
>  int count_cpus(void);
> +char *read_file(char *file, int *psize);
>  
>  #endif /* __TRACE_LOCAL_H */
> diff --git a/trace-record.c b/trace-record.c
> index 22b6835c0d5b..b2ce59e7eb4f 100644
> --- a/trace-record.c
> +++ b/trace-record.c
> @@ -3323,7 +3323,7 @@ static char *read_instance_file(struct buffer_instance *instance, char *file, in
>  	return buf;
>  }
>  
> -static char *read_file(char *file, int *psize)
> +char *read_file(char *file, int *psize)
>  {
>  	return read_instance_file(&top_instance, file, psize);
>  }
> diff --git a/trace-top.c b/trace-top.c
> new file mode 100644
> index 000000000000..056ae669856a
> --- /dev/null
> +++ b/trace-top.c
> @@ -0,0 +1,620 @@
> +/*
> + * Copyright (C) 2017 Red Hat Inc, Pratyush Anand <panand@...hat.com>
> + *
> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; version 2 of the License (not later!)
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not,  see <http://www.gnu.org/licenses>
> + *
> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> + * There are several scenarios where we land into oom-killer in the early
> + * boot process, specially in a memory constrained environment. It becomes
> + * very difficult to identify the user space task which required more
> + * memory compared to their previous released versions. This interface is
> + * an attempt to debug such issues, which will help us to identify peak
> + * memory usage of each task. mm_page_alloc() and mm_page_free() are lowest
> + * level of kernel APIs which allocates and frees memory from buddy. This
> + * tool enables tracepoint of these two functions and then keeps track of
> + * peak memory usage of each task.If a task was already running before this
> + * tool was started then, it initializes peak memory of that task with
> + * corresponding vmRSS component from /proc/$tid/statm
> + * If task was insmod/modprobe then it also appends module name (max 16
> + * char) in comm.
> + *
> + * There could still be some cma and memblock allocations which may not be
> + * tracked using this tool. Moreover, this may not be the exact peak memory
> + * estimation, rather an approximate value.
> + *
> + * usage:
> + * $ trace-cmd top -s
> + *	It will start gather statistics of all the process which will be
> + *	scheduled after this command execution.
> + * $ trace-cmd top -p
> + *	It will print peak memory usage(in KB) against each scheduled task.
> + * $ trace-cmd top -e
> + * It will print peak memory usage(in KB) against each scheduled task and
> + * will stop gathering statistics. It will also kill the child process
> + * initiated by -s option.
> + */
> +
> +#include <dirent.h>
> +#include <fcntl.h>
> +#include <getopt.h>
> +#include <signal.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +
> +#include "trace-local.h"
> +#include "trace-msg.h"
> +#include "kbuffer.h"
> +
> +#define TRACE_CMD_TOP_PID_FILE	"/tmp/trace-cmd-top-pid"

pid files belong in /var/run

> +static struct pevent *pevent;
> +static struct kbuffer *kbuf;
> +static struct pevent_record *record;
> +static int kmem_mm_page_alloc_id;
> +static int kmem_mm_page_free_id;
> +static bool set_print_only;
> +static bool set_print_and_exit;
> +
> +struct top_task_stats {
> +	pid_t pid;
> +	long cur_mem;
> +	long peak_mem;
> +	char *comm;
> +};
> +
> +static struct top_task_stats *top_task_stats;
> +static int top_task_cnt;
> +
> +void top_disable_events(void)
> +{
> +	char *path;
> +	char c;
> +	int fd;
> +	int ret;
> +
> +	/*
> +	 * WARNING: We want only few events related to memory allocation to
> +	 * be enabled. Disable all events here. Latter, selective events
> +	 * would be enabled

trace-cmd reset does this. You may want to call that instead of doing
it manually here. Someone else wanted to pull out trace-cmd reset into
its own file. I'll have to get those patches. But then we can call that
and enable tracing again.

Or better yet, if we are only tracing memory events, just create your
own instance and use that. Why bother with normal tracing?

 mkdir /sys/kernel/debug/tracing/instances/trace-cmd-memory

> +	 */
> +	c = '0';
> +	path = get_instance_file(&top_instance, "events/enable");
> +	fd = open(path, O_WRONLY);
> +	if (fd < 0)
> +		die("failed to open '%s'", path);
> +	ret = write(fd, &c, 1);
> +	close(fd);
> +	tracecmd_put_tracing_file(path);
> +	if (ret < 1)
> +		die("failed to write 0 to events/enable");
> +
> +	path = get_instance_file(&top_instance, "tracing_on");
> +	fd = open(path, O_WRONLY);
> +	if (fd < 0)
> +		die("failed to open '%s'", path);
> +	ret = write(fd, &c, 1);
> +	close(fd);
> +	tracecmd_put_tracing_file(path);
> +	if (ret < 1)
> +		die("failed to write 0 to tracing_on\n");
> +
> +	path = get_instance_file(&top_instance, "set_event");
> +	fd = open(path, O_WRONLY);
> +	if (fd < 0)
> +		die("failed to open '%s'", path);
> +	close(fd);
> +	tracecmd_put_tracing_file(path);
> +}
> +
> +void top_exit(void)
> +{
> +	int i;
> +	/*
> +	 * free all the dynamic memories which could have been allocated by
> +	 * this program
> +	 */
> +	if (kbuf)
> +		kbuffer_free(kbuf);
> +	if (record)
> +		free_record(record);
> +	if (pevent)
> +		pevent_free(pevent);

The above three functions all can handle a NULL pointer. Remove the if
statements.

> +	if (top_task_stats) {
> +		for (i = 0; i < top_task_cnt; i++)
> +			free(top_task_stats[i].comm);
> +		free(top_task_stats);
> +	}
> +
> +	top_disable_events();
> +	remove(TRACE_CMD_TOP_PID_FILE);
> +}
> +
> +void top_initialize_events(void)
> +{
> +	char *path;
> +	int fd;
> +	char c;
> +	int ret;
> +	char *buf;
> +	int size;
> +	enum kbuffer_long_size long_size;
> +	enum kbuffer_endian endian;
> +	char id_str[8];
> +

Might want to look at trace_stream_init() and use that instead. It is
made for live streaming of events.

> +	/* trace data is read from the trace_raw_pipe directly */
> +	if (tracecmd_host_bigendian())
> +		endian = KBUFFER_ENDIAN_BIG;
> +	else
> +		endian = KBUFFER_ENDIAN_LITTLE;
> +
> +	if (sizeof(long) == 8)
> +		long_size = KBUFFER_LSIZE_8;
> +	else
> +		long_size = KBUFFER_LSIZE_4;
> +
> +	/* allocate global structures which will be needed during parsing */
> +	kbuf = kbuffer_alloc(long_size, endian);
> +	if (!kbuf) {
> +		warning("failed to allocate kbuf\n");
> +		goto free_event;
> +	}
> +	record = malloc(sizeof(*record));
> +	if (!record) {
> +		warning("failed to allocate record\n");
> +		goto free_event;
> +	}
> +	pevent = pevent_alloc();
> +	if (!pevent) {
> +		warning("failed to allocate pevent\n");
> +		goto free_event;
> +	}
> +	/* populate pevent structure */
> +	buf = read_file("events/header_page", &size);
> +	if (!buf) {
> +		warning("failed to read header page\n");
> +		goto free_event;
> +	}
> +	/* extract information from header page */
> +	ret = pevent_parse_header_page(pevent, buf, size,
> +					sizeof(unsigned long));
> +	free(buf);
> +	if (ret < 0) {
> +		warning("failed to parse header page\n");
> +		goto free_event;
> +	}
> +	/* extract format of events which will be enabled */
> +	buf = read_file("events/kmem/mm_page_alloc/format", &size);
> +	if (!buf) {
> +		warning("failed to read mm_page_alloc format\n");
> +		goto free_event;
> +	}
> +	ret = pevent_parse_event(pevent, buf, size, "kmem");
> +	free(buf);
> +	if (ret < 0) {
> +		warning("failed to parse mm_page_alloc event\n");
> +		goto free_event;
> +	}
> +	buf = read_file("events/kmem/mm_page_free/format", &size);
> +	if (!buf) {
> +		warning("failed to read mm_page_free format\n");
> +		goto free_event;
> +	}
> +	ret = pevent_parse_event(pevent, buf, size, "kmem");
> +	free(buf);
> +	if (ret < 0) {
> +		warning("failed to parse mm_page_free event\n");
> +		goto free_event;
> +	}
> +	/* set needed events */
> +	path = get_instance_file(&top_instance, "set_event");
> +	fd = open(path, O_WRONLY);
> +	if (fd < 0) {
> +		warning("failed to open '%s'\n", path);
> +		goto free_path;
> +	}
> +	ret = write(fd, "kmem:mm_page_alloc", strlen("kmem:mm_page_alloc"));
> +	if (ret != strlen("kmem:mm_page_alloc")) {
> +		warning("failed to set kmem:mm_page_alloc event\n");
> +		goto close_fd;
> +	}
> +	ret = write(fd, "kmem:mm_page_free", strlen("kmem:mm_page_free"));
> +	if (ret != strlen("kmem:mm_page_free")) {
> +		warning("failed to set kmem:mm_page_free event\n");
> +		goto close_fd;
> +	}
> +	close(fd);
> +	tracecmd_put_tracing_file(path);
> +	/* store ID of needed events, will be used to compare*/
> +	path = get_instance_file(&top_instance, "events/kmem/mm_page_alloc/id");
> +	fd = open(path, O_RDONLY);
> +	if (fd < 0) {
> +		warning("failed to open '%s'\n", path);
> +		goto free_event;
> +	}
> +	ret = read(fd, id_str, 8);
> +	if (ret < 1) {
> +		warning("failed to read events/kmem/mm_page_alloc/id\n");
> +		goto close_fd;
> +	}
> +	close(fd);
> +	tracecmd_put_tracing_file(path);
> +	kmem_mm_page_alloc_id = atoi(id_str);
> +	path = get_instance_file(&top_instance, "events/kmem/mm_page_free/id");
> +	fd = open(path, O_RDONLY);
> +	if (fd < 0) {
> +		warning("failed to open '%s'\n", path);
> +		goto free_event;
> +	}
> +	ret = read(fd, id_str, 8);
> +	if (ret < 1) {
> +		warning("failed to read events/kmem/mm_page_free/id\n");
> +		goto close_fd;
> +	}
> +	close(fd);
> +	tracecmd_put_tracing_file(path);
> +	kmem_mm_page_free_id = atoi(id_str);
> +	/* enable tracing */
> +	c = '1';
> +	path = get_instance_file(&top_instance, "tracing_on");
> +	fd = open(path, O_WRONLY);
> +	if (fd < 0) {
> +		warning("failed to open '%s'\n", path);
> +		goto free_event;
> +	}
> +	ret = write(fd, &c, 1);
> +	if (ret < 1) {
> +		warning("failed to write 0 in tracing_on file\n");
> +		goto close_fd;
> +	}
> +	close(fd);
> +	tracecmd_put_tracing_file(path);
> +
> +	return;
> +
> +close_fd:
> +	close(fd);
> +free_path:
> +	tracecmd_put_tracing_file(path);
> +free_event:
> +	top_exit();
> +	die("error with event initialization");
> +}
> +
> +void top_print_stats()
> +{
> +	FILE *fp = stdout;
> +	int task;
> +
> +	fprintf(fp, "\ncomm\t\tpeak memory(in KB)\n");
> +	for (task = 0; task < top_task_cnt; task++)
> +		fprintf(fp, "%-16s\t%ld\n",
> +			top_task_stats[task].comm,
> +			top_task_stats[task].peak_mem * (page_size >> 10));
> +}
> +
> +void top_print_handler(int sig_num)
> +{
> +	if (sig_num == SIGUSR1)
> +		set_print_only = true;
> +	else if (sig_num == SIGUSR2)
> +		set_print_and_exit = true;
> +}
> +
> +int top_pid_cmp(const void *a, const void *b)
> +{
> +	const struct top_task_stats *sa = a;
> +	const struct top_task_stats *sb = b;
> +
> +	if (sa->pid > sb->pid)
> +		return 1;
> +	if (sa->pid < sb->pid)
> +		return -1;
> +	return 0;
> +}
> +
> +int top_get_pid_comm_rss(char *comm, long *rss, pid_t pid)
> +{
> +	char proc_statm[21];
> +	char rss_str[9];
> +	char proc_comm[20];
> +	char proc_cmdline[23];
> +	char cmdline[24], *cmd;
> +	int fd, i = 0, len;
> +
> +	/* read rss from /proc/pid/statm */
> +	sprintf(proc_statm, "/proc/%d/statm", pid);
> +	fd = open(proc_statm, O_RDONLY);
> +	if (fd < 0)
> +		return -1;
> +	/* ignore first entry */
> +	do {
> +		read(fd, rss_str, 1);
> +	} while (!isspace(rss_str[0]));
> +	/* read second entry:rss */
> +	do {
> +		read(fd, &rss_str[i], 1);
> +	} while (!isspace(rss_str[i++]));
> +	rss_str[i] = '\0';
> +	*rss = atol(rss_str);
> +	close(fd);
> +	/* read comm from /proc/pid/comm */
> +	sprintf(proc_comm, "/proc/%d/comm", pid);
> +	fd = open(proc_comm, O_RDONLY);
> +	if (fd < 0)
> +		return -1;
> +	len = read(fd, comm, 16);
> +	close(fd);
> +	comm[len - 1] = '\0';
> +	if (!strcmp(comm, "insmod") || !strcmp(comm, "modprobe")) {
> +		/* read comm from /proc/pid/cmdline */
> +		sprintf(proc_cmdline, "/proc/%d/cmdline", pid);
> +		fd = open(proc_cmdline, O_RDONLY);
> +		if (fd < 0)
> +			return -1;
> +		/*
> +		 * cmdline would be something like insmodvfat.ko. A module
> +		 * name could be greater than 16, but copy a max of 16. So
> +		 * a max of 8 bytes for insmod/modprobe and 16 for module
> +		 * name
> +		 */
> +		cmd = cmdline;
> +		len = read(fd, cmd, 24);
> +		close(fd);
> +		cmd[len - 1] = '\0';
> +		cmd += strlen(cmd) + 1;
> +		comm = strcat(comm, "-");
> +		comm = strcat(comm, cmd);
> +		/* now comm should look like insmod-vfat.ko */
> +	}
> +
> +	return 0;
> +}
> +
> +void top_update_pid_stats(pid_t pid, long memory, bool inc)
> +{
> +	/* 16 for task name and 16 for module name and 1 for separator */
> +	char comm[16 + 16 + 1];
> +	long rss;
> +	struct top_task_stats *statp, stat;
> +
> +	/* check, if we have got already an entry for this pid in trace */
> +	stat.pid = pid;
> +	statp = bsearch(&stat, top_task_stats, top_task_cnt,
> +			sizeof(struct top_task_stats), top_pid_cmp);
> +	if (!statp) {
> +		/*
> +		 * if there is no entry in /proc for this pid yet, then no
> +		 * need to take care of memory consumption of the task. We
> +		 * will anyway, have those information in statm/rss when
> +		 * /proc entry will be created
> +		 */
> +		if (top_get_pid_comm_rss(comm, &rss, pid))
> +			return;
> +		top_task_stats = realloc(top_task_stats, sizeof(*top_task_stats) *
> +				(top_task_cnt + 1));
> +		if (!top_task_stats)
> +			die("Failed to allocate memory for new task");
> +		statp = top_task_stats + top_task_cnt++;
> +		statp->pid = pid;
> +		statp->cur_mem = rss;
> +		statp->peak_mem = rss;
> +		statp->comm = strdup(comm);
> +		qsort(top_task_stats, top_task_cnt, sizeof(struct top_task_stats),
> +				top_pid_cmp);
> +	}
> +	if (inc) {
> +		statp->cur_mem += memory;
> +		if (statp->peak_mem < statp->cur_mem)
> +			statp->peak_mem = statp->cur_mem;
> +	} else {
> +		statp->cur_mem -= memory;
> +	}
> +}
> +
> +void top_analyze_event(struct event_format *event, pid_t pid, int id)
> +{
> +	struct format_field *field;
> +	int order;
> +
> +	if (id == kmem_mm_page_alloc_id || id == kmem_mm_page_free_id) {
> +		field = pevent_find_field(event, "order");
> +		if (field) {
> +			order = *(int *)(record->data + field->offset);
> +			top_update_pid_stats(pid, 2 << order,
> +					 id == kmem_mm_page_alloc_id);
> +		}
> +	}
> +}
> +
> +void top_update_task_stats(void *page, int size, int cpu)
> +{
> +	unsigned long long ts;
> +	void *ptr;
> +	int id;
> +	pid_t pid;
> +	struct event_format *event;
> +
> +	/* load page in kbuffer */
> +	kbuffer_load_subbuffer(kbuf, page);
> +	if (kbuffer_subbuffer_size(kbuf) > size) {
> +		warning("%s: page_size > size\n", __func__);
> +		return;
> +	}
> +
> +	do {
> +		/* process each event in the page */
> +		ptr = kbuffer_next_event(kbuf, NULL);
> +		if (!ptr)
> +			break;
> +		ptr = kbuffer_read_event(kbuf, &ts);
> +		if (!ptr)
> +			break;
> +		memset(record, 0, sizeof(*record));
> +		record->ts = ts;
> +		record->size = kbuffer_event_size(kbuf);
> +		record->record_size = kbuffer_curr_size(kbuf);
> +		record->cpu = cpu;
> +		record->data = ptr;
> +		record->ref_count = 1;
> +		id = pevent_data_type(pevent, record);
> +		pid = pevent_data_pid(pevent, record);
> +		event = pevent_data_event_from_type(pevent, id);
> +		if (event)
> +			top_analyze_event(event, pid, id);
> +	} while (ptr < page + size);
> +}
> +
> +void top_process_events(void)
> +{
> +	struct dirent *dent;
> +	void *page;
> +	char *path;
> +	char *file;
> +	DIR *dir;
> +	int len;
> +	int fd;
> +	int r;
> +
> +	path = tracecmd_get_tracing_file("per_cpu");
> +	if (!path)
> +		die("Failed to allocate per_cpu info");
> +
> +	dir = opendir(path);
> +	if (!dir)
> +		die("Failed to open per_cpu directory");
> +
> +	len = strlen(path);
> +	file = malloc(len + strlen("trace_pipe_raw") + 32);
> +	page = malloc(page_size);
> +	if (!file || !page)
> +		die("Failed to allocate trace_pipe_raw info");
> +
> +	while ((dent = readdir(dir))) {
> +		const char *name = dent->d_name;
> +
> +		if (strncmp(name, "cpu", 3) != 0)
> +			continue;
> +
> +		sprintf(file, "%s/%s/trace_pipe_raw", path, name);
> +		fd = open(file, O_RDONLY | O_NONBLOCK);
> +		if (fd < 0)
> +			continue;
> +		do {
> +			r = read(fd, page, page_size);
> +			if (r > 0)
> +				top_update_task_stats(page, r, atoi(&name[3]));
> +		} while (r > 0);
> +		close (fd);
> +	}
> +	free(file);
> +	free(page);
> +	closedir(dir);
> +	tracecmd_put_tracing_file(path);
> +}
> +
> +void top_start(void)
> +{
> +	pid_t pid;
> +	int fd, ret;
> +
> +	top_disable_events();
> +	top_initialize_events();
> +	page_size = getpagesize();
> +
> +	pid = fork();
> +	if (!pid) {
> +		do {
> +			top_process_events();
> +			sleep(1);

We should probably just sleep on the buffer, waiting for data.

> +			if (set_print_only) {
> +				top_print_stats();
> +				set_print_only = false;
> +			}
> +			if (set_print_and_exit) {
> +				top_print_stats();
> +				top_exit();
> +				break;
> +			}
> +		} while (1);
> +	} else {
> +		fd = open(TRACE_CMD_TOP_PID_FILE, O_WRONLY | O_CREAT);
> +		if (fd < 0)
> +			goto kill_child;
> +		ret = write(fd, &pid, sizeof(pid));
> +		if (ret < sizeof(pid))
> +			goto close_fd;
> +		close(fd);
> +	}
> +
> +	return;
> +
> +close_fd:
> +	close(fd);
> +kill_child:
> +	kill(pid, SIGUSR2);
> +	die("Failed to start trace-top");
> +	return;
> +}
> +
> +void trace_top (int argc, char **argv)
> +{
> +	int c, fd;
> +	struct sigaction sa;
> +	pid_t pid;
> +
> +	memset (&sa, 0, sizeof (sa));
> +	sa.sa_handler = &top_print_handler;
> +	sigaction (SIGUSR1, &sa, NULL);
> +	sigaction (SIGUSR2, &sa, NULL);
> +
> +	for (;;) {
> +		c = getopt(argc-1, argv+1, "hsep");
> +		if (c == -1)
> +			break;
> +		switch (c) {
> +		case 'h':
> +			usage(argv);
> +			break;
> +		case 's':
> +			fd = open(TRACE_CMD_TOP_PID_FILE, O_RDONLY);
> +			if (fd >= 0) {
> +				close(fd);
> +				die("trace-cmd top already running");
> +			}
> +			top_start();
> +			break;
> +		case 'e':
> +			fd = open(TRACE_CMD_TOP_PID_FILE, O_RDONLY);
> +			if (fd < 0)
> +				die("trace-cmd top not running");
> +			read(fd, &pid, sizeof(pid));
> +			kill(pid, SIGUSR2);
> +			close(fd);
> +			break;
> +		case 'p':
> +			fd = open(TRACE_CMD_TOP_PID_FILE, O_RDONLY);
> +			if (fd < 0)
> +				die("trace-cmd top not running");
> +			read(fd, &pid, sizeof(pid));
> +			kill(pid, SIGUSR1);
> +			close(fd);
> +			break;
> +		default:
> +			usage(argv);
> +		}
> +	}
> +
> +	exit(0);
> +}
> diff --git a/trace-usage.c b/trace-usage.c
> index 5c1a69255581..30e301b12fef 100644
> --- a/trace-usage.c
> +++ b/trace-usage.c
> @@ -196,6 +196,14 @@ static struct usage_help usage_help[] = {
>  		" %s stat"
>  	},
>  	{
> +		"top",
> +		"shows peak memory usage statistics of task and kernel module",
> +		" %s top [-s] [-e] [-p]\n"
> +		"          -s start capture statistics\n"
> +		"          -e end capture statistics\n"
> +		"          -p print statistics on console\n"
> +	},
> +	{

Looks good, I think we can extend on this.

-- Steve

>  		"split",
>  		"parse a trace.dat file into smaller file(s)",
>  		" %s split [options] -o file [start [end]]\n"

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ