lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1364394409.5053.78.camel@laptop>
Date:	Wed, 27 Mar 2013 15:26:49 +0100
From:	Peter Zijlstra <a.p.zijlstra@...llo.nl>
To:	Roberto Vitillo <ravitillo@....gov>
Cc:	linux-kernel@...r.kernel.org, paulus@...ba.org, mingo@...hat.com,
	acme@...stprotocols.net, namhyung@...nel.org
Subject: Re: [PATCH v2] perf: add callgrind conversion tool


This sort of reminds me of another little proglet I have lying about
that might need a home..


---
/*
 * Library to hook into code compiled with -finstrument-functions it will
 * record function arcs (call_fn, this_fn) as well as the sum of whatever event
 * is being measured over that function.
 *
 * Copyright (C) 2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@...hat.com>
 *
 * This file is copyrighted under the GPLv2 License (and not any later version).
 *
 * SuperFastHash under LGPLv2.1 (http://www.azillionmonkeys.com/qed/hash.html)
 *
 * Compilation example:
 * gcc -shared -fPIC profviz.c -o profviz.so -lpthread -ldl -lelf
 *
 * Usage example:
 * LD_PRELOAD=./profviz.so your_program 
 */

#define _GNU_SOURCE

#include "util/util.h"
#include "perf.h"
#include "util/parse-events.h"

#include <sys/time.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
#include <pthread.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <libelf.h>
#include <gelf.h>
#include <link.h>
#include <math.h>

#define barrier() asm volatile("" ::: "memory")

static u64 rdpmc(unsigned int counter)
{
	unsigned int low, high;

	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

	return low | ((u64)high) << 32;
}

static u64 rdtsc(void)
{
	unsigned int low, high;

	asm volatile("rdtsc" : "=a" (low), "=d" (high));

	return low | ((u64)high) << 32;
}

static u64 mmap_read_self(void *addr)
{
	struct perf_event_mmap_page *pc = addr;
	u32 seq, idx, time_mult = 0, time_shift = 0, width = 0;
	u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
	s64 pmc = 0;

	do {
		seq = pc->lock;
		barrier();

		enabled = pc->time_enabled;
		running = pc->time_running;

		if (pc->cap_usr_time && enabled != running) {
			cyc = rdtsc();
			time_mult = pc->time_mult;
			time_shift = pc->time_shift;
			time_offset = pc->time_offset;
		}

		idx = pc->index;
		count = pc->offset;
		if (pc->cap_usr_rdpmc && idx) {
			width = pc->pmc_width;
			pmc = rdpmc(idx - 1);
		}

		barrier();
	} while (pc->lock != seq);

	if (idx) {
		pmc <<= 64 - width;
		pmc >>= 64 - width; /* shift right signed */
		count += pmc;
	}

	if (enabled != running) {
		u64 quot, rem;

		quot = (cyc >> time_shift);
		rem = cyc & ((1 << time_shift) - 1);
		delta = time_offset + quot * time_mult +
			((rem * time_mult) >> time_shift);

		enabled += delta;
		if (idx)
			running += delta;

		quot = count / running;
		rem = count % running;
		count = quot * enabled + (rem * enabled) / running;
	}

	return count;
}

static int (*pthread_create_orig)(pthread_t *__restrict,
		__const pthread_attr_t *__restrict,
		void *(*)(void *),
		void *__restrict) = NULL;

static struct perf_event_attr perf_attr = {
	.type = PERF_TYPE_HARDWARE,
	.config = PERF_COUNT_HW_CPU_CYCLES,
	.exclude_kernel = 1,
};

struct prof_arc {
	void *call_fn;
	void *this_fn;
	uint64_t count;
};

struct prof_fn {
	void *this_fn;
	uint64_t count;
};

#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
		                       +(uint32_t)(((const uint8_t *)(d))[0]) )

static uint32_t SuperFastHash (const char * data, int len, uint32_t hash) {
	uint32_t tmp;
	int rem;

	if (len <= 0 || data == NULL) return 0;

	rem = len & 3;
	len >>= 2;

	/* Main loop */
	for (;len > 0; len--) {
		hash  += get16bits (data);
		tmp    = (get16bits (data+2) << 11) ^ hash;
		hash   = (hash << 16) ^ tmp;
		data  += 2*sizeof (uint16_t);
		hash  += hash >> 11;
	}

	/* Handle end cases */
	switch (rem) {
		case 3: hash += get16bits (data);
			hash ^= hash << 16;
			hash ^= data[sizeof (uint16_t)] << 18;
			hash += hash >> 11;
			break;
		case 2: hash += get16bits (data);
			hash ^= hash << 11;
			hash += hash >> 17;
			break;
		case 1: hash += *data;
			hash ^= hash << 10;
			hash += hash >> 1;
	}

	/* Force "avalanching" of final 127 bits */
	hash ^= hash << 3;
	hash += hash >> 5;
	hash ^= hash << 4;
	hash += hash >> 17;
	hash ^= hash << 25;
	hash += hash >> 6;

	return hash;
}

void die(const char *err, ...)
{
	va_list params;

	va_start(params, err);
	vfprintf(stderr, err, params);
	va_end(params);

	exit(-1);
}


#define HASH_TABLE_SIZE		16384
#define AVG_STACK_DEPTH		8

#define ARC_HASH_SIZE	(HASH_TABLE_SIZE * AVG_STACK_DEPTH)

static struct prof_arc prof_arc_hash[ARC_HASH_SIZE];
static pthread_mutex_t prof_arc_lock;

#define FN_HASH_SIZE	(HASH_TABLE_SIZE)

static struct prof_fn  prof_fn_hash[FN_HASH_SIZE];
static pthread_mutex_t prof_fn_lock;

#define HASH_INIT		0x9e370001UL	
#define HASH_CHAIN		16

static inline void *
prof_hash_find(void *hash_base, const size_t hash_size, 
		const void *key, const size_t key_size,
		const size_t entry_size, pthread_mutex_t *lock)

{
	uint32_t hash = HASH_INIT;
	void *entry = NULL;
	int i, j;

	for (i = 0; i < HASH_CHAIN; i++) {
		hash = SuperFastHash(key, key_size, hash);
		entry = hash_base + entry_size * (hash % FN_HASH_SIZE);
		if (!memcmp(entry, key, key_size))
			return entry;

		for (j = 0; j < key_size; j++) {
			if (*((char *)entry + j))
				goto next_1;
		}

		goto found_empty;
next_1:
		continue;
	}
	die("fn_hash too full");

found_empty:
	hash = HASH_INIT;
	pthread_mutex_lock(lock);
	for (i = 0; i < HASH_CHAIN; i++) {
		hash = SuperFastHash(key, key_size, hash);
		entry = hash_base + entry_size * (hash % FN_HASH_SIZE);

		if (!memcmp(entry, key, key_size))
			goto unlock;

		for (j = 0; j < key_size; j++) {
			if (*((char *)entry + j))
				goto next_2;
		}

		memcpy(entry, key, key_size);
		goto unlock;
next_2:
		continue;
	}
	die("fn_hash too full (locked)");
unlock:
	pthread_mutex_unlock(lock);

	return entry;
}

static struct prof_fn *prof_fn_find(void *this_fn)
{
	return prof_hash_find(prof_fn_hash, FN_HASH_SIZE,
			&this_fn, sizeof(this_fn), sizeof(struct prof_fn),
			&prof_fn_lock);
}

static struct prof_arc *prof_arc_find(void *this_fn, void *call_fn)
{
	struct prof_arc arc = { .this_fn = this_fn, .call_fn = call_fn, };

	return prof_hash_find(prof_arc_hash, ARC_HASH_SIZE,
			&arc, 2*sizeof(void *), sizeof(struct prof_arc),
			&prof_arc_lock);
}

static unsigned long page_size;

struct prof_stack {
	struct prof_arc *arc;
	struct prof_fn *fn;
	uint64_t stamp;
};

static __thread struct prof_stack prof_stack[128];
static __thread int prof_stack_idx = 0;

static __thread void *perf_event;

static void thread_init(void)
{
	int fd;

	fd = sys_perf_event_open(&perf_attr, 0, -1, -1, 0);
	if (fd < 0)
		die("failed to create perf_event");

	perf_event = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
	if (perf_event == (void *)(-1))
		die("failed to mmap perf_event");

	close(fd);

	prof_stack_idx = 0;
}

static u64 first_count;

void prof_init(void) __attribute__((constructor));
void prof_init(void)
{
	char *event_str;

	page_size = sysconf(_SC_PAGESIZE);

	pthread_create_orig = dlsym(RTLD_NEXT, "pthread_create");
	if (!pthread_create_orig) {
		char *error = dlerror();
		if (!error)
			error = "pthread_create is NULL";
		die("%s\n", error);
	}

	memset(prof_arc_hash, 0, sizeof(prof_arc_hash));
	pthread_mutex_init(&prof_arc_lock, NULL);

	memset(prof_fn_hash, 0, sizeof(prof_fn_hash));
	pthread_mutex_init(&prof_fn_lock, NULL);

	event_str = getenv("PROF_EVENT");
	if (event_str)
		/* perf_attr = parse_attr_crap(event_str); */
		;

	thread_init(); /* main thread */

	first_count = mmap_read_self(perf_event);
}

struct prof_symbol {
	void *addr;
	unsigned long size;
	const char *name;
};

static struct prof_symbol *prof_symbols;
static unsigned long prof_nr_symbols;

static void prof_gelf(const char *name, void (*func)(Elf *elf, Elf_Scn *scn, GElf_Shdr *shdr, void *data), void *data)
{
	Elf         *elf;
	Elf_Scn     *scn = NULL;
	GElf_Shdr   shdr;
	int         fd;

	elf_version(EV_CURRENT);

	fd = open(name, O_RDONLY);
	elf = elf_begin(fd, ELF_C_READ, NULL);

	while ((scn = elf_nextscn(elf, scn)) != NULL) {
		gelf_getshdr(scn, &shdr);
		if (shdr.sh_type == SHT_SYMTAB) {
			func(elf, scn, &shdr, data);
		}
	}

	elf_end(elf);
	close(fd);
}

static void prof_gelf_nr_symbols(Elf *elf, Elf_Scn *scn, GElf_Shdr *shdr, void *data)
{
	unsigned long *nr_symbols = data;
	unsigned long count;

	count = shdr->sh_size / shdr->sh_entsize;
	*nr_symbols += count;
}

static int prof_count_symbols(struct dl_phdr_info *info, size_t size, void *data)
{
	prof_gelf(info->dlpi_name, prof_gelf_nr_symbols, data);
	return 0;
}

static void prof_gelf_load_symbols(Elf *elf, Elf_Scn *scn, GElf_Shdr *shdr, void *_info)
{
	struct dl_phdr_info *info = _info;
	unsigned long count = shdr->sh_size / shdr->sh_entsize;
	unsigned long i;
	Elf_Data *data;

	data = elf_getdata(scn, NULL);

	for (i = 0; i < count; i++) {
		GElf_Sym sym;
		char *name;
		struct prof_symbol *symbol = &prof_symbols[prof_nr_symbols];

		gelf_getsym(data, i, &sym);

		if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
			continue;

		if (!sym.st_size)
			continue;

		name = elf_strptr(elf, shdr->sh_link, sym.st_name);
		if (!name)
			continue;

		symbol->name = strdup(name);
		symbol->addr = (void *)(sym.st_value + info->dlpi_addr);
		symbol->size = sym.st_size;

		prof_nr_symbols++;
	}
}

static int prof_load_symbols(struct dl_phdr_info *info, size_t size, void *data)
{
	prof_gelf(info->dlpi_name, prof_gelf_load_symbols, info);
	return 0;
}

static int prof_cmp_symbol(const void *_a, const void *_b)
{
	const struct prof_symbol *a = _a, *b = _b;

	if (a->addr < b->addr)
		return -1;

	if (a->addr > b->addr)
		return 1;

	return 0;
}

static void load_symbols(void)
{
	unsigned long nr_symbols = 0;
	struct dl_phdr_info dl_info = {
		.dlpi_name = "/proc/self/exe",
		.dlpi_addr = 0,
	};

	prof_count_symbols(&dl_info, sizeof(dl_info), &nr_symbols);
	dl_iterate_phdr(prof_count_symbols, &nr_symbols);

	prof_symbols = calloc(nr_symbols, sizeof(struct prof_symbol));

	prof_load_symbols(&dl_info, sizeof(dl_info), NULL);
	dl_iterate_phdr(prof_load_symbols, NULL);

	qsort(prof_symbols, prof_nr_symbols, 
			sizeof(struct prof_symbol), prof_cmp_symbol);
}

static struct prof_symbol *find_symbol(void *addr)
{
	struct prof_symbol *sym;
	unsigned long l, u, i;

	l = 0;
	u = prof_nr_symbols;

	while (l < u) {
		i = (l + u) / 2;
		sym = &prof_symbols[i];

		if (addr >= sym->addr && addr < sym->addr + sym->size)
			return sym;

		if (addr < sym->addr)
			u = i;
		else
			l = i + 1;
	}

	return NULL;
}

void prof_exit(void) __attribute__((destructor));
void prof_exit(void)
{
	FILE *file;
	int i;
	int64_t max_fn_count = 0;

	load_symbols();

	file = fopen("prof.dot", "w");
	if (!file)
		die("failed to create output file");

	fprintf(file, "#\n# first count: %lu\n#\n", first_count);

	/*
	 * Maybe replace "profile" with the argv
	 */
	fprintf(file, "digraph profile {\n");

	for (i = 0; i < ARC_HASH_SIZE; i++) {
		struct prof_arc *arc = &prof_arc_hash[i];
		struct prof_symbol *c_sym, *t_sym;
		struct prof_fn *c;
		double p;

		if (!(arc->call_fn && arc->this_fn))
			continue;

		c_sym = find_symbol(arc->call_fn);
		t_sym = find_symbol(arc->this_fn);

		if (!c_sym || !t_sym)
			die("symbols missing");

		c = prof_fn_find(c_sym->addr);
		if (!c)
			die("fn_hash|symtab borken");

		p = (double)arc->count / (double)c->count;
		fprintf(file, 
		"  \"%s\" -> \"%s\" [label=\"%f\", color=\"%.3f %.3f %.3f\"]\n",
			c_sym->name, t_sym->name, 100.0 * p,
			0.33, 1.0, p);
	}

	for (i = 0; i < ARC_HASH_SIZE; i++) {
		struct prof_arc *arc = &prof_arc_hash[i];
		struct prof_symbol *c_sym;
		struct prof_fn *c;
		double p;

		if (!(arc->call_fn && arc->this_fn))
			continue;

		c_sym = find_symbol(arc->call_fn);
		c = prof_fn_find(c_sym->addr);

		c->count -= arc->count;
	}

	for (i = 0; i < FN_HASH_SIZE; i++) {
		struct prof_fn *fn = &prof_fn_hash[i];
		struct prof_symbol *s;

		if (!fn->this_fn)
			continue;

		if ((int64_t)fn->count > max_fn_count)
			max_fn_count = fn->count;

		s = find_symbol(fn->this_fn);
//		fprintf(file, "#  %s %ld\n", s->name, fn->count);
	}

	for (i = 0; i < FN_HASH_SIZE; i++) {
		struct prof_fn *fn = &prof_fn_hash[i];
		struct prof_symbol *s;
		double p;

		if (!fn->this_fn)
			continue;

		s = find_symbol(fn->this_fn);
		p = (double)fn->count / (double)max_fn_count;
		fprintf(file, "  \"%s\" [color=\"%.3f %.3f %.3f\"]\n", 
				s->name, 0.0, 1.0, p);
	}

	fprintf(file, "}\n");
	fflush(file);
	fclose(file);
}

struct tramp_data {

	void *(*func)(void *);
	void *arg;

	pthread_mutex_t lock;
	pthread_cond_t  wait;
};

static void *tramp_func(void *data)
{
	struct tramp_data *tramp_data = data;
	void *(*func)(void *) = tramp_data->func;
	void *arg = tramp_data->arg;
	void *ret;

	thread_init();

	pthread_mutex_lock(&tramp_data->lock);
	pthread_cond_signal(&tramp_data->wait);
	pthread_mutex_unlock(&tramp_data->lock);

	ret = func(arg);

	munmap(perf_event, page_size);

	return ret;
}

/* hijack pthread_create() */
int pthread_create(pthread_t *__restrict thread,
		__const pthread_attr_t *__restrict attr,
		void *(*func)(void *),
		void *__restrict arg)
{
	struct tramp_data tramp_data = {
		.func = func,
		.arg = arg,
	};
	int ret;

	pthread_cond_init(&tramp_data.wait, NULL);
	pthread_mutex_init(&tramp_data.lock, NULL);

	pthread_mutex_lock(&tramp_data.lock);

	ret = pthread_create_orig(thread, attr, &tramp_func, &tramp_data);
	if (!ret)
		pthread_cond_wait(&tramp_data.wait, &tramp_data.lock);

	pthread_mutex_unlock(&tramp_data.lock);

	pthread_mutex_destroy(&tramp_data.lock);
	pthread_cond_destroy(&tramp_data.wait);

	return ret;
} 

void __cyg_profile_func_enter(void *this_fn, void *call_fn)
{
	struct prof_stack *st = &prof_stack[prof_stack_idx++];

	st->arc = prof_arc_find(this_fn, call_fn);
	st->fn = prof_fn_find(this_fn);
	st->stamp = mmap_read_self(perf_event);
}

void __cyg_profile_func_exit(void *this_fn, void *call_fn)
{
	struct prof_stack *st = &prof_stack[--prof_stack_idx];
	uint64_t now, delta;

	now = mmap_read_self(perf_event);
	delta = now - st->stamp;
	(void)__sync_fetch_and_add(&st->fn->count, delta);
	(void)__sync_fetch_and_add(&st->arc->count, delta);
}
/*
 * Library to hook into code compiled with -finstrument-functions it will
 * record function arcs (call_fn, this_fn) as well as the sum of whatever event
 * is being measured over that function.
 *
 * Copyright (C) 2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@...hat.com>
 *
 * This file is copyrighted under the GPLv2 License (and not any later version).
 *
 * SuperFastHash under LGPLv2.1 (http://www.azillionmonkeys.com/qed/hash.html)
 *
 * Compilation example:
 * gcc -shared -fPIC profviz.c -o profviz.so -lpthread -ldl -lelf
 *
 * Usage example:
 * LD_PRELOAD=./profviz.so your_program 
 */

#define _GNU_SOURCE

#include "util/util.h"
#include "perf.h"
#include "util/parse-events.h"

#include <sys/time.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
#include <pthread.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <libelf.h>
#include <gelf.h>
#include <link.h>
#include <math.h>

#define barrier() asm volatile("" ::: "memory")

static u64 rdpmc(unsigned int counter)
{
	unsigned int low, high;

	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

	return low | ((u64)high) << 32;
}

static u64 rdtsc(void)
{
	unsigned int low, high;

	asm volatile("rdtsc" : "=a" (low), "=d" (high));

	return low | ((u64)high) << 32;
}

static u64 mmap_read_self(void *addr)
{
	struct perf_event_mmap_page *pc = addr;
	u32 seq, idx, time_mult = 0, time_shift = 0, width = 0;
	u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
	s64 pmc = 0;

	do {
		seq = pc->lock;
		barrier();

		enabled = pc->time_enabled;
		running = pc->time_running;

		if (pc->cap_usr_time && enabled != running) {
			cyc = rdtsc();
			time_mult = pc->time_mult;
			time_shift = pc->time_shift;
			time_offset = pc->time_offset;
		}

		idx = pc->index;
		count = pc->offset;
		if (pc->cap_usr_rdpmc && idx) {
			width = pc->pmc_width;
			pmc = rdpmc(idx - 1);
		}

		barrier();
	} while (pc->lock != seq);

	if (idx) {
		pmc <<= 64 - width;
		pmc >>= 64 - width; /* shift right signed */
		count += pmc;
	}

	if (enabled != running) {
		u64 quot, rem;

		quot = (cyc >> time_shift);
		rem = cyc & ((1 << time_shift) - 1);
		delta = time_offset + quot * time_mult +
			((rem * time_mult) >> time_shift);

		enabled += delta;
		if (idx)
			running += delta;

		quot = count / running;
		rem = count % running;
		count = quot * enabled + (rem * enabled) / running;
	}

	return count;
}

static int (*pthread_create_orig)(pthread_t *__restrict,
		__const pthread_attr_t *__restrict,
		void *(*)(void *),
		void *__restrict) = NULL;

static struct perf_event_attr perf_attr = {
	.type = PERF_TYPE_HARDWARE,
	.config = PERF_COUNT_HW_CPU_CYCLES,
	.exclude_kernel = 1,
};

struct prof_arc {
	void *call_fn;
	void *this_fn;
	uint64_t count;
};

struct prof_fn {
	void *this_fn;
	uint64_t count;
};

#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
		                       +(uint32_t)(((const uint8_t *)(d))[0]) )

static uint32_t SuperFastHash (const char * data, int len, uint32_t hash) {
	uint32_t tmp;
	int rem;

	if (len <= 0 || data == NULL) return 0;

	rem = len & 3;
	len >>= 2;

	/* Main loop */
	for (;len > 0; len--) {
		hash  += get16bits (data);
		tmp    = (get16bits (data+2) << 11) ^ hash;
		hash   = (hash << 16) ^ tmp;
		data  += 2*sizeof (uint16_t);
		hash  += hash >> 11;
	}

	/* Handle end cases */
	switch (rem) {
		case 3: hash += get16bits (data);
			hash ^= hash << 16;
			hash ^= data[sizeof (uint16_t)] << 18;
			hash += hash >> 11;
			break;
		case 2: hash += get16bits (data);
			hash ^= hash << 11;
			hash += hash >> 17;
			break;
		case 1: hash += *data;
			hash ^= hash << 10;
			hash += hash >> 1;
	}

	/* Force "avalanching" of final 127 bits */
	hash ^= hash << 3;
	hash += hash >> 5;
	hash ^= hash << 4;
	hash += hash >> 17;
	hash ^= hash << 25;
	hash += hash >> 6;

	return hash;
}

void die(const char *err, ...)
{
	va_list params;

	va_start(params, err);
	vfprintf(stderr, err, params);
	va_end(params);

	exit(-1);
}


#define HASH_TABLE_SIZE		16384
#define AVG_STACK_DEPTH		8

#define ARC_HASH_SIZE	(HASH_TABLE_SIZE * AVG_STACK_DEPTH)

static struct prof_arc prof_arc_hash[ARC_HASH_SIZE];
static pthread_mutex_t prof_arc_lock;

#define FN_HASH_SIZE	(HASH_TABLE_SIZE)

static struct prof_fn  prof_fn_hash[FN_HASH_SIZE];
static pthread_mutex_t prof_fn_lock;

#define HASH_INIT		0x9e370001UL	
#define HASH_CHAIN		16

static inline void *
prof_hash_find(void *hash_base, const size_t hash_size, 
		const void *key, const size_t key_size,
		const size_t entry_size, pthread_mutex_t *lock)

{
	uint32_t hash = HASH_INIT;
	void *entry = NULL;
	int i, j;

	for (i = 0; i < HASH_CHAIN; i++) {
		hash = SuperFastHash(key, key_size, hash);
		entry = hash_base + entry_size * (hash % FN_HASH_SIZE);
		if (!memcmp(entry, key, key_size))
			return entry;

		for (j = 0; j < key_size; j++) {
			if (*((char *)entry + j))
				goto next_1;
		}

		goto found_empty;
next_1:
		continue;
	}
	die("fn_hash too full");

found_empty:
	hash = HASH_INIT;
	pthread_mutex_lock(lock);
	for (i = 0; i < HASH_CHAIN; i++) {
		hash = SuperFastHash(key, key_size, hash);
		entry = hash_base + entry_size * (hash % FN_HASH_SIZE);

		if (!memcmp(entry, key, key_size))
			goto unlock;

		for (j = 0; j < key_size; j++) {
			if (*((char *)entry + j))
				goto next_2;
		}

		memcpy(entry, key, key_size);
		goto unlock;
next_2:
		continue;
	}
	die("fn_hash too full (locked)");
unlock:
	pthread_mutex_unlock(lock);

	return entry;
}

static struct prof_fn *prof_fn_find(void *this_fn)
{
	return prof_hash_find(prof_fn_hash, FN_HASH_SIZE,
			&this_fn, sizeof(this_fn), sizeof(struct prof_fn),
			&prof_fn_lock);
}

static struct prof_arc *prof_arc_find(void *this_fn, void *call_fn)
{
	struct prof_arc arc = { .this_fn = this_fn, .call_fn = call_fn, };

	return prof_hash_find(prof_arc_hash, ARC_HASH_SIZE,
			&arc, 2*sizeof(void *), sizeof(struct prof_arc),
			&prof_arc_lock);
}

static unsigned long page_size;

struct prof_stack {
	struct prof_arc *arc;
	struct prof_fn *fn;
	uint64_t stamp;
};

static __thread struct prof_stack prof_stack[128];
static __thread int prof_stack_idx = 0;

static __thread void *perf_event;

static void thread_init(void)
{
	int fd;

	fd = sys_perf_event_open(&perf_attr, 0, -1, -1, 0);
	if (fd < 0)
		die("failed to create perf_event");

	perf_event = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
	if (perf_event == (void *)(-1))
		die("failed to mmap perf_event");

	close(fd);

	prof_stack_idx = 0;
}

static u64 first_count;

void prof_init(void) __attribute__((constructor));
void prof_init(void)
{
	char *event_str;

	page_size = sysconf(_SC_PAGESIZE);

	pthread_create_orig = dlsym(RTLD_NEXT, "pthread_create");
	if (!pthread_create_orig) {
		char *error = dlerror();
		if (!error)
			error = "pthread_create is NULL";
		die("%s\n", error);
	}

	memset(prof_arc_hash, 0, sizeof(prof_arc_hash));
	pthread_mutex_init(&prof_arc_lock, NULL);

	memset(prof_fn_hash, 0, sizeof(prof_fn_hash));
	pthread_mutex_init(&prof_fn_lock, NULL);

	event_str = getenv("PROF_EVENT");
	if (event_str)
		/* perf_attr = parse_attr_crap(event_str); */
		;

	thread_init(); /* main thread */

	first_count = mmap_read_self(perf_event);
}

struct prof_symbol {
	void *addr;
	unsigned long size;
	const char *name;
};

static struct prof_symbol *prof_symbols;
static unsigned long prof_nr_symbols;

static void prof_gelf(const char *name, void (*func)(Elf *elf, Elf_Scn *scn, GElf_Shdr *shdr, void *data), void *data)
{
	Elf         *elf;
	Elf_Scn     *scn = NULL;
	GElf_Shdr   shdr;
	int         fd;

	elf_version(EV_CURRENT);

	fd = open(name, O_RDONLY);
	elf = elf_begin(fd, ELF_C_READ, NULL);

	while ((scn = elf_nextscn(elf, scn)) != NULL) {
		gelf_getshdr(scn, &shdr);
		if (shdr.sh_type == SHT_SYMTAB) {
			func(elf, scn, &shdr, data);
		}
	}

	elf_end(elf);
	close(fd);
}

static void prof_gelf_nr_symbols(Elf *elf, Elf_Scn *scn, GElf_Shdr *shdr, void *data)
{
	unsigned long *nr_symbols = data;
	unsigned long count;

	count = shdr->sh_size / shdr->sh_entsize;
	*nr_symbols += count;
}

static int prof_count_symbols(struct dl_phdr_info *info, size_t size, void *data)
{
	prof_gelf(info->dlpi_name, prof_gelf_nr_symbols, data);
	return 0;
}

static void prof_gelf_load_symbols(Elf *elf, Elf_Scn *scn, GElf_Shdr *shdr, void *_info)
{
	struct dl_phdr_info *info = _info;
	unsigned long count = shdr->sh_size / shdr->sh_entsize;
	unsigned long i;
	Elf_Data *data;

	data = elf_getdata(scn, NULL);

	for (i = 0; i < count; i++) {
		GElf_Sym sym;
		char *name;
		struct prof_symbol *symbol = &prof_symbols[prof_nr_symbols];

		gelf_getsym(data, i, &sym);

		if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
			continue;

		if (!sym.st_size)
			continue;

		name = elf_strptr(elf, shdr->sh_link, sym.st_name);
		if (!name)
			continue;

		symbol->name = strdup(name);
		symbol->addr = (void *)(sym.st_value + info->dlpi_addr);
		symbol->size = sym.st_size;

		prof_nr_symbols++;
	}
}

static int prof_load_symbols(struct dl_phdr_info *info, size_t size, void *data)
{
	prof_gelf(info->dlpi_name, prof_gelf_load_symbols, info);
	return 0;
}

static int prof_cmp_symbol(const void *_a, const void *_b)
{
	const struct prof_symbol *a = _a, *b = _b;

	if (a->addr < b->addr)
		return -1;

	if (a->addr > b->addr)
		return 1;

	return 0;
}

static void load_symbols(void)
{
	unsigned long nr_symbols = 0;
	struct dl_phdr_info dl_info = {
		.dlpi_name = "/proc/self/exe",
		.dlpi_addr = 0,
	};

	prof_count_symbols(&dl_info, sizeof(dl_info), &nr_symbols);
	dl_iterate_phdr(prof_count_symbols, &nr_symbols);

	prof_symbols = calloc(nr_symbols, sizeof(struct prof_symbol));

	prof_load_symbols(&dl_info, sizeof(dl_info), NULL);
	dl_iterate_phdr(prof_load_symbols, NULL);

	qsort(prof_symbols, prof_nr_symbols, 
			sizeof(struct prof_symbol), prof_cmp_symbol);
}

static struct prof_symbol *find_symbol(void *addr)
{
	struct prof_symbol *sym;
	unsigned long l, u, i;

	l = 0;
	u = prof_nr_symbols;

	while (l < u) {
		i = (l + u) / 2;
		sym = &prof_symbols[i];

		if (addr >= sym->addr && addr < sym->addr + sym->size)
			return sym;

		if (addr < sym->addr)
			u = i;
		else
			l = i + 1;
	}

	return NULL;
}

void prof_exit(void) __attribute__((destructor));
void prof_exit(void)
{
	FILE *file;
	int i;
	int64_t max_fn_count = 0;

	load_symbols();

	file = fopen("prof.dot", "w");
	if (!file)
		die("failed to create output file");

	fprintf(file, "#\n# first count: %lu\n#\n", first_count);

	/*
	 * Maybe replace "profile" with the argv
	 */
	fprintf(file, "digraph profile {\n");

	for (i = 0; i < ARC_HASH_SIZE; i++) {
		struct prof_arc *arc = &prof_arc_hash[i];
		struct prof_symbol *c_sym, *t_sym;
		struct prof_fn *c;
		double p;

		if (!(arc->call_fn && arc->this_fn))
			continue;

		c_sym = find_symbol(arc->call_fn);
		t_sym = find_symbol(arc->this_fn);

		if (!c_sym || !t_sym)
			die("symbols missing");

		c = prof_fn_find(c_sym->addr);
		if (!c)
			die("fn_hash|symtab borken");

		p = (double)arc->count / (double)c->count;
		fprintf(file, 
		"  \"%s\" -> \"%s\" [label=\"%f\", color=\"%.3f %.3f %.3f\"]\n",
			c_sym->name, t_sym->name, 100.0 * p,
			0.33, 1.0, p);
	}

	for (i = 0; i < ARC_HASH_SIZE; i++) {
		struct prof_arc *arc = &prof_arc_hash[i];
		struct prof_symbol *c_sym;
		struct prof_fn *c;
		double p;

		if (!(arc->call_fn && arc->this_fn))
			continue;

		c_sym = find_symbol(arc->call_fn);
		c = prof_fn_find(c_sym->addr);

		c->count -= arc->count;
	}

	for (i = 0; i < FN_HASH_SIZE; i++) {
		struct prof_fn *fn = &prof_fn_hash[i];
		struct prof_symbol *s;

		if (!fn->this_fn)
			continue;

		if ((int64_t)fn->count > max_fn_count)
			max_fn_count = fn->count;

		s = find_symbol(fn->this_fn);
//		fprintf(file, "#  %s %ld\n", s->name, fn->count);
	}

	for (i = 0; i < FN_HASH_SIZE; i++) {
		struct prof_fn *fn = &prof_fn_hash[i];
		struct prof_symbol *s;
		double p;

		if (!fn->this_fn)
			continue;

		s = find_symbol(fn->this_fn);
		p = (double)fn->count / (double)max_fn_count;
		fprintf(file, "  \"%s\" [color=\"%.3f %.3f %.3f\"]\n", 
				s->name, 0.0, 1.0, p);
	}

	fprintf(file, "}\n");
	fflush(file);
	fclose(file);
}

struct tramp_data {

	void *(*func)(void *);
	void *arg;

	pthread_mutex_t lock;
	pthread_cond_t  wait;
};

static void *tramp_func(void *data)
{
	struct tramp_data *tramp_data = data;
	void *(*func)(void *) = tramp_data->func;
	void *arg = tramp_data->arg;
	void *ret;

	thread_init();

	pthread_mutex_lock(&tramp_data->lock);
	pthread_cond_signal(&tramp_data->wait);
	pthread_mutex_unlock(&tramp_data->lock);

	ret = func(arg);

	munmap(perf_event, page_size);

	return ret;
}

/* hijack pthread_create() */
int pthread_create(pthread_t *__restrict thread,
		__const pthread_attr_t *__restrict attr,
		void *(*func)(void *),
		void *__restrict arg)
{
	struct tramp_data tramp_data = {
		.func = func,
		.arg = arg,
	};
	int ret;

	pthread_cond_init(&tramp_data.wait, NULL);
	pthread_mutex_init(&tramp_data.lock, NULL);

	pthread_mutex_lock(&tramp_data.lock);

	ret = pthread_create_orig(thread, attr, &tramp_func, &tramp_data);
	if (!ret)
		pthread_cond_wait(&tramp_data.wait, &tramp_data.lock);

	pthread_mutex_unlock(&tramp_data.lock);

	pthread_mutex_destroy(&tramp_data.lock);
	pthread_cond_destroy(&tramp_data.wait);

	return ret;
} 

void __cyg_profile_func_enter(void *this_fn, void *call_fn)
{
	struct prof_stack *st = &prof_stack[prof_stack_idx++];

	st->arc = prof_arc_find(this_fn, call_fn);
	st->fn = prof_fn_find(this_fn);
	st->stamp = mmap_read_self(perf_event);
}

void __cyg_profile_func_exit(void *this_fn, void *call_fn)
{
	struct prof_stack *st = &prof_stack[--prof_stack_idx];
	uint64_t now, delta;

	now = mmap_read_self(perf_event);
	delta = now - st->stamp;
	(void)__sync_fetch_and_add(&st->fn->count, delta);
	(void)__sync_fetch_and_add(&st->arc->count, delta);
}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ