lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 10 Feb 2014 12:29:14 -0500
From:	Don Zickus <dzickus@...hat.com>
To:	acme@...stprotocols.net
Cc:	LKML <linux-kernel@...r.kernel.org>, jolsa@...hat.com,
	jmario@...hat.com, fowles@...each.com, eranian@...gle.com,
	Don Zickus <dzickus@...hat.com>
Subject: [PATCH 19/21] perf, c2c: Add framework to analyze latency and display summary stats

The overall goal for the cache-to-cache contention tool is to find extreme latencies and
help point out the problem, so they can be fixed.  The big assumption is remote cache hits
cause the biggest contentions.

Those are summarized by previous patches.  However, we still have non-remote cache hits with
high latency.  We display those here in a table.  We identify the outliers and focus on them.

Orignal work done by Dick Fowles.  I just ported it over to the perf framework.

Suggeted-by: Joe Mario <jmario@...hat.com>
Original-by: Dick Fowles <rfowles@...hat.com>
Signed-off-by: Don Zickus <dzickus@...hat.com>
---
 tools/perf/builtin-c2c.c | 456 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 456 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 014c9b0..b1d4a8b 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -12,6 +12,7 @@
 #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <sched.h>
+#include <math.h>
 
 typedef struct {
 	int  locks;               /* count of 'lock' transactions */
@@ -46,6 +47,20 @@ struct c2c_stats {
 	struct stats		stats;
 };
 
+struct c2c_latency_stats {
+	int    min;
+	int    mode;
+	int    max;
+	int    cnt;
+	int    thrshld;
+	double median;
+	double stddev;
+	double cv;
+	double ci;
+	double mean;
+	struct rb_node *start;
+};
+
 struct perf_c2c {
 	struct perf_tool tool;
 	bool		 raw_records;
@@ -60,6 +75,7 @@ struct perf_c2c {
 
 struct c2c_entry {
 	struct rb_node		rb_node;
+	struct rb_node		latency;
 	struct list_head	scratch;  /* scratch list for resorting */
 	struct thread		*thread;
 	int			tid;  /* FIXME perf maps broken */
@@ -97,6 +113,21 @@ struct c2c_hit {
 	struct callchain_root   callchain[0]; /* must be last member */
 };
 
+typedef struct {
+	const char *label;
+	const char *fmt;
+	void       *overall;
+	void       *extremes;
+	void       *analyze;
+} stats_t;
+
+
+enum { EMPTY, SYMBOL, OBJECT };
+enum { OVERALL, EXTREMES, ANALYZE, SCOPES };
+enum { INVALID, NONE, INTEGER, DOUBLE };
+
+struct c2c_latency_stats hist_info[SCOPES];
+
 enum { OP, LVL, SNP, LCK, TLB };
 
 #define RMT_RAM              (PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_REM_RAM2)
@@ -512,6 +543,34 @@ static int c2c_hitm__add_to_list(struct rb_root *root, struct c2c_hit *h)
 	return 0;
 }
 
+static int c2c_latency__add_to_list(struct rb_root *root, struct c2c_entry *n)
+{
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct c2c_entry *ne;
+	int64_t cmp;
+
+	p = &root->rb_node;
+
+	while (*p != NULL) {
+		parent = *p;
+		ne = rb_entry(parent, struct c2c_entry, latency);
+
+		/* sort on weight */
+		cmp = ne->weight - n->weight;
+
+		if (cmp > 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&n->latency, parent, p);
+	rb_insert_color(&n->latency, root);
+
+	return 0;
+}
+
 static int perf_c2c__fprintf_header(FILE *fp)
 {
 	int printed = fprintf(fp, "%c %-16s  %6s  %6s  %4s  %18s  %18s  %18s  %6s  %-10s %-60s %s\n", 
@@ -1048,6 +1107,402 @@ cleanup:
 	}
 }
 
+stats_t data[] = {
+	{ "Samples           ", "%20d",   &hist_info[OVERALL].cnt,     &hist_info[EXTREMES].cnt,     &hist_info[ANALYZE].cnt     },
+	{ " ",                   NULL,    NULL,                        NULL,                         NULL                        },
+	{ "Minimum           ", "%20d",   &hist_info[OVERALL].min,     &hist_info[EXTREMES].min,     &hist_info[ANALYZE].min     },
+	{ "Maximum           ", "%20d",   &hist_info[OVERALL].max,     &hist_info[EXTREMES].max,     &hist_info[ANALYZE].max     },
+	{ "Threshold         ", "%20d",   &hist_info[OVERALL].thrshld, &hist_info[EXTREMES].thrshld, &hist_info[ANALYZE].thrshld },
+	{ " ",                   NULL,    NULL,                        NULL,                         NULL                        },
+	{ "Mode              ", "%20d",   &hist_info[OVERALL].mode,    &hist_info[EXTREMES].mode,    &hist_info[ANALYZE].mode    },
+	{ "Median            ", "%20.0f", &hist_info[OVERALL].median,  &hist_info[EXTREMES].median,  &hist_info[ANALYZE].median  },
+	{ "Mean              ", "%20.0f", &hist_info[OVERALL].mean,    &hist_info[EXTREMES].mean,    &hist_info[ANALYZE].mean    },
+	{ " ",                   NULL,    NULL,                        NULL,                         NULL                        },
+	{ "Std Dev           ", "%20.1f", &hist_info[OVERALL].stddev,  &hist_info[EXTREMES].stddev,  &hist_info[ANALYZE].stddev   },
+	{ "Coeff of Variation", "%20.3f", &hist_info[OVERALL].cv,      &hist_info[EXTREMES].cv,      &hist_info[ANALYZE].cv      },
+	{ "Confid Interval   ", "%20.1f", &hist_info[OVERALL].ci,      &hist_info[EXTREMES].ci,      &hist_info[ANALYZE].ci      },
+};
+
+#define STATS_ENTRIES (sizeof(data) / sizeof(stats_t))
+enum { C90, C95 };
+
+static double tdist(int ci, int num_samples)
+{
+	#define MAX_ENTRIES       32
+	#define INFINITE_SAMPLES  (MAX_ENTRIES-1)
+
+	/*
+	 * Student's T-distribution for 90% & 95% confidence intervals
+	 * The last entry is the value for infinite degress of freedom 
+	 */
+
+	static double t_dist[MAX_ENTRIES][2] = {
+				{ NAN,     NAN   },        /*   0  */
+				{ 6.31,   12.71  },        /*   1  */
+				{ 2.92,    4.30  },        /*   2  */
+				{ 2.35,    3.18  },        /*   3  */
+				{ 2.13,    2.78  },        /*   4  */
+				{ 2.02,    2.57  },        /*   5  */
+				{ 1.94,    2.45  },        /*   6  */
+				{ 1.90,    2.36  },        /*   7  */
+				{ 1.86,    2.31  },        /*   8  */
+				{ 1.83,    2.26  },        /*   9  */
+				{ 1.81,    2.23  },        /*  10  */
+				{ 1.80,    2.20  },        /*  11  */
+				{ 1.78,    2.18  },        /*  12  */
+				{ 1.77,    2.16  },        /*  13  */
+				{ 1.76,    2.14  },        /*  14  */
+				{ 1.75,    2.13  },        /*  15  */
+				{ 1.75,    2.12  },        /*  16  */
+				{ 1.74,    2.11  },        /*  17  */
+				{ 1.73,    2.10  },        /*  18  */
+				{ 1.73,    2.09  },        /*  19  */
+				{ 1.72,    2.09  },        /*  20  */
+				{ 1.72,    2.08  },        /*  21  */
+				{ 1.72,    2.07  },        /*  22  */
+				{ 1.71,    2.07  },        /*  23  */
+				{ 1.71,    2.06  },        /*  24  */
+				{ 1.71,    2.06  },        /*  25  */
+				{ 1.71,    2.06  },        /*  26  */
+				{ 1.70,    2.05  },        /*  27  */
+				{ 1.70,    2.05  },        /*  28  */
+				{ 1.70,    2.04  },        /*  29  */
+				{ 1.70,    2.04  },        /*  30  */
+				{ 1.645,   1.96  },        /*  31  */
+	};
+
+	double tvalue;
+
+	tvalue = 0;
+
+	switch (ci) {
+
+	case C90: /* 90% CI */
+		tvalue = ((num_samples-1) > 30) ? t_dist[INFINITE_SAMPLES][ci] : t_dist[num_samples-1][ci];
+		break;
+
+	case C95: /* 95% CI */
+		tvalue = ((num_samples-1) > 30) ? t_dist[INFINITE_SAMPLES][ci] : t_dist[num_samples-1][ci];
+		break;
+
+	default:
+		printf("internal error - invalid confidence interval value specified");
+		break;
+	}
+
+	return tvalue;
+}
+
+static inline void print_latency_info_header(void)
+{
+	const char	*title;
+	char		hdrstr[256];
+	int		twidth;
+	int		size;
+	int		pad;
+	int		i;
+
+	twidth = sprintf(hdrstr, "%-20s%20s%20s%20s",
+				"Metric", "Overall", "Extremes", "Selected");
+	title = "Execution Latency For Loads to Non Shared Memory";
+	size  = strlen(title);
+	pad   = (twidth - size)/2;
+
+	printf("\n\n");
+	for (i = 0; i < twidth; i++) printf("=");
+	printf("\n");
+
+	if (pad > 0) {
+		for (i = 0; i < pad; i++) printf(" ");
+	}
+	printf("%s\n", title);
+	printf("\n");
+
+	printf("%s\n", hdrstr);
+
+	for (i = 0; i < twidth; i++) printf("=");
+	printf("\n");
+}
+
+static void print_latency_info(void)
+{
+#define  LBLFMT "%-20s"
+
+	char	fmtstr[32];
+	int	i, dtype;
+	stats_t *ptr;
+
+	print_latency_info_header();
+
+	for (i = 0; i < (int)STATS_ENTRIES; i++) {
+
+		ptr = &data[i];
+
+		dtype = INVALID;
+
+		if (ptr->fmt == NULL)  {
+
+			dtype = EMPTY;
+
+		} else {
+
+			if (strchr(ptr->fmt, 'd') != NULL) dtype = INTEGER;
+			if (strchr(ptr->fmt, 'f') != NULL) dtype = DOUBLE;
+
+			strcpy(fmtstr, ptr->fmt);
+			strtok(fmtstr, ".d");
+			strcat(fmtstr, "s");
+
+		}
+
+		switch (dtype) {
+
+		case INTEGER:
+			printf(LBLFMT, ptr->label);
+			(ptr->overall  != NULL) ? printf(ptr->fmt, *((int *)ptr->overall))  : printf(fmtstr, "na");
+			(ptr->extremes != NULL) ? printf(ptr->fmt, *((int *)ptr->extremes)) : printf(fmtstr, "na");
+			(ptr->analyze  != NULL) ? printf(ptr->fmt, *((int *)ptr->analyze))  : printf(fmtstr, "na");
+			printf("\n");
+			break;
+
+
+		case DOUBLE:
+			printf(LBLFMT, ptr->label);
+			(ptr->overall  != NULL) ? printf(ptr->fmt, *((double *)ptr->overall))  : printf(fmtstr, "na");
+			(ptr->extremes != NULL) ? printf(ptr->fmt, *((double *)ptr->extremes)) : printf(fmtstr, "na");
+			(ptr->analyze  != NULL) ? printf(ptr->fmt, *((double *)ptr->analyze))  : printf(fmtstr, "na");
+			printf("\n");
+			break;
+
+
+		case EMPTY:
+			printf("\n");
+			break;
+
+
+		default:
+			printf("internal error - unsupported formtat specifier : %s\n", ptr->fmt);
+			break;
+
+		};
+
+	}
+	printf("\n\n");
+}
+
+static void calculate_latency_info(struct rb_root *tree,
+				   struct stats *stats,
+				   struct c2c_latency_stats *overall,
+				   struct c2c_latency_stats *extremes,
+				   struct c2c_latency_stats *selected)
+{
+	struct rb_node *next = rb_first(tree);
+	struct rb_node *start = NULL;
+	struct c2c_entry *n;
+	int count = 0, weight = 0;
+	int mode = 0, mode_count = 0, idx = 0;
+	int median;
+	double threshold;
+	struct stats s;
+
+
+	median = stats->n / 2.0;
+
+	overall->cnt = stats->n;
+	overall->min = stats->min;
+	overall->max = stats->max;
+	overall->thrshld = 0;
+	overall->mean = avg_stats(stats);
+	overall->stddev = stddev_stats(stats);
+	overall->cv = overall->stddev / overall->mean;
+	overall->ci = (tdist(C90, stats->n) * stddev_stats(stats)) / sqrt(stats->n);
+	overall->start = next;
+
+	/* set threshold to mean + 3 * stddev of stats */
+	threshold = avg_stats(stats) + 3 * stddev_stats(stats);
+	init_stats(&s);
+
+	/* calculate overall latency */
+	while (next) {
+		n = rb_entry(next, struct c2c_entry, latency);
+		next = rb_next(&n->latency);
+
+		/* sorted on weight, makes counting easy, look for boundary */
+		if (n->weight != weight) {
+			if (count > mode_count) {
+				mode = weight;
+				mode_count = count;
+			}
+			count = 0;
+			weight = n->weight;
+		}
+		count++;
+
+		if (idx == median)
+			overall->median = n->weight;
+
+		/* save start for extreme latency calculation */
+		if (n->weight > threshold) {
+			if (!start)
+				start = next;
+
+			update_stats(&s, n->weight);
+		}
+
+		idx++;
+	}
+	/* count last set */
+	if (count > mode_count)
+		mode = weight;
+
+	overall->mode = mode;
+
+	/* calculate extreme latency */
+	next = start;
+	start = NULL;
+	idx = 0;
+	count = 0;
+	mode_count = 0;
+	mode = 0;
+	weight = 0;
+	median = s.n / 2.0;
+
+	extremes->cnt = s.n;
+	extremes->min = s.min;
+	extremes->max = s.max;
+	extremes->thrshld = threshold;
+	extremes->mean = avg_stats(&s);
+	extremes->stddev = stddev_stats(&s);
+	extremes->cv = extremes->stddev / extremes->mean;
+	extremes->ci = (tdist(C90, s.n) * stddev_stats(&s)) / sqrt(s.n);
+	extremes->start = next;
+
+	/* set threshold to mean + 3 * stddev of stats */
+	threshold = avg_stats(&s) + 3 * stddev_stats(&s);
+	init_stats(&s);
+
+	while (next) {
+		n = rb_entry(next, struct c2c_entry, latency);
+		next = rb_next(&n->latency);
+
+		/* sorted on weight, makes counting easy, look for boundary */
+		if (n->weight != weight) {
+			if (count > mode_count) {
+				mode = weight;
+				mode_count = count;
+			}
+			count = 0;
+			weight = n->weight;
+		}
+		count++;
+
+		if (idx == median)
+			extremes->median = n->weight;
+
+		/* save start for extreme latency calculation */
+		if (n->weight > threshold) {
+			if (!start)
+				start = next;
+
+			update_stats(&s, n->weight);
+		}
+
+		idx++;
+	}
+	/* count last set */
+	if (count > mode_count)
+		mode = weight;
+
+	extremes->mode = mode;
+
+	/* calculate analyze latency */
+	next = start;
+	idx = 0;
+	count = 0;
+	mode_count = 0;
+	mode = 0;
+	weight = 0;
+	median = s.n / 2.0;
+
+	selected->cnt = s.n;
+	selected->min = s.min;
+	selected->max = s.max;
+	selected->thrshld = threshold;
+	selected->mean = avg_stats(&s);
+	selected->stddev = stddev_stats(&s);
+	selected->cv = selected->stddev / selected->mean;
+	selected->ci = (tdist(C90, s.n) * stddev_stats(stats)) / sqrt(s.n);
+	selected->start = next;
+
+	while (next) {
+		n = rb_entry(next, struct c2c_entry, latency);
+		next = rb_next(&n->latency);
+
+		/* sorted on weight, makes counting easy, look for boundary */
+		if (n->weight != weight) {
+			if (count > mode_count) {
+				mode = weight;
+				mode_count = count;
+			}
+			count = 0;
+			weight = n->weight;
+		}
+		count++;
+
+		if (idx == median)
+			selected->median = n->weight;
+
+		idx++;
+	}
+	/* count last set */
+	if (count > mode_count)
+		mode = weight;
+
+	selected->mode = mode;
+}
+
+static void c2c_analyze_latency(struct perf_c2c *c2c)
+{
+
+	struct rb_node *next = rb_first(&c2c->tree_physid);
+	struct c2c_entry *n;
+	struct c2c_latency_stats *overall, *extremes, *selected;
+	struct rb_root lat_tree = RB_ROOT;
+	struct c2c_stats lat_stats;
+	u64 snoop;
+	struct stats s;
+
+	init_stats(&s);
+	memset(&lat_stats, 0, sizeof(struct c2c_stats));
+	memset(&hist_info, 0, sizeof(struct c2c_latency_stats) * SCOPES);
+
+	overall = &hist_info[OVERALL];
+	extremes = &hist_info[EXTREMES];
+	selected = &hist_info[ANALYZE];
+
+	/* sort on latency */
+	while (next) {
+		n = rb_entry(next, struct c2c_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		snoop  = n->mi->data_src.mem_snoop;
+
+		/* filter out HITs as un-interesting */
+		if ((snoop & P(SNOOP, HIT)) ||
+		    (snoop & P(SNOOP, HITM)) ||
+		    (snoop & P(SNOOP, NA)))
+			continue;
+
+		c2c_latency__add_to_list(&lat_tree, n);
+		update_stats(&s, n->weight);
+	}
+
+	calculate_latency_info(&lat_tree, &s, overall, extremes, selected);
+	print_latency_info();
+
+	return;
+}
+
 static void print_c2c_shared_cacheline_report(struct rb_root *hitm_tree,
 					      struct c2c_stats *shared_stats __maybe_unused,
 					      struct c2c_stats *c2c_stats __maybe_unused)
@@ -1761,6 +2216,7 @@ static int perf_c2c__process_events(struct perf_session *session,
 	if (verbose > 2)
 		dump_rb_tree(&c2c->tree_physid, c2c);
 	print_c2c_trace_report(c2c);
+	c2c_analyze_latency(c2c);
 	c2c_analyze_hitms(c2c);
 	print_symbol_record_count(&c2c->tree_physid);
 
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ