lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230218201932.818586688@goodmis.org>
Date:   Sat, 18 Feb 2023 15:18:26 -0500
From:   Steven Rostedt <rostedt@...dmis.org>
To:     linux-kernel@...r.kernel.org
Cc:     Masami Hiramatsu <mhiramat@...nel.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Daniel Bristot de Oliveira <bristot@...nel.org>,
        Jonathan Corbet <corbet@....net>
Subject: [for-next][PATCH 5/8] rtla/timerlat: Add auto-analysis support to timerlat top

From: Daniel Bristot de Oliveira <bristot@...nel.org>

Currently, timerlat top displays the timerlat tracer latency results, saving
the intuitive timerlat trace for the developer to analyze.

This patch goes a step forward in the automaton of the scheduling latency
analysis by providing a summary of the root cause of a latency higher than
the passed "stop tracing" parameter if the trace stops.

The output is intuitive enough for non-expert users to have a general idea
of the root cause by looking at each factor's contribution percentage while
keeping the technical detail in the output for more expert users to start
an in dept debug or to correlate a root cause with an existing one.

The terminology is in line with recent industry and academic publications
to facilitate the understanding of both audiences.

Here is one example of tool output:
 ----------------------------------------- %< -----------------------------------------------------
  # taskset -c 0 timerlat -a 40 -c 1-23 -q
                                     Timer Latency
    0 00:00:12   |          IRQ Timer Latency (us)        |         Thread Timer Latency (us)
  CPU COUNT      |      cur       min       avg       max |      cur       min       avg       max
    1 #12322     |        0         0         1        15 |       10         3         9        31
    2 #12322     |        3         0         1        12 |       10         3         9        23
    3 #12322     |        1         0         1        21 |        8         2         8        34
    4 #12322     |        1         0         1        17 |       10         2        11        33
    5 #12322     |        0         0         1        12 |        8         3         8        25
    6 #12322     |        1         0         1        14 |       16         3        11        35
    7 #12322     |        0         0         1        14 |        9         2         8        29
    8 #12322     |        1         0         1        22 |        9         3         9        34
    9 #12322     |        0         0         1        14 |        8         2         8        24
   10 #12322     |        1         0         0        12 |        9         3         8        24
   11 #12322     |        0         0         0        15 |        6         2         7        29
   12 #12321     |        1         0         0        13 |        5         3         8        23
   13 #12319     |        0         0         1        14 |        9         3         9        26
   14 #12321     |        1         0         0        13 |        6         2         8        24
   15 #12321     |        1         0         1        15 |       12         3        11        27
   16 #12318     |        0         0         1        13 |        7         3        10        24
   17 #12319     |        0         0         1        13 |       11         3         9        25
   18 #12318     |        0         0         0        12 |        8         2         8        20
   19 #12319     |        0         0         1        18 |       10         2         9        28
   20 #12317     |        0         0         0        20 |        9         3         8        34
   21 #12318     |        0         0         0        13 |        8         3         8        28
   22 #12319     |        0         0         1        11 |        8         3        10        22
   23 #12320     |       28         0         1        28 |       41         3        11        41
  rtla timerlat hit stop tracing
  ## CPU 23 hit stop tracing, analyzing it ##
  IRQ handler delay:				      	    27.49 us (65.52 %)
  IRQ latency:						    28.13 us
  Timerlat IRQ duration:				     9.59 us (22.85 %)
  Blocking thread:					     3.79 us (9.03 %)
			objtool:49256    		     3.79 us
    Blocking thread stacktrace
		-> timerlat_irq
		-> __hrtimer_run_queues
		-> hrtimer_interrupt
		-> __sysvec_apic_timer_interrupt
		-> sysvec_apic_timer_interrupt
		-> asm_sysvec_apic_timer_interrupt
		-> _raw_spin_unlock_irqrestore
		-> cgroup_rstat_flush_locked
		-> cgroup_rstat_flush_irqsafe
		-> mem_cgroup_flush_stats
		-> mem_cgroup_wb_stats
		-> balance_dirty_pages
		-> balance_dirty_pages_ratelimited_flags
		-> btrfs_buffered_write
		-> btrfs_do_write_iter
		-> vfs_write
		-> __x64_sys_pwrite64
		-> do_syscall_64
		-> entry_SYSCALL_64_after_hwframe
  ------------------------------------------------------------------------
    Thread latency:					    41.96 us (100%)

  The system has exit from idle latency!
    Max timerlat IRQ latency from idle: 17.48 us in cpu 4
  Saving trace to timerlat_trace.txt
 ----------------------------------------- >% -----------------------------------------------------

In this case, the major factor was the delay suffered by the IRQ handler
that handles timerlat wakeup: 65.52 %. This can be caused by the
current thread masking interrupts, which can be seen in the blocking
thread stacktrace: the current thread (objtool:49256) disabled interrupts
via raw spin lock operations inside mem cgroup, while doing write
syscall in a btrfs file system.

A simple search for the function name on Google shows that this is
a legit case for disabling the interrupts:

  cgroup: Use irqsave in cgroup_rstat_flush_locked()
  lore.kernel.org/linux-mm/20220301122143.1521823-2-bigeasy@...utronix.de/

The output also prints other reasons for the latency root cause, such as:

  - an IRQ that happened before the IRQ handler that caused delays
  - The interference from NMI, IRQ, Softirq, and Threads

The details about how these factors affect the scheduling latency
can be found here:

   https://bristot.me/demystifying-the-real-time-linux-latency/

Link: https://lkml.kernel.org/r/3d45f40e630317f51ac6d678e2d96d310e495729.1675179318.git.bristot@kernel.org

Cc: Daniel Bristot de Oliveira <bristot@...nel.org>
Cc: Jonathan Corbet <corbet@....net>
Signed-off-by: Daniel Bristot de Oliveira <bristot@...nel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
---
 tools/tracing/rtla/src/timerlat_top.c | 46 +++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 334271935222..eea5b3357e27 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -10,10 +10,12 @@
 #include <unistd.h>
 #include <stdio.h>
 #include <time.h>
+#include <errno.h>
 
 #include "utils.h"
 #include "osnoise.h"
 #include "timerlat.h"
+#include "timerlat_aa.h"
 
 struct timerlat_top_params {
 	char			*cpus;
@@ -30,6 +32,8 @@ struct timerlat_top_params {
 	int			quiet;
 	int			set_sched;
 	int			dma_latency;
+	int			no_aa;
+	int			dump_tasks;
 	struct sched_attr	sched_param;
 	struct trace_events	*events;
 };
@@ -130,17 +134,22 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record,
 		     struct tep_event *event, void *context)
 {
 	struct trace_instance *trace = context;
+	struct timerlat_top_params *params;
 	unsigned long long latency, thread;
 	struct osnoise_tool *top;
 	int cpu = record->cpu;
 
 	top = container_of(trace, struct osnoise_tool, trace);
+	params = top->params;
 
 	tep_get_field_val(s, event, "context", record, &thread, 1);
 	tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
 
 	timerlat_top_update(top, cpu, thread, latency);
 
+	if (!params->no_aa)
+		timerlat_aa_handler(s, record, event, context);
+
 	return 0;
 }
 
@@ -281,11 +290,13 @@ static void timerlat_top_usage(char *usage)
 		"	  -c/--cpus cpus: run the tracer only on the given cpus",
 		"	  -d/--duration time[m|h|d]: duration of the session in seconds",
 		"	  -D/--debug: print debug info",
+		"	     --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)",
 		"	  -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
 		"	  -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed",
 		"	     --filter <command>: enable a trace event filter to the previous -e event",
 		"	     --trigger <command>: enable a trace event trigger to the previous -e event",
 		"	  -n/--nano: display data in nanoseconds",
+		"	     --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage",
 		"	  -q/--quiet print only a summary at the end",
 		"	     --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency",
 		"	  -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
@@ -349,13 +360,15 @@ static struct timerlat_top_params
 			{"trigger",		required_argument,	0, '0'},
 			{"filter",		required_argument,	0, '1'},
 			{"dma-latency",		required_argument,	0, '2'},
+			{"no-aa",		no_argument,		0, '3'},
+			{"dump-tasks",		no_argument,		0, '4'},
 			{0, 0, 0, 0}
 		};
 
 		/* getopt_long stores the option index here. */
 		int option_index = 0;
 
-		c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:",
+		c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:34",
 				 long_options, &option_index);
 
 		/* detect the end of the options. */
@@ -368,13 +381,13 @@ static struct timerlat_top_params
 
 			/* set thread stop to auto_thresh */
 			params->stop_total_us = auto_thresh;
+			params->stop_us = auto_thresh;
 
 			/* get stack trace */
 			params->print_stack = auto_thresh;
 
 			/* set trace */
 			params->trace_output = "timerlat_trace.txt";
-
 			break;
 		case 'c':
 			retval = parse_cpu_list(optarg, &params->monitored_cpus);
@@ -437,6 +450,7 @@ static struct timerlat_top_params
 				params->trace_output = &optarg[1];
 			else
 				params->trace_output = "timerlat_trace.txt";
+
 			break;
 		case '0': /* trigger */
 			if (params->events) {
@@ -467,6 +481,12 @@ static struct timerlat_top_params
 				exit(EXIT_FAILURE);
 			}
 			break;
+		case '3': /* no-aa */
+			params->no_aa = 1;
+			break;
+		case '4':
+			params->dump_tasks = 1;
+			break;
 		default:
 			timerlat_top_usage("Invalid option");
 		}
@@ -477,6 +497,12 @@ static struct timerlat_top_params
 		exit(EXIT_FAILURE);
 	}
 
+	/*
+	 * Auto analysis only happens if stop tracing, thus:
+	 */
+	if (!params->stop_us && !params->stop_total_us)
+		params->no_aa = 1;
+
 	return params;
 }
 
@@ -547,6 +573,7 @@ static struct osnoise_tool
 {
 	struct osnoise_tool *top;
 	int nr_cpus;
+	int retval;
 
 	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
 
@@ -563,6 +590,16 @@ static struct osnoise_tool
 	tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat",
 				   timerlat_top_handler, top);
 
+	/*
+	 * If no auto analysis, we are ready.
+	 */
+	if (params->no_aa)
+		return top;
+
+	retval = timerlat_aa_init(top, nr_cpus, params->dump_tasks);
+	if (retval)
+		goto out_err;
+
 	return top;
 
 out_err:
@@ -688,6 +725,10 @@ int timerlat_top_main(int argc, char *argv[])
 
 	if (trace_is_off(&top->trace, &record->trace)) {
 		printf("rtla timerlat hit stop tracing\n");
+
+		if (!params->no_aa)
+			timerlat_auto_analysis(params->stop_us, params->stop_total_us);
+
 		if (params->trace_output) {
 			printf("  Saving trace to %s\n", params->trace_output);
 			save_trace_to_file(record->trace.inst, params->trace_output);
@@ -701,6 +742,7 @@ int timerlat_top_main(int argc, char *argv[])
 	params->events = NULL;
 out_free:
 	timerlat_free_top(top->data);
+	timerlat_aa_destroy();
 	osnoise_destroy_tool(record);
 	osnoise_destroy_tool(top);
 	free(params);
-- 
2.39.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ