lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sat, 21 Feb 2009 04:39:17 +0100
From:	Frederic Weisbecker <fweisbec@...il.com>
To:	Jason Baron <jbaron@...hat.com>
Cc:	Peter Zijlstra <peterz@...radead.org>,
	"Frank Ch. Eigler" <fche@...hat.com>, mingo@...e.hu,
	rostedt@...dmis.org, linux-kernel@...r.kernel.org,
	acme@...stprotocols.net, compudj@...stal.dyndns.org
Subject: Re: [PATCH] new irq tracer

On Fri, Feb 20, 2009 at 02:52:36PM -0500, Jason Baron wrote:
> On Wed, Feb 18, 2009 at 11:10:35PM +0100, Peter Zijlstra wrote:
> > On Wed, 2009-02-18 at 17:02 -0500, Frank Ch. Eigler wrote:
> > 
> > > > I really am having a difficult time seeing the use in such narrow
> > > > tracers.
> > > 
> > > Part of the problem may come from defining "tracers" as something
> > > limited to ftrace engines.  Once such tracepoints are in the kernel,
> > > more powerful analytical tools may be attached to them.
> > 
> > ftrace graph tracer is by far the most powerful thing I've seen, there's
> > nothing limiting about ftrace.
> > 
> > What is limiting are these puny little tracers that have no real value.
> > 
> > A much better purpose for these tracepoints would be augmenting data in
> > existing tracers like the graph/function/sched tracer.
> > 
> 
> ok...using these two tracepoints i've augmented the graph tracer to add
> a new column that prints the irq # and handler we're in, or -1
> otherwise. its a new 'funcgraph-irq' in 'trace_options', sample output:
> 
> 
>  1) #-1:none        2.323 us    |                                  native_apic_mem_write();
>  1) #-1:none      + 21.891 us   |                                }
>  1) #-1:none                    |                                handle_IRQ_event() {


My dream would be to see:

1)                             |                                handle_IRQ_event(29:ahci) {


But it's not possible for now, not until we have the hashtable.
Anyway, instead of having #-1:none, it would be better to have nothing
in case of user context (or softirq).

Or actually why not just an ftrace_printk when the probe is on?
Since it immediately follows the call to handle_IRQ_event, we would have the
following displayed:

1)           |                                handle_IRQ_event() {
1)           |                                  /* 29 : ahci */

Because ftrace_printk send TRACE_PRINT entries which are displayed like this
by the function graph tracer.
It would be much more light than a whole column.


>  1) #29:ahci        1.938 us    |                                  irq_to_desc();
>  1) #29:ahci        2.626 us    |                                  runqueue_is_locked();
>  1) #29:ahci      + 21.173 us   |                                          }
> 		.
> 		.
> 		.
>  1) #29:ahci      + 32.047 us   |                                        }
>  1) #29:ahci      + 43.746 us   |                                      }
>  1) #29:ahci      + 51.634 us   |                                    }
>  1) #29:ahci      + 61.954 us   |                                  }
>  1) #-1:none        1.014 us    |                                  runqueue_is_locked();
>  1) #-1:none                    |                                  __wake_up() {
>  1) #-1:none        0.920 us    |                                    _spin_lock_irqsave();
>  1) #-1:none        1.028 us    |                                    __wake_up_common();
> 
> 
> This seems to work pretty well - although it does get confused by
> certain preemption events which it can't handle...I think this can make
> the logs more readable. I can probably get it working if ppl thinks its
> valuable...or have better ways of implementing it. Code is below.
> 
> thanks,
> 
> -Jason
> 
> 
> diff --git a/include/trace/irq.h b/include/trace/irq.h
> new file mode 100644
> index 0000000..ecec94a
> --- /dev/null
> +++ b/include/trace/irq.h
> @@ -0,0 +1,25 @@
> +#ifndef _TRACE_IRQ_H
> +#define _TRACE_IRQ_H
> +
> +#include <linux/interrupt.h>
> +#include <linux/tracepoint.h>
> +
> +enum {
> +        IRQ_ENTRY = 0,
> +        IRQ_EXIT = 1,
> +};
> +
> +struct irq_trace {
> +	int type;
> +	int irq;
> +	char irq_name[10];
> +};
> +
> +DECLARE_TRACE(irq_entry,
> +	TPPROTO(unsigned int id),
> +	TPARGS(id));
> +DECLARE_TRACE(irq_exit,
> +	TPPROTO(unsigned int id, irqreturn_t retval),
> +	TPARGS(id, retval));
> +
> +#endif /* _TRACE_IRQ_H */
> diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
> index fdff380..34053b5 100644
> --- a/kernel/irq/handle.c
> +++ b/kernel/irq/handle.c
> @@ -18,6 +18,7 @@
>  #include <linux/rculist.h>
>  #include <linux/hash.h>
>  #include <linux/bootmem.h>
> +#include <trace/irq.h>
>  
>  #include "internals.h"
>  
> @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
>  	return IRQ_NONE;
>  }
>  
> +DEFINE_TRACE(irq_entry);
> +DEFINE_TRACE(irq_exit);
> +
>  /**
>   * handle_IRQ_event - irq action chain handler
>   * @irq:	the interrupt number
> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
>  		local_irq_enable_in_hardirq();
>  
>  	do {
> +		trace_irq_entry(irq);
>  		ret = action->handler(irq, action->dev_id);
> +		trace_irq_exit(irq, ret);
>  		if (ret == IRQ_HANDLED)
>  			status |= action->flags;
>  		retval |= ret;
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index 627090b..58b9f7e 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -38,5 +38,6 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o
>  obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
>  obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
>  obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
> +obj-$(CONFIG_IRQ_TRACER) += trace_irq.o
>  
>  libftrace-y := ftrace.o
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index dbff020..6a34aaa 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -11,6 +11,7 @@
>  #include <trace/boot.h>
>  #include <trace/kmemtrace.h>
>  #include <trace/power.h>
> +#include <trace/irq.h>
>  
>  enum trace_type {
>  	__TRACE_FIRST_TYPE = 0,
> @@ -33,6 +34,7 @@ enum trace_type {
>  	TRACE_KMEM_ALLOC,
>  	TRACE_KMEM_FREE,
>  	TRACE_POWER,
> +	TRACE_IRQ,
>  	TRACE_BLK,
>  
>  	__TRACE_LAST_TYPE,
> @@ -173,6 +175,11 @@ struct trace_power {
>  	struct power_trace	state_data;
>  };
>  
> +struct trace_irq {
> +	struct trace_entry	ent;
> +	struct irq_trace	irq_data;
> +};
> +
>  struct kmemtrace_alloc_entry {
>  	struct trace_entry	ent;
>  	enum kmemtrace_type_id type_id;
> @@ -298,6 +305,7 @@ extern void __ftrace_bad_type(void);
>  			  TRACE_GRAPH_RET);		\
>  		IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
>   		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
> +		IF_ASSIGN(var, ent, struct trace_irq, TRACE_IRQ);	\
>  		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\
>  			  TRACE_KMEM_ALLOC);	\
>  		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
> diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
> index 519a0ca..4c4c777 100644
> --- a/kernel/trace/trace_functions_graph.c
> +++ b/kernel/trace/trace_functions_graph.c
> @@ -10,10 +10,19 @@
>  #include <linux/uaccess.h>
>  #include <linux/ftrace.h>
>  #include <linux/fs.h>
> +#include <trace/irq.h>
>  
>  #include "trace.h"
>  #include "trace_output.h"
>  
> +
> +struct per_cpu_irq_data {
> +	int irq_num;
> +	char action_str[10];
> +};
> +static struct per_cpu_irq_data cpu_irq_data[NR_CPUS];
> +static struct trace_array *graph_trace_tr;
> +
>  #define TRACE_GRAPH_INDENT	2
>  
>  /* Flag options */
> @@ -23,6 +32,7 @@
>  #define TRACE_GRAPH_PRINT_PROC		0x8
>  #define TRACE_GRAPH_PRINT_DURATION	0x10
>  #define TRACE_GRAPH_PRINT_ABS_TIME	0X20
> +#define TRACE_GRAPH_PRINT_IRQ		0X40
>  
>  static struct tracer_opt trace_opts[] = {
>  	/* Display overruns? (for self-debug purpose) */
> @@ -37,6 +47,8 @@ static struct tracer_opt trace_opts[] = {
>  	{ TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
>  	/* Display absolute time of an entry */
>  	{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
> +	/* Display irq number, -1 otherwise */
> +	{ TRACER_OPT(funcgraph-irq, TRACE_GRAPH_PRINT_IRQ) },
>  	{ } /* Empty entry */
>  };
>  
> @@ -52,10 +64,17 @@ static struct tracer_flags tracer_flags = {
>  
>  static int graph_trace_init(struct trace_array *tr)
>  {
> +	int i;
>  	int ret = register_ftrace_graph(&trace_graph_return,
>  					&trace_graph_entry);
>  	if (ret)
>  		return ret;
> +	graph_trace_tr = tr;
> +	for (i=0;i<NR_CPUS;i++) {
> +		cpu_irq_data[i].irq_num = -1;
> +		sprintf(cpu_irq_data[i].action_str, "none");
> +                cpu_irq_data[i].action_str[4] = '\0';
> +	}
>  	tracing_start_cmdline_record();
>  
>  	return 0;
> @@ -63,6 +82,7 @@ static int graph_trace_init(struct trace_array *tr)
>  
>  static void graph_trace_reset(struct trace_array *tr)
>  {
> +	graph_trace_tr = tr;
>  	tracing_stop_cmdline_record();
>  	unregister_ftrace_graph();
>  }
> @@ -110,6 +130,27 @@ print_graph_cpu(struct trace_seq *s, int cpu)
>  	return TRACE_TYPE_HANDLED;
>  }
>  
> +#define TRACE_GRAPH_IRQ_LENGTH     14
> +
> +static enum print_line_t
> +print_graph_irqnum(struct trace_seq *s, int cpu)
> +{
> +	char irq_str[TRACE_GRAPH_IRQ_LENGTH];
> +	int len, ret;
> +
> +	len = snprintf(irq_str, TRACE_GRAPH_IRQ_LENGTH, "#%d:%s", cpu_irq_data[cpu].irq_num, cpu_irq_data[cpu].action_str);
> +	ret = trace_seq_printf(s, "%s", irq_str);	
> +	if (!ret)
> +		return TRACE_TYPE_PARTIAL_LINE;
> +	while (len < TRACE_GRAPH_IRQ_LENGTH) {
> +		ret = trace_seq_printf(s, " ");
> +                if (!ret)
> +			return TRACE_TYPE_PARTIAL_LINE;
> +		len++;
> +	}
> +	return TRACE_TYPE_HANDLED;
> +}
> +
>  #define TRACE_GRAPH_PROCINFO_LENGTH	14
>  
>  static enum print_line_t
> @@ -504,6 +545,13 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
>  			return TRACE_TYPE_PARTIAL_LINE;
>  	}
>  
> +	/* irq */
> +	if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) {
> +		ret = print_graph_irqnum(s, cpu);
> +		if (ret == TRACE_TYPE_PARTIAL_LINE)
> +			return TRACE_TYPE_PARTIAL_LINE;
> +	}
> +
>  	/* Proc */
>  	if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
>  		ret = print_graph_proc(s, ent->pid);
> @@ -551,6 +599,13 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
>  			return TRACE_TYPE_PARTIAL_LINE;
>  	}
>  
> +	/* irq */
> +	if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) {
> +		ret = print_graph_irqnum(s, cpu);
> +		if (ret == TRACE_TYPE_PARTIAL_LINE)
> +			return TRACE_TYPE_PARTIAL_LINE;
> +	}
> +
>  	/* Proc */
>  	if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
>  		ret = print_graph_proc(s, ent->pid);
> @@ -627,6 +682,13 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
>  			return TRACE_TYPE_PARTIAL_LINE;
>  	}
>  
> +	/* irq */
> +	if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) {
> +		ret = print_graph_irqnum(s, cpu);
> +		if (ret == TRACE_TYPE_PARTIAL_LINE)
> +			return TRACE_TYPE_PARTIAL_LINE;
> +	}
> +
>  	/* Proc */
>  	if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
>  		ret = print_graph_proc(s, ent->pid);
> @@ -699,6 +761,22 @@ print_graph_function(struct trace_iterator *iter)
>  		trace_assign_type(field, entry);
>  		return print_graph_comment(field, s, entry, iter);
>  	}
> +	case TRACE_IRQ: {
> +		struct trace_irq *field;
> +		int cpu = iter->cpu;
> +
> +        	trace_assign_type(field, entry);
> +		if (field->irq_data.type == IRQ_ENTRY) {
> +			cpu_irq_data[cpu].irq_num = field->irq_data.irq;	
> +			strncpy(cpu_irq_data[cpu].action_str,field->irq_data.irq_name, 9);
> +			cpu_irq_data[cpu].action_str[9] = '\0';
> +		} else {
> +			cpu_irq_data[cpu].irq_num = -1;
> +			sprintf(cpu_irq_data[cpu].action_str, "none");
> +			cpu_irq_data[cpu].action_str[4] = '\0';
> +		}
> +		return TRACE_TYPE_HANDLED;
> +	}
>  	default:
>  		return TRACE_TYPE_UNHANDLED;
>  	}
> @@ -753,6 +831,67 @@ static void graph_trace_close(struct trace_iterator *iter)
>  	percpu_free(iter->private);
>  }
>  
> +static void probe_irq_exit(unsigned int irq, irqreturn_t retval)
> +{
> +        struct ring_buffer_event *event;
> +        struct trace_irq *entry;
> +
> +        event = trace_buffer_lock_reserve(graph_trace_tr, TRACE_IRQ,
> +                                          sizeof(*entry), 0, 0);
> +        if (!event)
> +                return;
> +        entry   = ring_buffer_event_data(event);
> +        entry->irq_data.type = IRQ_EXIT;
> +        trace_buffer_unlock_commit(graph_trace_tr, event, 0, 0);
> +}
> +
> +static void probe_irq_entry(unsigned int id)
> +{
> +        struct ring_buffer_event *event;
> +        struct trace_irq *entry;
> +	struct irq_desc *desc;
> +
> +        event = trace_buffer_lock_reserve(graph_trace_tr, TRACE_IRQ,
> +                                          sizeof(*entry), 0, 0);
> +        if (!event)
> +                return;
> +        entry   = ring_buffer_event_data(event);
> +        entry->irq_data.irq = id;
> +        entry->irq_data.type = IRQ_ENTRY;
> +	desc = irq_to_desc(id);
> +	strncpy(entry->irq_data.irq_name, desc->action->name, 9);
> +	entry->irq_data.irq_name[9] = '\0';
> +        trace_buffer_unlock_commit(graph_trace_tr, event, 0, 0);
> +}
> +
> +static int graph_trace_set_flag(u32 old_flags, u32 bit, int set)
> +{
> +	int ret = 0;
> +
> +        if (bit == TRACE_GRAPH_PRINT_IRQ) {
> +		/* do nothing if already set */
> +		//if (!!set == !!(func_flags.val & TRACE_GRAPH_PRINT_IRQ))
> +                //        return 0;
> +		if (set) {
> +			ret = register_trace_irq_entry(probe_irq_entry);
> +        		if (ret) {
> +                		pr_info("irq trace: irq entry tracepoint failed to register\n");
> +                		return ret;
> +			}
> +			ret = register_trace_irq_exit(probe_irq_exit);
> +			if (ret) {
> +				pr_info("irq trace: irq exit tracepoint failed to register\n");
> +				unregister_trace_irq_entry(probe_irq_entry);
> +				return ret;
> +			}
> +		} else {
> +			unregister_trace_irq_entry(probe_irq_entry);	
> +			unregister_trace_irq_exit(probe_irq_exit);
> +		}
> +	}
> +	return ret;
> +}
> +
>  static struct tracer graph_trace __read_mostly = {
>  	.name	     	= "function_graph",
>  	.open		= graph_trace_open,
> @@ -762,6 +901,7 @@ static struct tracer graph_trace __read_mostly = {
>  	.print_line	= print_graph_function,
>  	.print_header	= print_graph_headers,
>  	.flags		= &tracer_flags,
> +	.set_flag       = graph_trace_set_flag,
>  #ifdef CONFIG_FTRACE_SELFTEST
>  	.selftest	= trace_selftest_startup_function_graph,
>  #endif
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ