[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1369338693.6828.212.camel@gandalf.local.home>
Date: Thu, 23 May 2013 15:51:33 -0400
From: Steven Rostedt <rostedt@...dmis.org>
To: Seiji Aguchi <seiji.aguchi@....com>
Cc: "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"x86@...nel.org" <x86@...nel.org>, "hpa@...or.com" <hpa@...or.com>,
"Thomas Gleixner (tglx@...utronix.de)" <tglx@...utronix.de>,
"'mingo@...e.hu' (mingo@...e.hu)" <mingo@...e.hu>,
"Borislav Petkov (bp@...en8.de)" <bp@...en8.de>,
"linux-edac@...r.kernel.org" <linux-edac@...r.kernel.org>,
"Luck, Tony (tony.luck@...el.com)" <tony.luck@...el.com>,
"dle-develop@...ts.sourceforge.net"
<dle-develop@...ts.sourceforge.net>,
Tomoki Sekiyama <tomoki.sekiyama@....com>
Subject: Re: [PATCH v12 2/3] trace,x86: add x86 irq vector tracepoints
On Fri, 2013-04-05 at 19:20 +0000, Seiji Aguchi wrote:
> Signed-off-by: Seiji Aguchi <seiji.aguchi@....com>
> ---
> arch/x86/include/asm/desc.h | 36 +++++++-
> arch/x86/include/asm/entry_arch.h | 5 +-
> arch/x86/include/asm/hw_irq.h | 16 +++
> arch/x86/include/asm/mshyperv.h | 1 +
> arch/x86/include/asm/trace/irq_vectors.h | 159 ++++++++++++++++++++++++++++++
> arch/x86/kernel/Makefile | 1 +
> arch/x86/kernel/apic/apic.c | 94 ++++++++++++++++++
> arch/x86/kernel/cpu/common.c | 12 ++-
> arch/x86/kernel/cpu/mcheck/therm_throt.c | 14 +++
> arch/x86/kernel/cpu/mcheck/threshold.c | 14 +++
> arch/x86/kernel/entry_32.S | 12 ++-
> arch/x86/kernel/entry_64.S | 29 +++++-
> arch/x86/kernel/head_64.S | 6 +
> arch/x86/kernel/irq.c | 23 +++++
> arch/x86/kernel/irq_work.c | 12 +++
> arch/x86/kernel/smp.c | 35 +++++++
> arch/x86/kernel/tracepoint.c | 71 +++++++++++++
> include/xen/events.h | 3 +
> 18 files changed, 529 insertions(+), 14 deletions(-)
> create mode 100644 arch/x86/include/asm/trace/irq_vectors.h
> create mode 100644 arch/x86/kernel/tracepoint.c
>
> diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
> index 8bf1c06..e718c72 100644
> --- a/arch/x86/include/asm/desc.h
> +++ b/arch/x86/include/asm/desc.h
> @@ -39,6 +39,9 @@ extern gate_desc idt_table[];
> extern struct desc_ptr nmi_idt_descr;
> extern gate_desc nmi_idt_table[];
>
> +extern struct desc_ptr trace_idt_descr;
> +extern DEFINE_PER_CPU(u32, trace_idt_in_use);
> +extern DEFINE_PER_CPU(u32, debug_stack_use_ctr);
Header files should use DECLARE_PER_CPU() not DEFINE_PER_CPU. And drop
the "extern" when doing so, as it's implicit in the macro.
> struct gdt_page {
> struct desc_struct gdt[GDT_ENTRIES];
> } __attribute__((aligned(PAGE_SIZE)));
> @@ -320,6 +323,17 @@ static inline void set_nmi_gate(int gate, void *addr)
> }
> #endif
>
> +#ifdef CONFIG_TRACING
> +extern gate_desc trace_idt_table[];
> +static inline void trace_set_intr_gate(unsigned int gate, void *addr)
> +{
> + gate_desc s;
> +
> + pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
> + write_idt_entry(trace_idt_table, gate, &s);
> +}
Perhaps add a:
static inline void write_trace_idt_entry(gate_desc *idt, int entry,
const gate_desc *gate)
{
write_idt_entry(idt, entry, gate);
}
#else
static inline void write_trace_idt_entry(gate_desc *idt, int entry,
const gate_desc *gate)
{
}
> +#endif
Then you can below just do:
> +
> static inline void _set_gate(int gate, unsigned type, void *addr,
> unsigned dpl, unsigned ist, unsigned seg)
> {
> @@ -331,6 +345,9 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
> * setup time
> */
> write_idt_entry(idt_table, gate, &s);
write_trace_idt_entry(idt_table, gate, &s);
and remove the #ifdef. Makes it nice to read.
> +#ifdef CONFIG_TRACING
> + write_idt_entry(trace_idt_table, gate, &s);
> +#endif
> }
>
> /*
> @@ -360,12 +377,27 @@ static inline void alloc_system_vector(int vector)
> }
> }
>
> -static inline void alloc_intr_gate(unsigned int n, void *addr)
> +#ifdef CONFIG_TRACING
> +static inline void __trace_alloc_intr_gate(unsigned int n, void *addr)
> +{
> + trace_set_intr_gate(n, addr);
> +}
> +#else
> +#define __trace_alloc_intr_gate(n, addr)
> +#endif
> +
> +static inline void __alloc_intr_gate(unsigned int n, void *addr)
> {
> - alloc_system_vector(n);
> set_intr_gate(n, addr);
> }
>
> +#define alloc_intr_gate(n, addr) \
> + do { \
> + alloc_system_vector(n); \
> + __alloc_intr_gate(n, addr); \
> + __trace_alloc_intr_gate(n, trace_##addr); \
> + } while (0)
> +
> /*
> * This routine sets up an interrupt gate at directory privilege level 3.
> */
> diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
> index 40afa00..0bb99d8 100644
> --- a/arch/x86/include/asm/entry_arch.h
> +++ b/arch/x86/include/asm/entry_arch.h
> @@ -13,8 +13,9 @@
> BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
> BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
> BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
> -BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
> -BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
> +BUILD_INTERRUPT3(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR,
> + smp_irq_move_cleanup_interrupt)
> +BUILD_INTERRUPT3(reboot_interrupt, REBOOT_VECTOR, smp_reboot_interrupt)
> #endif
>
> BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
> diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
> index 10a78c3..e84e91a 100644
> --- a/arch/x86/include/asm/hw_irq.h
> +++ b/arch/x86/include/asm/hw_irq.h
> @@ -76,6 +76,22 @@ extern void threshold_interrupt(void);
> extern void call_function_interrupt(void);
> extern void call_function_single_interrupt(void);
>
> +#ifdef CONFIG_TRACING
> +/* Interrupt handlers registered during init_IRQ */
> +extern void trace_apic_timer_interrupt(void);
> +extern void trace_x86_platform_ipi(void);
> +extern void trace_error_interrupt(void);
> +extern void trace_irq_work_interrupt(void);
> +extern void trace_spurious_interrupt(void);
> +extern void trace_thermal_interrupt(void);
> +extern void trace_reschedule_interrupt(void);
> +extern void trace_threshold_interrupt(void);
> +extern void trace_call_function_interrupt(void);
> +extern void trace_call_function_single_interrupt(void);
> +#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
> +#define trace_reboot_interrupt reboot_interrupt
> +#endif /* CONFIG_TRACING */
> +
> /* IOAPIC */
> #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
> extern unsigned long io_apic_irqs;
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index c2934be..cc40e03 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -12,6 +12,7 @@ struct ms_hyperv_info {
> extern struct ms_hyperv_info ms_hyperv;
>
> void hyperv_callback_vector(void);
> +#define trace_hyperv_callback_vector hyperv_callback_vector
> void hyperv_vector_handler(struct pt_regs *regs);
> void hv_register_vmbus_handler(int irq, irq_handler_t handler);
>
> diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
> new file mode 100644
> index 0000000..b4f1c53
> --- /dev/null
> +++ b/arch/x86/include/asm/trace/irq_vectors.h
> @@ -0,0 +1,159 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM irq_vectors
> +
> +#if !defined(_TRACE_IRQ_VECTORS_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_IRQ_VECTORS_H
> +
> +#include <linux/tracepoint.h>
> +
> +extern void trace_irq_vector_regfunc(void);
> +extern void trace_irq_vector_unregfunc(void);
> +
> +DECLARE_EVENT_CLASS(x86_irq_vector,
> +
> + TP_PROTO(int vector),
> +
> + TP_ARGS(vector),
> +
> + TP_STRUCT__entry(
> + __field( int, vector )
> + ),
> +
> + TP_fast_assign(
> + __entry->vector = vector;
> + ),
> +
> + TP_printk("vector=%d", __entry->vector) );
> +
> +#define DEFINE_IRQ_VECTOR_EVENT(name) \
> +DEFINE_EVENT_FN(x86_irq_vector, name, \
> + TP_PROTO(int vector), \
> + TP_ARGS(vector), \
> + trace_irq_vector_regfunc, \
> + trace_irq_vector_unregfunc);
> +
> +/*
> + * local_timer_entry - called before entering a local timer interrupt
> + * vector handler
> + */
> +DEFINE_IRQ_VECTOR_EVENT(local_timer_entry);
> +
> +/*
> + * local_timer_exit - called immediately after the interrupt vector
> + * handler returns
> + */
> +DEFINE_IRQ_VECTOR_EVENT(local_timer_exit);
Looks like you can save on repeat code by doing the following:
#define DEFINE_IRQ_VECTOR_EVENT(name) \
DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \
TP_PROTO(int vector), \
TP_ARGS(vector), \
trace_irq_vector_regfunc, \
trace_irq_vector_unregfunc); \
DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \
TP_PROTO(int vector), \
TP_ARGS(vector), \
trace_irq_vector_regfunc, \
trace_irq_vector_unregfunc); \
Then just do for each:
DEFINE_IRQ_VECTOR_EVENT(local_timer);
[...]
> +
> +/*
> + * reschedule_entry - called before entering a reschedule vector handler
> + */
> +DEFINE_IRQ_VECTOR_EVENT(reschedule_entry);
> +
> +/*
> + * reschedule_exit - called immediately after the interrupt vector
> + * handler returns
> + */
> +DEFINE_IRQ_VECTOR_EVENT(reschedule_exit);
> +
> +/*
> + * spurious_apic_entry - called before entering a spurious apic vector handler
> + */
> +DEFINE_IRQ_VECTOR_EVENT(spurious_apic_entry);
> +
> +/*
> + * spurious_apic_exit - called immediately after the interrupt vector
> + * handler returns
> + */
> +DEFINE_IRQ_VECTOR_EVENT(spurious_apic_exit);
> +
> +/*
> + * error_apic_entry - called before entering an error apic vector handler
> + */
> +DEFINE_IRQ_VECTOR_EVENT(error_apic_entry);
> +
> +/*
> + * error_apic_exit - called immediately after the interrupt vector
> + * handler returns
> + */
> +DEFINE_IRQ_VECTOR_EVENT(error_apic_exit);
> +
> +/*
> + * x86_platform_ipi_entry - called before entering a x86 platform ipi interrupt
> + * vector handler
> + */
> +DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi_entry);
> +
> +/*
> + * x86_platform_ipi_exit - called immediately after the interrupt vector
> + * handler returns
> + */
> +DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi_exit);
> +
> +/*
> + * irq_work_entry - called before entering a irq work interrupt
> + * vector handler
> + */
> +DEFINE_IRQ_VECTOR_EVENT(irq_work_entry);
> +
> +/*
> + * irq_work_exit - called immediately after the interrupt vector
> + * handler returns
> + */
> +DEFINE_IRQ_VECTOR_EVENT(irq_work_exit);
> +
> +/*
> + * call_function_entry - called before entering a call function interrupt
> + * vector handler
> + */
> +DEFINE_IRQ_VECTOR_EVENT(call_function_entry);
> +
> +/*
> + * call_function_exit - called immediately after the interrupt vector
> + * handler returns
> + */
> +DEFINE_IRQ_VECTOR_EVENT(call_function_exit);
> +
> +/*
> + * call_function_single_entry - called before entering a call function
> + * single interrupt vector handler
> + */
> +DEFINE_IRQ_VECTOR_EVENT(call_function_single_entry);
> +
> +/*
> + * call_function_single_exit - called immediately after the interrupt vector
> + * handler returns
> + */
> +DEFINE_IRQ_VECTOR_EVENT(call_function_single_exit);
> +
> +/*
> + * threshold_apic_entry - called before entering a threshold apic interrupt
> + * vector handler
> + */
> +DEFINE_IRQ_VECTOR_EVENT(threshold_apic_entry);
> +
> +/*
> + * threshold_apic_exit - called immediately after the interrupt vector
> + * handler returns
> + */
> +DEFINE_IRQ_VECTOR_EVENT(threshold_apic_exit);
> +
> +/*
> + * thermal_apic_entry - called before entering a thermal apic interrupt
> + * vector handler
> + */
> +DEFINE_IRQ_VECTOR_EVENT(thermal_apic_entry);
> +
> +/*
> + * thrmal_apic_exit - called immediately after the interrupt vector
> + * handler returns
> + */
> +DEFINE_IRQ_VECTOR_EVENT(thermal_apic_exit);
> +
> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH ../../arch/x86/include/asm/trace
Hmm, this is dangerous. It's better to use the Makefile.
here do:
#define TRACE_INCLUDE_PATH .
> +#define TRACE_INCLUDE_FILE irq_vectors
> +#endif /* _TRACE_IRQ_VECTORS_H */
Then in arch/x86/kernel/apic/Makefile:
CFLAGS_apic.o := -I$(src)
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> +
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 7bd3bd3..74b3891 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -102,6 +102,7 @@ obj-$(CONFIG_OF) += devicetree.o
> obj-$(CONFIG_UPROBES) += uprobes.o
>
> obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
> +obj-$(CONFIG_TRACING) += tracepoint.o
>
> ###
> # 64 bit specific files
> diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
> index 904611b..7d39c68 100644
> --- a/arch/x86/kernel/apic/apic.c
> +++ b/arch/x86/kernel/apic/apic.c
> @@ -55,6 +55,9 @@
> #include <asm/tsc.h>
> #include <asm/hypervisor.h>
>
> +#define CREATE_TRACE_POINTS
> +#include <asm/trace/irq_vectors.h>
> +
> unsigned int num_processors;
>
> unsigned disabled_cpus __cpuinitdata;
> @@ -934,6 +937,30 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
> set_irq_regs(old_regs);
> }
>
> +void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
> +{
> + struct pt_regs *old_regs = set_irq_regs(regs);
> +
> + /*
> + * NOTE! We'd better ACK the irq immediately,
> + * because timer handling can be slow.
> + */
> + ack_APIC_irq();
> + /*
> + * update_process_times() expects us to have done irq_enter().
> + * Besides, if we don't timer interrupts ignore the global
> + * interrupt lock, which is the WrongThing (tm) to do.
> + */
> + irq_enter();
> + exit_idle();
> + trace_local_timer_entry(LOCAL_TIMER_VECTOR);
> + local_apic_timer_interrupt();
> + trace_local_timer_exit(LOCAL_TIMER_VECTOR);
> + irq_exit();
> +
> + set_irq_regs(old_regs);
> +}
> +
> int setup_profiling_timer(unsigned int multiplier)
> {
> return -EINVAL;
> @@ -1930,6 +1957,31 @@ void smp_spurious_interrupt(struct pt_regs *regs)
> irq_exit();
> }
>
> +void smp_trace_spurious_interrupt(struct pt_regs *regs)
> +{
> + u32 v;
> +
> + irq_enter();
> + exit_idle();
> + trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
> + /*
> + * Check if this really is a spurious interrupt and ACK it
> + * if it is a vectored one. Just in case...
> + * Spurious interrupts should not be ACKed.
> + */
> + v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
> + if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
> + ack_APIC_irq();
> +
> + inc_irq_stat(irq_spurious_count);
> +
> + /* see sw-dev-man vol 3, chapter 7.4.13.5 */
> + pr_info("spurious APIC interrupt on CPU#%d, "
> + "should never happen.\n", smp_processor_id());
> + trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR);
> + irq_exit();
> +}
> +
> /*
> * This interrupt should never happen with our APIC/SMP architecture
> */
> @@ -1973,6 +2025,48 @@ void smp_error_interrupt(struct pt_regs *regs)
> irq_exit();
> }
>
> +void smp_trace_error_interrupt(struct pt_regs *regs)
> +{
> + u32 v0, v1;
> + u32 i = 0;
> + static const char * const error_interrupt_reason[] = {
> + "Send CS error", /* APIC Error Bit 0 */
> + "Receive CS error", /* APIC Error Bit 1 */
> + "Send accept error", /* APIC Error Bit 2 */
> + "Receive accept error", /* APIC Error Bit 3 */
> + "Redirectable IPI", /* APIC Error Bit 4 */
> + "Send illegal vector", /* APIC Error Bit 5 */
> + "Received illegal vector", /* APIC Error Bit 6 */
> + "Illegal register address", /* APIC Error Bit 7 */
> + };
> +
> + irq_enter();
> + exit_idle();
> + trace_error_apic_entry(ERROR_APIC_VECTOR);
> + /* First tickle the hardware, only then report what went on. -- REW */
> + v0 = apic_read(APIC_ESR);
> + apic_write(APIC_ESR, 0);
> + v1 = apic_read(APIC_ESR);
> + ack_APIC_irq();
> + atomic_inc(&irq_err_count);
> +
> + apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)",
> + smp_processor_id(), v0 , v1);
> +
> + v1 = v1 & 0xff;
> + while (v1) {
> + if (v1 & 0x1)
> + apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
> + i++;
> + v1 >>= 1;
> + }
> +
> + apic_printk(APIC_DEBUG, KERN_CONT "\n");
> +
> + trace_error_apic_exit(ERROR_APIC_VECTOR);
> + irq_exit();
> +}
> +
> /**
> * connect_bsp_APIC - attach the APIC to the interrupt system
> */
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index d814772..23045cd 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1144,7 +1144,7 @@ int is_debug_stack(unsigned long addr)
> addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
> }
>
> -static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
> +DEFINE_PER_CPU(u32, debug_stack_use_ctr);
>
> void debug_stack_set_zero(void)
> {
> @@ -1156,8 +1156,14 @@ void debug_stack_reset(void)
> {
> if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
> return;
> - if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
> - load_idt((const struct desc_ptr *)&idt_descr);
> + if (this_cpu_dec_return(debug_stack_use_ctr) == 0) {
> +#ifdef CONFIG_TRACING
> + if (this_cpu_read(trace_idt_in_use))
> + load_idt((const struct desc_ptr *)&trace_idt_descr);
> + else
> +#endif
> + load_idt((const struct desc_ptr *)&idt_descr);
> + }
I have a patch that makes this more generic. I'm wondering if that would
be better than having everything so coupled.
> }
>
> #else /* CONFIG_X86_64 */
> diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
> index 47a1870..e7aa7fc 100644
> --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
> +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
> @@ -29,6 +29,7 @@
> #include <asm/idle.h>
> #include <asm/mce.h>
> #include <asm/msr.h>
> +#include <asm/trace/irq_vectors.h>
>
> /* How long to wait between reporting thermal events */
> #define CHECK_INTERVAL (300 * HZ)
> @@ -389,6 +390,19 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
> ack_APIC_irq();
> }
>
> +asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs)
> +{
> + irq_enter();
> + exit_idle();
> + trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
> + inc_irq_stat(irq_thermal_count);
> + smp_thermal_vector();
> + trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
> + irq_exit();
> + /* Ack only at the end to avoid potential reentry */
> + ack_APIC_irq();
> +}
> +
> /* Thermal monitoring depends on APIC, ACPI and clock modulation */
> static int intel_thermal_supported(struct cpuinfo_x86 *c)
> {
> diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
> index aa578ca..0cbef99 100644
> --- a/arch/x86/kernel/cpu/mcheck/threshold.c
> +++ b/arch/x86/kernel/cpu/mcheck/threshold.c
> @@ -8,6 +8,7 @@
> #include <asm/apic.h>
> #include <asm/idle.h>
> #include <asm/mce.h>
> +#include <asm/trace/irq_vectors.h>
>
> static void default_threshold_interrupt(void)
> {
> @@ -27,3 +28,16 @@ asmlinkage void smp_threshold_interrupt(void)
> /* Ack only at the end to avoid potential reentry */
> ack_APIC_irq();
> }
> +
> +asmlinkage void smp_trace_threshold_interrupt(void)
> +{
> + irq_enter();
> + exit_idle();
> + trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
> + inc_irq_stat(irq_threshold_count);
> + mce_threshold_vector();
> + trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
> + irq_exit();
> + /* Ack only at the end to avoid potential reentry */
> + ack_APIC_irq();
> +}
> diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
> index 8f3e2de..2cfbc3a 100644
> --- a/arch/x86/kernel/entry_32.S
> +++ b/arch/x86/kernel/entry_32.S
> @@ -801,7 +801,17 @@ ENTRY(name) \
> CFI_ENDPROC; \
> ENDPROC(name)
>
> -#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
> +
> +#ifdef CONFIG_TRACING
> +#define TRACE_BUILD_INTERRUPT(name, nr) \
> + BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
> +#else
> +#define TRACE_BUILD_INTERRUPT(name, nr)
> +#endif
> +
> +#define BUILD_INTERRUPT(name, nr) \
> + BUILD_INTERRUPT3(name, nr, smp_##name); \
> + TRACE_BUILD_INTERRUPT(name, nr)
>
> /* The include is where all of the SMP etc. interrupts come from */
> #include <asm/entry_arch.h>
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index c1d01e6..4570477 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -1138,7 +1138,7 @@ END(common_interrupt)
> /*
> * APIC interrupts.
> */
> -.macro apicinterrupt num sym do_sym
> +.macro apicinterrupt3 num sym do_sym
> ENTRY(\sym)
> INTR_FRAME
> ASM_CLAC
> @@ -1150,15 +1150,32 @@ ENTRY(\sym)
> END(\sym)
> .endm
>
> +#ifdef CONFIG_TRACING
> +#define trace(sym) trace_##sym
> +#define smp_trace(sym) smp_trace_##sym
> +
> +.macro trace_apicinterrupt num sym
> +apicinterrupt3 \num trace(\sym) smp_trace(\sym)
> +.endm
> +#else
> +.macro trace_apicinterrupt num sym do_sym
> +.endm
> +#endif
> +
> +.macro apicinterrupt num sym do_sym
> +apicinterrupt3 \num \sym \do_sym
> +trace_apicinterrupt \num \sym
> +.endm
> +
> #ifdef CONFIG_SMP
> -apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
> +apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \
> irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
> -apicinterrupt REBOOT_VECTOR \
> +apicinterrupt3 REBOOT_VECTOR \
> reboot_interrupt smp_reboot_interrupt
> #endif
>
> #ifdef CONFIG_X86_UV
> -apicinterrupt UV_BAU_MESSAGE \
> +apicinterrupt3 UV_BAU_MESSAGE \
> uv_bau_message_intr1 uv_bau_message_interrupt
> #endif
> apicinterrupt LOCAL_TIMER_VECTOR \
> @@ -1446,13 +1463,13 @@ ENTRY(xen_failsafe_callback)
> CFI_ENDPROC
> END(xen_failsafe_callback)
>
> -apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
> +apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
> xen_hvm_callback_vector xen_evtchn_do_upcall
>
> #endif /* CONFIG_XEN */
>
> #if IS_ENABLED(CONFIG_HYPERV)
> -apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
> +apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
> hyperv_callback_vector hyperv_vector_handler
> #endif /* CONFIG_HYPERV */
>
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 6859e96..e827e67 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -518,6 +518,12 @@ ENTRY(idt_table)
> ENTRY(nmi_idt_table)
> .skip IDT_ENTRIES * 16
>
> +#ifdef CONFIG_TRACING
> + .align L1_CACHE_BYTES
> +ENTRY(trace_idt_table)
> + .skip IDT_ENTRIES * 16
> +#endif
> +
> __PAGE_ALIGNED_BSS
> NEXT_PAGE(empty_zero_page)
> .skip PAGE_SIZE
> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
> index e4595f1..216bec1 100644
> --- a/arch/x86/kernel/irq.c
> +++ b/arch/x86/kernel/irq.c
> @@ -17,6 +17,7 @@
> #include <asm/idle.h>
> #include <asm/mce.h>
> #include <asm/hw_irq.h>
> +#include <asm/trace/irq_vectors.h>
>
> atomic_t irq_err_count;
>
> @@ -228,6 +229,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
> set_irq_regs(old_regs);
> }
>
> +void smp_trace_x86_platform_ipi(struct pt_regs *regs)
> +{
> + struct pt_regs *old_regs = set_irq_regs(regs);
> +
> + ack_APIC_irq();
> +
> + irq_enter();
> +
> + exit_idle();
> +
> + trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
> + inc_irq_stat(x86_platform_ipis);
> +
> + if (x86_platform_ipi_callback)
> + x86_platform_ipi_callback();
> +
> + trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
> + irq_exit();
> +
> + set_irq_regs(old_regs);
> +}
> +
> EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
>
> #ifdef CONFIG_HOTPLUG_CPU
> diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
> index ca8f703..09e6262 100644
> --- a/arch/x86/kernel/irq_work.c
> +++ b/arch/x86/kernel/irq_work.c
> @@ -8,6 +8,7 @@
> #include <linux/irq_work.h>
> #include <linux/hardirq.h>
> #include <asm/apic.h>
> +#include <asm/trace/irq_vectors.h>
>
> void smp_irq_work_interrupt(struct pt_regs *regs)
> {
> @@ -18,6 +19,17 @@ void smp_irq_work_interrupt(struct pt_regs *regs)
> irq_exit();
> }
>
> +void smp_trace_irq_work_interrupt(struct pt_regs *regs)
> +{
> + irq_enter();
> + ack_APIC_irq();
> + trace_irq_work_entry(IRQ_WORK_VECTOR);
> + inc_irq_stat(apic_irq_work_irqs);
> + irq_work_run();
> + trace_irq_work_exit(IRQ_WORK_VECTOR);
> + irq_exit();
> +}
> +
> void arch_irq_work_raise(void)
> {
> #ifdef CONFIG_X86_LOCAL_APIC
> diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
> index 48d2b7d..aad58af 100644
> --- a/arch/x86/kernel/smp.c
> +++ b/arch/x86/kernel/smp.c
> @@ -30,6 +30,7 @@
> #include <asm/proto.h>
> #include <asm/apic.h>
> #include <asm/nmi.h>
> +#include <asm/trace/irq_vectors.h>
> /*
> * Some notes on x86 processor bugs affecting SMP operation:
> *
> @@ -259,6 +260,18 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
> */
> }
>
> +void smp_trace_reschedule_interrupt(struct pt_regs *regs)
> +{
> + ack_APIC_irq();
> + trace_reschedule_entry(RESCHEDULE_VECTOR);
> + inc_irq_stat(irq_resched_count);
> + scheduler_ipi();
> + trace_reschedule_exit(RESCHEDULE_VECTOR);
> + /*
> + * KVM uses this interrupt to force a cpu out of guest mode
> + */
> +}
> +
> void smp_call_function_interrupt(struct pt_regs *regs)
> {
> ack_APIC_irq();
> @@ -268,6 +281,17 @@ void smp_call_function_interrupt(struct pt_regs *regs)
> irq_exit();
> }
>
> +void smp_trace_call_function_interrupt(struct pt_regs *regs)
> +{
> + ack_APIC_irq();
> + irq_enter();
> + trace_call_function_entry(CALL_FUNCTION_VECTOR);
> + generic_smp_call_function_interrupt();
> + inc_irq_stat(irq_call_count);
> + trace_call_function_exit(CALL_FUNCTION_VECTOR);
> + irq_exit();
> +}
> +
> void smp_call_function_single_interrupt(struct pt_regs *regs)
> {
> ack_APIC_irq();
> @@ -277,6 +301,17 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
> irq_exit();
> }
>
> +void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
> +{
> + ack_APIC_irq();
> + irq_enter();
> + trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
> + generic_smp_call_function_single_interrupt();
> + inc_irq_stat(irq_call_count);
> + trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
> + irq_exit();
> +}
> +
> static int __init nonmi_ipi_setup(char *str)
> {
> smp_no_nmi_ipi = true;
> diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
> new file mode 100644
> index 0000000..3403dc3
> --- /dev/null
> +++ b/arch/x86/kernel/tracepoint.c
> @@ -0,0 +1,71 @@
> +/*
> + * Code for supporting irq vector tracepoints.
> + *
> + * Copyright (C) 2013 Seiji Aguchi <seiji.aguchi@....com>
> + *
> + */
> +#include <asm/hw_irq.h>
> +#include <asm/desc.h>
> +
> +struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
> + (unsigned long) trace_idt_table };
> +
> +#ifndef CONFIG_X86_64
> +gate_desc trace_idt_table[NR_VECTORS] __page_aligned_data
> + = { { { { 0, 0 } } }, };
> +#endif
> +
> +static int trace_irq_vector_refcount;
> +static DEFINE_MUTEX(irq_vector_mutex);
> +DEFINE_PER_CPU(u32, trace_idt_in_use);
> +
> +static void switch_to_trace_idt(void *arg)
> +{
> +
> + this_cpu_inc(trace_idt_in_use);
> + load_idt(&trace_idt_descr);
> +
> + return;
> +}
> +
> +static void restore_original_idt(void *arg)
> +{
> + if (WARN_ON(!this_cpu_read(trace_idt_in_use)))
> + return;
> + this_cpu_dec(trace_idt_in_use);
> +
> +#ifdef CONFIG_X86_64
> + if (this_cpu_read(debug_stack_use_ctr))
> + load_idt(&nmi_idt_descr);
> + else
> +#endif
This is unneeded, as debug_stack_use_ctr should never be set here. It
only gets set when interrupts are disabled and cleared before they are
enabled again, and this is called from interrupt context. If it is set,
then it is truly a bug.
Only NMIs themselves can see this set, if it wasn't the one to set it.
> + load_idt(&idt_descr);
> + return;
> +}
> +
> +void trace_irq_vector_regfunc(void)
> +{
> + mutex_lock(&irq_vector_mutex);
> + if (!trace_irq_vector_refcount) {
> + smp_call_function(switch_to_trace_idt, NULL, 0);
> + local_irq_disable();
> + switch_to_trace_idt(NULL);
> + local_irq_enable();
> + }
> + trace_irq_vector_refcount++;
> + mutex_unlock(&irq_vector_mutex);
> +}
> +
> +void trace_irq_vector_unregfunc(void)
> +{
> + mutex_lock(&irq_vector_mutex);
> + trace_irq_vector_refcount--;
> + if (!trace_irq_vector_refcount) {
Hmm, this needs to handle cpu hotplug. I just did the following:
echo 0 > /sys/devices/system/cpu/cpu3/online
echo 1 > /debug/tracing/events/irq_vectors/enable
echo 1 > /sys/devices/systems/cpu/cpu3/online
echo 0 > /debug/tracing/events/irq/vectors/enable
and got this:
[ 224.595931] ------------[ cut here ]------------
[ 224.596928] WARNING: at /home/rostedt/work/git/linux-trace.git/arch/x86/kernel/tracepoint.c:33 restore_original_idt+0x26/0x4e()
[ 224.596928] Modules linked in: ebtables ipt_MASQUERADE sunrpc bridge stp llc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ipv6 uinput snd_hda_codec_idt snd_hda_intel snd_hda_codec kvm_intel snd_hwdep snd_seq kvm snd_seq_device snd_pcm snd_timer snd soundcore snd_page_alloc shpchp microcode i2c_i801 pata_acpi firewire_ohci firewire_core crc_itu_t ata_generic i915 drm_kms_helper drm i2c_algo_bit i2c_core video
[ 224.596928] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 3.10.0-rc2-test+ #137
[ 224.596928] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
[ 224.596928] ffffffff817c948b ffff88007d583ef8 ffffffff814c9ec6 ffff88007d583f38
[ 224.596928] ffffffff810363a5 ffff88007d5971b8 0000000000000000 ffff88007d5971b8
[ 224.596928] ffff88007d583f68 0000000000000001 0000000000000000 ffff88007d583f48
[ 224.596928] Call Trace:
[ 224.596928] <IRQ> [<ffffffff814c9ec6>] dump_stack+0x19/0x1b
[ 224.596928] [<ffffffff810363a5>] warn_slowpath_common+0x67/0x80
[ 224.596928] [<ffffffff810363d8>] warn_slowpath_null+0x1a/0x1c
[ 224.596928] [<ffffffff8102864c>] restore_original_idt+0x26/0x4e
[ 224.596928] [<ffffffff8108bd52>] generic_smp_call_function_single_interrupt+0xb5/0xee
[ 224.596928] [<ffffffff8101f500>] smp_call_function_interrupt+0x18/0x27
[ 224.596928] [<ffffffff814d646f>] call_function_interrupt+0x6f/0x80
[ 224.596928] <EOI> [<ffffffff81009727>] ? default_idle+0x21/0x32
[ 224.596928] [<ffffffff81009725>] ? default_idle+0x1f/0x32
[ 224.596928] [<ffffffff81009e37>] arch_cpu_idle+0x18/0x22
[ 224.596928] [<ffffffff810799da>] cpu_startup_entry+0xc8/0x12e
[ 224.596928] [<ffffffff814bc6ad>] start_secondary+0x250/0x257
-- Steve
> + smp_call_function(restore_original_idt, NULL, 0);
> + local_irq_disable();
> + restore_original_idt(NULL);
> + local_irq_enable();
> + }
> + mutex_unlock(&irq_vector_mutex);
> +}
> +
> diff --git a/include/xen/events.h b/include/xen/events.h
> index c6bfe01..9216d07 100644
> --- a/include/xen/events.h
> +++ b/include/xen/events.h
> @@ -76,6 +76,9 @@ unsigned irq_from_evtchn(unsigned int evtchn);
>
> /* Xen HVM evtchn vector callback */
> void xen_hvm_callback_vector(void);
> +#ifdef CONFIG_TRACING
> +#define trace_xen_hvm_callback_vector xen_hvm_callback_vector
> +#endif
> extern int xen_have_vector_callback;
> int xen_set_callback_via(uint64_t via);
> void xen_evtchn_do_upcall(struct pt_regs *regs);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists