Date:   Fri, 22 Sep 2017 11:28:30 -0700
From:   Joel Fernandes <joelaf@...gle.com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     LKML <linux-kernel@...r.kernel.org>, kernel-team@...roid.com,
        Steven Rostedt <rostedt@...dmis.org>
Subject: Re: [PATCH v6 2/2] tracing: Add support for preempt and irq
 enable/disable events

Hi Peter,

On Fri, Sep 22, 2017 at 2:02 AM, Peter Zijlstra <peterz@...radead.org> wrote:
> On Thu, Sep 21, 2017 at 06:50:24PM -0700, Joel Fernandes wrote:
>> +DECLARE_EVENT_CLASS(preemptirq_template,
>> +
>> +     TP_PROTO(unsigned long ip, unsigned long parent_ip),
>> +
>> +     TP_ARGS(ip, parent_ip),
>> +
>> +     TP_STRUCT__entry(
>> +             __field(u32, caller_offs)
>> +             __field(u32, parent_offs)
>> +     ),
>> +
>> +     TP_fast_assign(
>> +             __entry->caller_offs = (u32)(ip - (unsigned long)_stext);
>> +             __entry->parent_offs = (u32)(parent_ip - (unsigned long)_stext);
>> +     ),
>> +
>> +     TP_printk("caller=%pF parent=%pF",
>> +               (void *)((unsigned long)(_stext) + __entry->caller_offs),
>> +               (void *)((unsigned long)(_stext) + __entry->parent_offs))
>> +);
>
> So I don't get that function prototype. Why do we need two IPs?

This follows the exact same "model" used by the preempt/irqsoff
tracer. The rationale, if I understand correctly, is that a lot of
the time the actual preempt-disable is done by, say, a lock function,
but the caller of that lock function is what actually caused the
preempt-disable event and is also of interest. Recording both gives
more context without needing a full stack trace. For example, here
the spinlock acquired in add_wait_queue disabled preemption:

            grep-1041  [002] d..1    80.363455: preempt_disable: caller=_raw_spin_lock_irqsave+0x1d/0x40 parent=add_wait_queue+0x15/0x50
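
To make that concrete outside the kernel, here is a rough user-space
analogue of recording two return addresses (just a sketch, not kernel
code; the kernel's CALLER_ADDR0/CALLER_ADDR1 macros in
<linux/ftrace.h> are built on __builtin_return_address() in a similar
spirit, and the fake_* names below are made up). Build with
-O0 -fno-omit-frame-pointer so __builtin_return_address(1) is usable:

#include <stdio.h>

/* Stands in for the function that actually disables preemption. */
static void __attribute__((noinline)) fake_preempt_disable(void)
{
	/* ip: our immediate caller, e.g. the lock function. */
	void *ip        = __builtin_return_address(0);
	/* parent_ip: the lock function's caller, the interesting one. */
	void *parent_ip = __builtin_return_address(1);
	printf("caller=%p parent=%p\n", ip, parent_ip);
}

/* Stands in for _raw_spin_lock_irqsave(). */
static void __attribute__((noinline)) fake_spin_lock(void)
{
	fake_preempt_disable();
}

int main(void)
{
	fake_spin_lock();	/* stands in for add_wait_queue() */
	return 0;
}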

>
>> diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
>> index 26c688740742..f3087e7278cb 100644
>> --- a/kernel/trace/trace_irqsoff.c
>> +++ b/kernel/trace/trace_irqsoff.c
>> @@ -16,6 +16,9 @@
>>
>>  #include "trace.h"
>>
>> +#define CREATE_TRACE_POINTS
>> +#include <trace/events/preemptirq.h>
>> +
>>  #if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER)
>>  static struct trace_array            *irqsoff_trace __read_mostly;
>>  static int                           tracer_enabled __read_mostly;
>> @@ -777,14 +780,29 @@ static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { }
>>  static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { }
>>  #endif
>>
>> +static DEFINE_PER_CPU(int, tracing_irq_cpu);
>> +static DEFINE_PER_CPU(int, tracing_preempt_cpu);
>> +
>>  void start_critical_timings(void)
>>  {
>> +     if (this_cpu_read(tracing_preempt_cpu))
>> +             trace_preempt_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>> +
>> +     if (this_cpu_read(tracing_irq_cpu))
>> +             trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>> +
>>       start_critical_timings_tracer();
>>  }
>>  EXPORT_SYMBOL_GPL(start_critical_timings);
>>
>>  void stop_critical_timings(void)
>>  {
>> +     if (this_cpu_read(tracing_preempt_cpu))
>> +             trace_preempt_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>> +
>> +     if (this_cpu_read(tracing_irq_cpu))
>> +             trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>> +
>>       stop_critical_timings_tracer();
>>  }
>>  EXPORT_SYMBOL_GPL(stop_critical_timings);
>
> And I feel these yield somewhat odd semantics, does that need explaining
> somewhere?

Maybe I can add a comment here, if you prefer that. By "semantics",
do you mean the 'critical' vs 'atomic' distinction, or the context in
which these functions are supposed to be used?

>
>> @@ -792,24 +810,48 @@ EXPORT_SYMBOL_GPL(stop_critical_timings);
>>  #if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PROVE_LOCKING)
>>  void trace_hardirqs_on(void)
>>  {
>> +     if (!this_cpu_read(tracing_irq_cpu))
>> +             return;
>> +
>> +     trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>>       tracer_hardirqs_on();
>> +
>> +     this_cpu_write(tracing_irq_cpu, 0);
>>  }
>>  EXPORT_SYMBOL(trace_hardirqs_on);
>>
>>  void trace_hardirqs_off(void)
>>  {
>> +     if (this_cpu_read(tracing_irq_cpu))
>> +             return;
>> +
>> +     this_cpu_write(tracing_irq_cpu, 1);
>> +
>> +     trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>>       tracer_hardirqs_off();
>>  }
>>  EXPORT_SYMBOL(trace_hardirqs_off);
>>
>>  __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
>>  {
>> +     if (!this_cpu_read(tracing_irq_cpu))
>> +             return;
>> +
>> +     trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
>>       tracer_hardirqs_on_caller(caller_addr);
>> +
>> +     this_cpu_write(tracing_irq_cpu, 0);
>>  }
>>  EXPORT_SYMBOL(trace_hardirqs_on_caller);
>>
>>  __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
>>  {
>> +     if (this_cpu_read(tracing_irq_cpu))
>> +             return;
>> +
>> +     this_cpu_write(tracing_irq_cpu, 1);
>> +
>> +     trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
>>       tracer_hardirqs_off_caller(caller_addr);
>>  }
>>  EXPORT_SYMBOL(trace_hardirqs_off_caller);
>> @@ -831,14 +873,27 @@ inline void print_irqtrace_events(struct task_struct *curr)
>>  }
>>  #endif
>>
>> -#ifdef CONFIG_PREEMPT_TRACER
>> +#if defined(CONFIG_PREEMPT_TRACER) || \
>> +     (defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS))
>>  void trace_preempt_on(unsigned long a0, unsigned long a1)
>>  {
>> +     if (!this_cpu_read(tracing_preempt_cpu))
>> +             return;
>> +
>> +     trace_preempt_enable_rcuidle(a0, a1);
>>       tracer_preempt_on(a0, a1);
>> +
>> +     this_cpu_write(tracing_preempt_cpu, 0);
>>  }
>>
>>  void trace_preempt_off(unsigned long a0, unsigned long a1)
>>  {
>> +     if (this_cpu_read(tracing_preempt_cpu))
>> +             return;
>> +
>> +     this_cpu_write(tracing_preempt_cpu, 1);
>> +
>> +     trace_preempt_disable_rcuidle(a0, a1);
>>       tracer_preempt_off(a0, a1);
>>  }
>>  #endif
>
> And here you assume things like double on / double off don't happen,
> which might well be so, but does seem somewhat fragile.
>

We are handling the cases where these functions might be called
twice; we are only interested in the first call. I caught a deadlock
when I didn't add such protection to trace_hardirqs_off, so I added
it to the other trace_hardirqs* and trace_preempt* functions as well,
to be extra safe and keep things consistent. I hope I understood your
concern correctly; if not, please let me know, thanks.
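
To spell out the pattern, here is a simplified single-CPU sketch of
the guard (not the exact kernel code; the real version uses a per-CPU
variable via this_cpu_read()/this_cpu_write(), and the emit_* helpers
below just stand in for the trace_*_rcuidle() calls):

static int tracing_irq_active;

static void emit_irq_disable(void) { /* trace_irq_disable_rcuidle() */ }
static void emit_irq_enable(void)  { /* trace_irq_enable_rcuidle() */ }

void sketch_hardirqs_off(void)
{
	if (tracing_irq_active)
		return;			/* duplicate off: already traced */
	tracing_irq_active = 1;
	emit_irq_disable();
}

void sketch_hardirqs_on(void)
{
	if (!tracing_irq_active)
		return;			/* no traced disable to pair with */
	emit_irq_enable();
	tracing_irq_active = 0;
}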

>
> So all in all I don't see anything particularly wrong, but I see lots
> that could do with explaining.

Sure, thanks for taking a look :) Let me know if you have any other
comments about this patch.

regards,

- Joel
