Date:   Fri, 22 Sep 2017 11:28:30 -0700
From:   Joel Fernandes <joelaf@...gle.com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     LKML <linux-kernel@...r.kernel.org>, kernel-team@...roid.com,
        Steven Rostedt <rostedt@...dmis.org>
Subject: Re: [PATCH v6 2/2] tracing: Add support for preempt and irq
 enable/disable events

Hi Peter,

On Fri, Sep 22, 2017 at 2:02 AM, Peter Zijlstra <peterz@...radead.org> wrote:
> On Thu, Sep 21, 2017 at 06:50:24PM -0700, Joel Fernandes wrote:
>> +DECLARE_EVENT_CLASS(preemptirq_template,
>> +
>> +     TP_PROTO(unsigned long ip, unsigned long parent_ip),
>> +
>> +     TP_ARGS(ip, parent_ip),
>> +
>> +     TP_STRUCT__entry(
>> +             __field(u32, caller_offs)
>> +             __field(u32, parent_offs)
>> +     ),
>> +
>> +     TP_fast_assign(
>> +             __entry->caller_offs = (u32)(ip - (unsigned long)_stext);
>> +             __entry->parent_offs = (u32)(parent_ip - (unsigned long)_stext);
>> +     ),
>> +
>> +     TP_printk("caller=%pF parent=%pF",
>> +               (void *)((unsigned long)(_stext) + __entry->caller_offs),
>> +               (void *)((unsigned long)(_stext) + __entry->parent_offs))
>> +);
>
> So I don't get that function prototype. Why do we need two IPs?

This follows the exact same "model" used by the preempt/irqsoff
tracer. The rationale, if I understand correctly, is that a lot of
the time the actual preempt-disable is done by, say, a lock function,
but the caller of that lock function is what actually caused the
preempt-disable event and is also of interest. Recording both gives
more context without needing a full stack trace. For example, here
the spinlock acquired in add_wait_queue disabled preemption:

            grep-1041  [002] d..1    80.363455: preempt_disable: caller=_raw_spin_lock_irqsave+0x1d/0x40 parent=add_wait_queue+0x15/0x50
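
To make that concrete outside the kernel, here is a rough user-space
analogue of recording two return addresses (just a sketch, not kernel
code; the kernel's CALLER_ADDR0/CALLER_ADDR1 macros in
<linux/ftrace.h> are built on __builtin_return_address() in a similar
spirit, and the fake_* names below are made up). Build with
-O0 -fno-omit-frame-pointer so __builtin_return_address(1) is usable:

#include <stdio.h>

/* Stands in for the function that actually disables preemption. */
static void __attribute__((noinline)) fake_preempt_disable(void)
{
	/* ip: our immediate caller, e.g. the lock function. */
	void *ip        = __builtin_return_address(0);
	/* parent_ip: the lock function's caller, the interesting one. */
	void *parent_ip = __builtin_return_address(1);
	printf("caller=%p parent=%p\n", ip, parent_ip);
}

/* Stands in for _raw_spin_lock_irqsave(). */
static void __attribute__((noinline)) fake_spin_lock(void)
{
	fake_preempt_disable();
}

int main(void)
{
	fake_spin_lock();	/* stands in for add_wait_queue() */
	return 0;
}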

>
>> diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
>> index 26c688740742..f3087e7278cb 100644
>> --- a/kernel/trace/trace_irqsoff.c
>> +++ b/kernel/trace/trace_irqsoff.c
>> @@ -16,6 +16,9 @@
>>
>>  #include "trace.h"
>>
>> +#define CREATE_TRACE_POINTS
>> +#include <trace/events/preemptirq.h>
>> +
>>  #if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER)
>>  static struct trace_array            *irqsoff_trace __read_mostly;
>>  static int                           tracer_enabled __read_mostly;
>> @@ -777,14 +780,29 @@ static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { }
>>  static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { }
>>  #endif
>>
>> +static DEFINE_PER_CPU(int, tracing_irq_cpu);
>> +static DEFINE_PER_CPU(int, tracing_preempt_cpu);
>> +
>>  void start_critical_timings(void)
>>  {
>> +     if (this_cpu_read(tracing_preempt_cpu))
>> +             trace_preempt_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>> +
>> +     if (this_cpu_read(tracing_irq_cpu))
>> +             trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>> +
>>       start_critical_timings_tracer();
>>  }
>>  EXPORT_SYMBOL_GPL(start_critical_timings);
>>
>>  void stop_critical_timings(void)
>>  {
>> +     if (this_cpu_read(tracing_preempt_cpu))
>> +             trace_preempt_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>> +
>> +     if (this_cpu_read(tracing_irq_cpu))
>> +             trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>> +
>>       stop_critical_timings_tracer();
>>  }
>>  EXPORT_SYMBOL_GPL(stop_critical_timings);
>
> And I feel these yield somewhat odd semantics, does that need explaining
> somewhere?

Maybe I can add a comment here, if you prefer that. By "semantics",
do you mean the 'critical' vs 'atomic' distinction, or the context in
which these functions are supposed to be used?

>
>> @@ -792,24 +810,48 @@ EXPORT_SYMBOL_GPL(stop_critical_timings);
>>  #if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PROVE_LOCKING)
>>  void trace_hardirqs_on(void)
>>  {
>> +     if (!this_cpu_read(tracing_irq_cpu))
>> +             return;
>> +
>> +     trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>>       tracer_hardirqs_on();
>> +
>> +     this_cpu_write(tracing_irq_cpu, 0);
>>  }
>>  EXPORT_SYMBOL(trace_hardirqs_on);
>>
>>  void trace_hardirqs_off(void)
>>  {
>> +     if (this_cpu_read(tracing_irq_cpu))
>> +             return;
>> +
>> +     this_cpu_write(tracing_irq_cpu, 1);
>> +
>> +     trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>>       tracer_hardirqs_off();
>>  }
>>  EXPORT_SYMBOL(trace_hardirqs_off);
>>
>>  __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
>>  {
>> +     if (!this_cpu_read(tracing_irq_cpu))
>> +             return;
>> +
>> +     trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
>>       tracer_hardirqs_on_caller(caller_addr);
>> +
>> +     this_cpu_write(tracing_irq_cpu, 0);
>>  }
>>  EXPORT_SYMBOL(trace_hardirqs_on_caller);
>>
>>  __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
>>  {
>> +     if (this_cpu_read(tracing_irq_cpu))
>> +             return;
>> +
>> +     this_cpu_write(tracing_irq_cpu, 1);
>> +
>> +     trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
>>       tracer_hardirqs_off_caller(caller_addr);
>>  }
>>  EXPORT_SYMBOL(trace_hardirqs_off_caller);
>> @@ -831,14 +873,27 @@ inline void print_irqtrace_events(struct task_struct *curr)
>>  }
>>  #endif
>>
>> -#ifdef CONFIG_PREEMPT_TRACER
>> +#if defined(CONFIG_PREEMPT_TRACER) || \
>> +     (defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS))
>>  void trace_preempt_on(unsigned long a0, unsigned long a1)
>>  {
>> +     if (!this_cpu_read(tracing_preempt_cpu))
>> +             return;
>> +
>> +     trace_preempt_enable_rcuidle(a0, a1);
>>       tracer_preempt_on(a0, a1);
>> +
>> +     this_cpu_write(tracing_preempt_cpu, 0);
>>  }
>>
>>  void trace_preempt_off(unsigned long a0, unsigned long a1)
>>  {
>> +     if (this_cpu_read(tracing_preempt_cpu))
>> +             return;
>> +
>> +     this_cpu_write(tracing_preempt_cpu, 1);
>> +
>> +     trace_preempt_disable_rcuidle(a0, a1);
>>       tracer_preempt_off(a0, a1);
>>  }
>>  #endif
>
> And here you assume things like double on / double off don't happen,
> which might well be so, but does seem somewhat fragile.
>

We are handling the cases where these functions might be called
twice; we are only interested in the first call. I caught a deadlock
when I didn't add such protection to trace_hardirqs_off, so I added
it to the other trace_hardirqs* and trace_preempt* functions as well,
to be extra safe and keep things consistent. I hope I understood your
concern correctly; if not, please let me know, thanks.
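
To spell out the pattern, here is a simplified single-CPU sketch of
the guard (not the exact kernel code; the real version uses a per-CPU
variable via this_cpu_read()/this_cpu_write(), and the emit_* helpers
below just stand in for the trace_*_rcuidle() calls):

static int tracing_irq_active;

static void emit_irq_disable(void) { /* trace_irq_disable_rcuidle() */ }
static void emit_irq_enable(void)  { /* trace_irq_enable_rcuidle() */ }

void sketch_hardirqs_off(void)
{
	if (tracing_irq_active)
		return;			/* duplicate off: already traced */
	tracing_irq_active = 1;
	emit_irq_disable();
}

void sketch_hardirqs_on(void)
{
	if (!tracing_irq_active)
		return;			/* no traced disable to pair with */
	emit_irq_enable();
	tracing_irq_active = 0;
}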

>
> So all in all I don't see anything particularly wrong, but I see lots
> that could do with explaining.

Sure, thanks for taking a look :) Let me know if you have any other
comments about this patch.

regards,

- Joel
