Message-ID: <c62985530810210528t416c82b2lbe8471322e8359c@mail.gmail.com>
Date:	Tue, 21 Oct 2008 14:28:52 +0200
From:	"Frédéric Weisbecker" <fweisbec@...il.com>
To:	"Ingo Molnar" <mingo@...e.hu>
Cc:	linux-kernel@...r.kernel.org,
	"Steven Rostedt" <rostedt@...dmis.org>
Subject: Re: [PATCH 5/5] tracing/ftrace: Introduce the big kernel lock tracer

2008/10/11 Frederic Weisbecker <fweisbec@...il.com>:
> Introduce the Big Kernel Lock tracer.
> This new tracer lets one observe the latencies caused
> by the lock_kernel() function.
> It captures the time when the task requests the spinlock,
> the time when the spinlock is acquired and the time when it is released.
> This way we can measure both the latency a task suffers while waiting
> for the spinlock and the latency caused by the time the lock is held.
>
> I will surely add some improvements, such as:
>
> _ Adding a stack trace to know which function locked or unlocked the kernel.
> _ Adding a script to gather statistics: which functions hold it most
> often, which hold it longest, and so on (a rough sketch of such a
> script follows the trace_bkl.c hunk below).
>
> Signed-off-by: Frederic Weisbecker <fweisbec@...il.com>
> Cc: Steven Rostedt <rostedt@...dmis.org>
> ---
>  include/trace/bkl.h      |   29 ++++++++
>  kernel/trace/Kconfig     |   11 +++
>  kernel/trace/Makefile    |    1 +
>  kernel/trace/trace.h     |   18 +++++
>  kernel/trace/trace_bkl.c |  170 ++++++++++++++++++++++++++++++++++++++++++++++
>  lib/kernel_lock.c        |   22 ++++++-
>  6 files changed, 249 insertions(+), 2 deletions(-)
>
> diff --git a/include/trace/bkl.h b/include/trace/bkl.h
> new file mode 100644
> index 0000000..bed2f1b
> --- /dev/null
> +++ b/include/trace/bkl.h
> @@ -0,0 +1,29 @@
> +#ifndef _TRACE_BKL_H
> +#define _TRACE_BKL_H
> +
> +
> +/* Entry which logs the time when the task first tried to acquire the bkl
> + * and the time when it actually acquired it.
> + * That lets us know the latency of acquiring the kernel lock.
> + */
> +struct bkl_trace_acquire {
> +       unsigned long long acquire_req_time;
> +       unsigned long long acquire_time;
> +};
> +
> +/* This logs the time when the task released the bkl,
> + * so we know the whole duration for which it held the kernel lock.
> + */
> +struct bkl_trace_release {
> +       unsigned long long release_time;
> +};
> +
> +#ifdef CONFIG_BKL_TRACER
> +extern void trace_bkl_acquire(struct bkl_trace_acquire *trace);
> +extern void trace_bkl_release(struct bkl_trace_release *trace);
> +#else
> +/* Inline stubs, so that !CONFIG_BKL_TRACER builds don't emit
> + * "defined but not used" warnings in every file including this header.
> + */
> +static inline void trace_bkl_acquire(struct bkl_trace_acquire *trace) { }
> +static inline void trace_bkl_release(struct bkl_trace_release *trace) { }
> +#endif
> +
> +#endif
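A quick note on how these two structs combine: both latencies fall out of
simple subtractions on the three timestamps. A minimal sketch, using a
hypothetical helper that is not part of the patch (it assumes the struct
definitions above; times are in nanoseconds, as returned by cpu_clock()):

        /* Hypothetical helper: derive both BKL latencies from one
         * acquire/release pair of trace entries.
         */
        static inline void bkl_latencies(const struct bkl_trace_acquire *acq,
                                         const struct bkl_trace_release *rel,
                                         unsigned long long *wait_ns,
                                         unsigned long long *hold_ns)
        {
                /* time spent waiting before the lock was granted */
                *wait_ns = acq->acquire_time - acq->acquire_req_time;
                /* time the lock was actually held */
                *hold_ns = rel->release_time - acq->acquire_time;
        }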
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 396aea1..fde7700 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -144,6 +144,17 @@ config STACK_TRACER
>          This tracer records the max stack of the kernel, and displays
>          it in debugfs/tracing/stack_trace
>
> +config BKL_TRACER
> +       bool "Trace Big Kernel Lock latencies"
> +       depends on HAVE_FTRACE
> +       depends on DEBUG_KERNEL
> +       select FTRACE
> +       select STACKTRACE
> +       help
> +         This tracer records the latencies caused by the Big Kernel Lock.
> +         It traces the time when a task requests the bkl, the time when it
> +         acquires it and the time when it releases it.
> +
>  config DYNAMIC_FTRACE
>        bool "enable/disable ftrace tracepoints dynamically"
>        depends on FTRACE
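For anyone who wants to try it: the tracer registers itself under the name
"bkl" (see trace_bkl.c below), so it should be selectable through the tracing
debugfs directory that the STACK_TRACER help text above refers to. A
user-space sketch, assuming debugfs is mounted at /sys/kernel/debug (the
mount point varies):

        /* Sketch: select the bkl tracer from user space. The debugfs
         * mount point used here is an assumption.
         */
        #include <stdio.h>

        int main(void)
        {
                FILE *f = fopen("/sys/kernel/debug/tracing/current_tracer", "w");

                if (!f) {
                        perror("current_tracer");
                        return 1;
                }
                fputs("bkl\n", f);      /* tracer name registered below */
                fclose(f);
                return 0;
        }

Depending on the ftrace version, tracing may additionally have to be switched
on through the tracing_enabled file in the same directory.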
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index a85dfba..2ba8904 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -24,5 +24,6 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
>  obj-$(CONFIG_STACK_TRACER) += trace_stack.o
>  obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
>  obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
> +obj-$(CONFIG_BKL_TRACER) += trace_bkl.o
>
>  libftrace-y := ftrace.o
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 707620c..b1df671 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -8,6 +8,7 @@
>  #include <linux/ring_buffer.h>
>  #include <linux/mmiotrace.h>
>  #include <linux/ftrace.h>
> +#include <trace/bkl.h>
>
>  enum trace_type {
>        __TRACE_FIRST_TYPE = 0,
> @@ -22,6 +23,8 @@ enum trace_type {
>        TRACE_MMIO_RW,
>        TRACE_MMIO_MAP,
>        TRACE_BOOT,
> +       TRACE_BKL_ACQ,
> +       TRACE_BKL_REL,
>
>        __TRACE_LAST_TYPE
>  };
> @@ -119,6 +122,17 @@ struct trace_boot {
>        struct boot_trace       initcall;
>  };
>
> +/* Trace bkl entries */
> +struct trace_bkl_acq {
> +       struct trace_entry              ent;
> +       struct bkl_trace_acquire        bkl_acq;
> +};
> +
> +struct trace_bkl_rel {
> +       struct trace_entry              ent;
> +       struct bkl_trace_release        bkl_rel;
> +};
> +
>  /*
>  * trace_flag_type is an enumeration that holds different
>  * states when a trace occurs. These are:
> @@ -219,6 +233,10 @@ extern void __ftrace_bad_type(void);
>                IF_ASSIGN(var, ent, struct trace_mmiotrace_map,         \
>                          TRACE_MMIO_MAP);                              \
>                IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT);     \
> +               IF_ASSIGN(var, ent, struct trace_bkl_acq,               \
> +                         TRACE_BKL_ACQ);                               \
> +               IF_ASSIGN(var, ent, struct trace_bkl_rel,               \
> +                         TRACE_BKL_REL);                               \
>                __ftrace_bad_type();                                    \
>        } while (0)
>
> diff --git a/kernel/trace/trace_bkl.c b/kernel/trace/trace_bkl.c
> new file mode 100644
> index 0000000..169ee01
> --- /dev/null
> +++ b/kernel/trace/trace_bkl.c
> @@ -0,0 +1,170 @@
> +/*
> + * Ring buffer based Big Kernel Lock tracer
> + * for the purpose of measuring the latencies
> + * caused by the BKL.
> + *
> + * Copyright (C) 2008 Frederic Weisbecker <fweisbec@...il.com>
> + */
> +
> +#include <trace/bkl.h>
> +#include "trace.h"
> +#include <linux/ftrace.h>
> +
> +
> +static struct trace_array *bkl_trace;
> +static bool bkl_tracer_enabled;
> +
> +
> +static void bkl_trace_init(struct trace_array *tr)
> +{
> +       int cpu;
> +       bkl_trace = tr;
> +
> +       for_each_cpu_mask(cpu, cpu_possible_map)
> +               tracing_reset(tr, cpu);
> +
> +       if (tr->ctrl)
> +               bkl_tracer_enabled = 1;
> +}
> +
> +static void reset_bkl_trace(struct trace_array *tr)
> +{
> +       bkl_tracer_enabled = 0;
> +}
> +
> +static void bkl_trace_ctrl_update(struct trace_array *tr)
> +{
> +       if (tr->ctrl)
> +               bkl_tracer_enabled = 1;
> +       else
> +               bkl_tracer_enabled = 0;
> +}
> +
> +static enum print_line_t bkl_acq_print_line(struct trace_iterator *iter)
> +{
> +       int ret;
> +       struct trace_entry *entry = iter->ent;
> +       struct trace_bkl_acq *field = (struct trace_bkl_acq *) entry;
> +       struct bkl_trace_acquire *bta = &field->bkl_acq;
> +       struct trace_seq *s = &iter->seq;
> +       unsigned long sec, nsec_rem;
> +       unsigned long long wait_for_spin;
> +       unsigned long long t;
> +
> +       wait_for_spin = bta->acquire_time - bta->acquire_req_time;
> +       /* do_div() writes the quotient back into its first argument;
> +        * divide a copy so the buffered entry survives later reads.
> +        */
> +       t = bta->acquire_req_time;
> +       nsec_rem = do_div(t, 1000000000);
> +       sec = (unsigned long) t;
> +       ret = trace_seq_printf(s, "Kernel lock requested at %5lu.%09lu ",
> +                                 sec, nsec_rem);
> +       if (!ret)
> +               return TRACE_TYPE_PARTIAL_LINE;
> +
> +       t = bta->acquire_time;
> +       nsec_rem = do_div(t, 1000000000);
> +       sec = (unsigned long) t;
> +       ret = trace_seq_printf(s, "acquired at %5lu.%09lu "
> +                                 "(waited %llu nsecs)\n",
> +                                 sec, nsec_rem,
> +                                 wait_for_spin);
> +       if (!ret)
> +               return TRACE_TYPE_PARTIAL_LINE;
> +       return TRACE_TYPE_HANDLED;
> +}
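For what it's worth, plugging invented numbers into the two format strings
above, each acquire entry renders as one line of this shape (values are
purely illustrative):

        Kernel lock requested at  1234.000001000 acquired at  1234.000003500 (waited 2500 nsecs)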
> +
> +static enum print_line_t bkl_rel_print_line(struct trace_iterator *iter)
> +{
> +       int ret;
> +       struct trace_entry *entry = iter->ent;
> +       struct trace_bkl_rel *field = (struct trace_bkl_rel *) entry;
> +       struct bkl_trace_release *btr = &field->bkl_rel;
> +       struct trace_seq *s = &iter->seq;
> +       unsigned long sec, nsec_rem;
> +       unsigned long long t;
> +
> +       /* Same as above: divide a copy rather than the entry itself. */
> +       t = btr->release_time;
> +       nsec_rem = do_div(t, 1000000000);
> +       sec = (unsigned long) t;
> +       ret = trace_seq_printf(s, "Kernel lock released at  %5lu.%09lu\n",
> +                                 sec, nsec_rem);
> +       if (!ret)
> +               return TRACE_TYPE_PARTIAL_LINE;
> +       return TRACE_TYPE_HANDLED;
> +}
> +
> +static enum print_line_t bkl_trace_print_line(struct trace_iterator *iter)
> +{
> +       struct trace_entry *entry = iter->ent;
> +
> +       if (entry->type == TRACE_BKL_ACQ)
> +               return bkl_acq_print_line(iter);
> +       if (entry->type == TRACE_BKL_REL)
> +               return bkl_rel_print_line(iter);
> +
> +       return TRACE_TYPE_UNHANDLED;
> +}
> +
> +static struct tracer bkl_tracer __read_mostly =
> +{
> +       .name           = "bkl",
> +       .init           = bkl_trace_init,
> +       .reset          = reset_bkl_trace,
> +       .ctrl_update    = bkl_trace_ctrl_update,
> +       .print_line     = bkl_trace_print_line,
> +};
> +
> +static int __init init_bkl_tracer(void)
> +{
> +       return register_tracer(&bkl_tracer);
> +}
> +device_initcall(init_bkl_tracer);
> +
> +/*
> + * These functions don't need to disable preemption
> + * themselves: lock_kernel() has already done it.
> + */
> +
> +void trace_bkl_acquire(struct bkl_trace_acquire *trace)
> +{
> +       struct ring_buffer_event *event;
> +       struct trace_bkl_acq *entry;
> +       unsigned long irq_flags;
> +       struct trace_array *tr = bkl_trace;
> +
> +       if (!bkl_tracer_enabled)
> +               return;
> +
> +       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
> +                                        &irq_flags);
> +       if (!event)
> +               return;
> +       entry   = ring_buffer_event_data(event);
> +       tracing_generic_entry_update(&entry->ent, 0, 0);
> +       entry->ent.type = TRACE_BKL_ACQ;
> +       entry->bkl_acq = *trace;
> +       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
> +
> +       trace_wake_up();
> +}
> +
> +void trace_bkl_release(struct bkl_trace_release *trace)
> +{
> +       struct ring_buffer_event *event;
> +       struct trace_bkl_rel *entry;
> +       unsigned long irq_flags;
> +       struct trace_array *tr = bkl_trace;
> +
> +       if (!bkl_tracer_enabled)
> +               return;
> +
> +       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
> +                                        &irq_flags);
> +       if (!event)
> +               return;
> +       entry   = ring_buffer_event_data(event);
> +       tracing_generic_entry_update(&entry->ent, 0, 0);
> +       entry->ent.type = TRACE_BKL_REL;
> +       entry->bkl_rel = *trace;
> +       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
> +
> +       trace_wake_up();
> +}
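About the statistics script mentioned at the top of this mail: a rough first
user-space sketch, parsing the exact "(waited ... nsecs)" field printed by
bkl_acq_print_line() above. Per-function statistics would need the stack
traces first, so only counts and wait times are reported here:

        /* Sketch of a post-processing tool: read the tracer output on
         * stdin and report the number of kernel-lock acquisitions seen,
         * plus the average and maximum wait.
         */
        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                char line[256];
                unsigned long long waited, sum = 0, max = 0, count = 0;

                while (fgets(line, sizeof(line), stdin)) {
                        char *p = strstr(line, "(waited ");

                        if (!p || sscanf(p, "(waited %llu nsecs)", &waited) != 1)
                                continue;
                        sum += waited;
                        count++;
                        if (waited > max)
                                max = waited;
                }
                if (count)
                        printf("%llu acquisitions, avg wait %llu ns, max wait %llu ns\n",
                               count, sum / count, max);
                return 0;
        }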
> diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
> index 01a3c22..9987834 100644
> --- a/lib/kernel_lock.c
> +++ b/lib/kernel_lock.c
> @@ -9,6 +9,7 @@
>  #include <linux/module.h>
>  #include <linux/kallsyms.h>
>  #include <linux/semaphore.h>
> +#include <trace/bkl.h>
>
>  /*
>  * The 'big kernel lock'
> @@ -116,16 +117,33 @@ static inline void __unlock_kernel(void)
>  void __lockfunc lock_kernel(void)
>  {
>        int depth = current->lock_depth+1;
> -       if (likely(!depth))
> +       if (likely(!depth)) {
> +#ifdef CONFIG_BKL_TRACER
> +               struct bkl_trace_acquire trace;
> +               /*
> +                * Pin the cpu across cpu_clock() so the timestamp is
> +                * read on the cpu we ask it for; preemption is not
> +                * disabled yet at this point.
> +                */
> +               preempt_disable();
> +               trace.acquire_req_time = cpu_clock(raw_smp_processor_id());
> +               preempt_enable();
>                __lock_kernel();
> +               /* The BKL spinlock is now held: preemption is off. */
> +               trace.acquire_time = cpu_clock(raw_smp_processor_id());
> +               trace_bkl_acquire(&trace);
> +#else
> +               __lock_kernel();
> +#endif
> +       }
>        current->lock_depth = depth;
>  }
>
>  void __lockfunc unlock_kernel(void)
>  {
>        BUG_ON(current->lock_depth < 0);
> -       if (likely(--current->lock_depth < 0))
> +       if (likely(--current->lock_depth < 0)) {
> +#ifdef CONFIG_BKL_TRACER
> +               struct bkl_trace_release trace;
> +               /*
> +                * Timestamp before the actual unlock, while the BKL
> +                * spinlock still keeps preemption disabled.
> +                */
> +               trace.release_time = cpu_clock(raw_smp_processor_id());
> +               trace_bkl_release(&trace);
> +#endif
>                __unlock_kernel();
> +       }
>  }
>
>  EXPORT_SYMBOL(lock_kernel);
>

Have the patches submitted to tip over the last 15 days been put in a wait
queue because of the merge window? I hope the lack of comments on them is
not because they are totally irrelevant...

Sorry, I just wonder...
