Message-ID: <1271813588.10448.169.camel@gandalf.stny.rr.com>
Date: Tue, 20 Apr 2010 21:33:08 -0400
From: Steven Rostedt <rostedt@...dmis.org>
To: David Miller <davem@...emloft.net>
Cc: fweisbec@...il.com, linux-kernel@...r.kernel.org,
sparclinux@...r.kernel.org
Subject: Re: [PATCH] ring-buffer: Make non-consuming read less expensive
with lots of cpus.
On Tue, 2010-04-20 at 15:47 -0700, David Miller wrote:
> When performing a non-consuming read, a synchronize_sched() is
> performed once for every cpu which is actively tracing.
>
> This is very expensive, and can make it take several seconds to open
> up the 'trace' file with lots of cpus.
>
> Only one synchronize_sched() call is actually necessary. What is
> desired is for all cpus to see the disabling state change. So we
> transform the existing sequence:
>
> for_each_cpu() {
> ring_buffer_read_start();
> }
>
> where each ring_buffer_read_start() call performs a synchronize_sched(),
> into the following:
>
> for_each_cpu() {
> ring_buffer_read_prepare();
> }
> ring_buffer_read_prepare_sync();
> for_each_cpu() {
> ring_buffer_read_start();
> }
>
> wherein only the single ring_buffer_read_prepare_sync() call needs to
> do the synchronize_sched().
>
> The first phase, via ring_buffer_read_prepare(), allocates the 'iter'
> memory and increments ->record_disabled.
>
> In the second phase, ring_buffer_read_prepare_sync() makes sure this
> ->record_disabled state is fully visible to all cpus.
>
> And in the third and final phase, the ring_buffer_read_start() calls reset
> the 'iter' objects allocated in the first phase since we now know that
> none of the cpus are adding trace entries any more.
>
> This makes opening the 'trace' file nearly instantaneous on a
> sparc64 Niagara2 box with 128 cpus tracing.
>
> Signed-off-by: David S. Miller <davem@...emloft.net>
Thanks David!
I'll take a closer look at the patch tomorrow, but it definitely looks
like it is needed.
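
From a first read, the open path after this patch boils down to the
pattern below. This is just a minimal sketch of the trace.c hunk further
down, not the patch itself: error handling is omitted, the iterators
live in a static array for brevity, and for_each_online_cpu() stands in
for for_each_tracing_cpu().

#include <linux/cpumask.h>
#include <linux/ring_buffer.h>

static struct ring_buffer_iter *iters[NR_CPUS];

static void open_all_iters(struct ring_buffer *buffer)
{
	int cpu;

	/* Phase 1: allocate each iterator and bump ->record_disabled. */
	for_each_online_cpu(cpu)
		iters[cpu] = ring_buffer_read_prepare(buffer, cpu);

	/*
	 * Phase 2: a single synchronize_sched() makes the disabled
	 * state visible to every cpu at once.
	 */
	ring_buffer_read_prepare_sync();

	/* Phase 3: no cpu is adding entries now, so resets are safe. */
	for_each_online_cpu(cpu)
		ring_buffer_read_start(iters[cpu]);
}

static void close_all_iters(void)
{
	int cpu;

	/*
	 * Teardown is unchanged: each iterator is still paired with
	 * ring_buffer_read_finish(), which re-enables recording and
	 * frees the iterator.
	 */
	for_each_online_cpu(cpu)
		ring_buffer_read_finish(iters[cpu]);
}

The win is that phase 2 now runs once instead of once per tracing cpu,
which is exactly where the multi-second 'trace' open on big boxes was
coming from.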
-- Steve
>
> diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
> index 5fcc31e..827848e 100644
> --- a/include/linux/ring_buffer.h
> +++ b/include/linux/ring_buffer.h
> @@ -125,7 +125,9 @@ struct ring_buffer_event *
> ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts);
>
> struct ring_buffer_iter *
> -ring_buffer_read_start(struct ring_buffer *buffer, int cpu);
> +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu);
> +void ring_buffer_read_prepare_sync(void);
> +void ring_buffer_read_start(struct ring_buffer_iter *iter);
> void ring_buffer_read_finish(struct ring_buffer_iter *iter);
>
> struct ring_buffer_event *
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index 41ca394..ca4be22 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -3276,23 +3276,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
> EXPORT_SYMBOL_GPL(ring_buffer_consume);
>
> /**
> - * ring_buffer_read_start - start a non consuming read of the buffer
> + * ring_buffer_read_prepare - Prepare for a non-consuming read of the buffer
> * @buffer: The ring buffer to read from
> * @cpu: The cpu buffer to iterate over
> *
> - * This starts up an iteration through the buffer. It also disables
> - * the recording to the buffer until the reading is finished.
> - * This prevents the reading from being corrupted. This is not
> - * a consuming read, so a producer is not expected.
> + * This performs the initial preparations necessary to iterate
> + * through the buffer. Memory is allocated, buffer recording
> + * is disabled, and the iterator pointer is returned to the caller.
> *
> - * Must be paired with ring_buffer_finish.
> + * Disabling buffer recording prevents the reading from being
> + * corrupted. This is not a consuming read, so a producer is not
> + * expected.
> + *
> + * After a sequence of ring_buffer_read_prepare calls, the user is
> + * expected to make at least one call to ring_buffer_read_prepare_sync.
> + * Afterwards, ring_buffer_read_start is invoked to get things going
> + * for real.
> + *
> + * Overall, this must be paired with ring_buffer_read_finish.
> */
> struct ring_buffer_iter *
> -ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
> +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
> {
> struct ring_buffer_per_cpu *cpu_buffer;
> struct ring_buffer_iter *iter;
> - unsigned long flags;
>
> if (!cpumask_test_cpu(cpu, buffer->cpumask))
> return NULL;
> @@ -3306,15 +3313,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
> iter->cpu_buffer = cpu_buffer;
>
> atomic_inc(&cpu_buffer->record_disabled);
> +
> + return iter;
> +}
> +EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
> +
> +/**
> + * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
> + *
> + * All previously invoked ring_buffer_read_prepare calls to prepare
> + * iterators will be synchronized. Afterwards, ring_buffer_read_start
> + * calls on those iterators are allowed.
> + */
> +void
> +ring_buffer_read_prepare_sync(void)
> +{
> synchronize_sched();
> +}
> +EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
> +
> +/**
> + * ring_buffer_read_start - start a non-consuming read of the buffer
> + * @iter: The iterator returned by ring_buffer_read_prepare
> + *
> + * This finalizes the startup of an iteration through the buffer.
> + * The iterator comes from a call to ring_buffer_read_prepare and
> + * an intervening ring_buffer_read_prepare_sync must have been
> + * performed.
> + *
> + * Must be paired with ring_buffer_read_finish.
> + */
> +void
> +ring_buffer_read_start(struct ring_buffer_iter *iter)
> +{
> + struct ring_buffer_per_cpu *cpu_buffer;
> + unsigned long flags;
> +
> + if (!iter)
> + return;
> +
> + cpu_buffer = iter->cpu_buffer;
>
> spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
> arch_spin_lock(&cpu_buffer->lock);
> rb_iter_reset(iter);
> arch_spin_unlock(&cpu_buffer->lock);
> spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
> -
> - return iter;
> }
> EXPORT_SYMBOL_GPL(ring_buffer_read_start);
>
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 44f916a..3f161d8 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -2166,15 +2166,20 @@ __tracing_open(struct inode *inode, struct file *file)
>
> if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
> for_each_tracing_cpu(cpu) {
> -
> iter->buffer_iter[cpu] =
> - ring_buffer_read_start(iter->tr->buffer, cpu);
> + ring_buffer_read_prepare(iter->tr->buffer, cpu);
> + }
> + ring_buffer_read_prepare_sync();
> + for_each_tracing_cpu(cpu) {
> + ring_buffer_read_start(iter->buffer_iter[cpu]);
> tracing_iter_reset(iter, cpu);
> }
> } else {
> cpu = iter->cpu_file;
> iter->buffer_iter[cpu] =
> - ring_buffer_read_start(iter->tr->buffer, cpu);
> + ring_buffer_read_prepare(iter->tr->buffer, cpu);
> + ring_buffer_read_prepare_sync();
> + ring_buffer_read_start(iter->buffer_iter[cpu]);
> tracing_iter_reset(iter, cpu);
> }
>