Message-ID: <565db357cfe61c4b350323968aee15bd477ef606.camel@gmail.com>
Date: Wed, 30 Jun 2021 10:05:48 +0200
From: Riccardo Mancini <rickyman7@...il.com>
To: Alexey Bayduraev <alexey.v.bayduraev@...ux.intel.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>
Cc: Jiri Olsa <jolsa@...hat.com>, Namhyung Kim <namhyung@...nel.org>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
linux-kernel <linux-kernel@...r.kernel.org>,
Andi Kleen <ak@...ux.intel.com>,
Adrian Hunter <adrian.hunter@...el.com>,
Alexander Antonov <alexander.antonov@...ux.intel.com>,
Alexei Budankov <abudankov@...wei.com>
Subject: Re: [PATCH v7 07/20] perf record: Introduce data transferred and
compressed stats
Hi,
On Tue, 2021-06-22 at 11:42 +0300, Alexey Bayduraev wrote:
> Introduce bytes_transferred and bytes_compressed stats so they
> would capture statistics for the related data buffer transfers.
>
> Acked-by: Andi Kleen <ak@...ux.intel.com>
> Signed-off-by: Alexey Bayduraev <alexey.v.bayduraev@...ux.intel.com>
> ---
> tools/perf/builtin-record.c | 64 +++++++++++++++++++++++++++++--------
> tools/perf/util/mmap.h | 3 ++
> 2 files changed, 54 insertions(+), 13 deletions(-)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 38bb5afbb359..c9fd31211600 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -198,6 +198,11 @@ static int record__write(struct record *rec, struct mmap *map __maybe_unused,
> return -1;
> }
>
> + if (map && map->file) {
> + map->bytes_written += size;
> + return 0;
> + }
> +
> rec->bytes_written += size;
>
> if (record__output_max_size_exceeded(rec) && !done) {
This breaks the --max-size option in parallel trace mode: when map->file is set,
record__write() now returns before rec->bytes_written is updated and before the
record__output_max_size_exceeded() check is reached.
Maybe we could change record__output_max_size_exceeded() to walk all
thread_data->maps and sum up their bytes_written. Concurrency should not be an
issue, since we are not required to stop at exactly max-size (that is also the
current behaviour).
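Something along these lines, just as a sketch (field names are taken from the
current record__output_max_size_exceeded() and from this patch, so treat it as
pseudo-code; record__write() would also have to perform this check before the
early return on the map->file path):

static bool record__output_max_size_exceeded(struct record *rec)
{
	u64 bytes_written = rec->bytes_written;
	int t, tm;

	/* add what every per-thread map has written so far */
	for (t = 0; rec->thread_data && t < rec->nr_threads; t++) {
		struct thread_data *td = &rec->thread_data[t];

		for (tm = 0; tm < td->nr_mmaps; tm++) {
			if (td->maps)
				bytes_written += td->maps[tm]->bytes_written;
			if (td->overwrite_maps)
				bytes_written += td->overwrite_maps[tm]->bytes_written;
		}
	}

	return rec->output_max_size && (bytes_written >= rec->output_max_size);
}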
Otherwise, we could atomically increment an accumulator inside struct record
(maybe rec->bytes_written could be reused, but I'm not sure whether the main
thread still uses it in parallel mode) and check that against the max size.
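Roughly like this (thread_bytes_written is a made-up field in struct record, and
the __atomic builtins are only meant to illustrate the idea; I haven't checked
what is preferred in tools/):

/* in record__write(), on the map->file path */
	if (map && map->file) {
		map->bytes_written += size;
		__atomic_add_fetch(&rec->thread_bytes_written, size, __ATOMIC_RELAXED);
		if (record__output_max_size_exceeded(rec) && !done)
			done = 1;
		return 0;
	}

/* and the check would then look at both counters */
static bool record__output_max_size_exceeded(struct record *rec)
{
	u64 total = rec->bytes_written +
		    __atomic_load_n(&rec->thread_bytes_written, __ATOMIC_RELAXED);

	return rec->output_max_size && (total >= rec->output_max_size);
}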
Thanks,
Riccardo
> @@ -215,8 +220,8 @@ static int record__write(struct record *rec, struct mmap *map __maybe_unused,
>
> static int record__aio_enabled(struct record *rec);
> static int record__comp_enabled(struct record *rec);
> -static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
> - void *src, size_t src_size);
> +static size_t zstd_compress(struct zstd_data *data,
> + void *dst, size_t dst_size, void *src, size_t src_size);
>
> #ifdef HAVE_AIO_SUPPORT
> static int record__aio_write(struct aiocb *cblock, int trace_fd,
> @@ -350,9 +355,13 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size
> */
>
> if (record__comp_enabled(aio->rec)) {
> - size = zstd_compress(aio->rec->session, aio->data + aio->size,
> - mmap__mmap_len(map) - aio->size,
> + struct zstd_data *zstd_data = &aio->rec->session->zstd_data;
> +
> + aio->rec->session->bytes_transferred += size;
> + size = zstd_compress(zstd_data,
> + aio->data + aio->size, mmap__mmap_len(map) - aio->size,
> buf, size);
> + aio->rec->session->bytes_compressed += size;
> } else {
> memcpy(aio->data + aio->size, buf, size);
> }
> @@ -577,8 +586,22 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
> struct record *rec = to;
>
> if (record__comp_enabled(rec)) {
> - size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
> + struct zstd_data *zstd_data = &rec->session->zstd_data;
> +
> + if (map->file) {
> + zstd_data = &map->zstd_data;
> + map->bytes_transferred += size;
> + } else {
> + rec->session->bytes_transferred += size;
> + }
> +
> + size = zstd_compress(zstd_data, map->data, mmap__mmap_len(map), bf, size);
> bf = map->data;
> +
> + if (map->file)
> + map->bytes_compressed += size;
> + else
> + rec->session->bytes_compressed += size;
> }
>
> thread->samples++;
> @@ -1311,18 +1334,15 @@ static size_t process_comp_header(void *record, size_t increment)
> return size;
> }
>
> -static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
> +static size_t zstd_compress(struct zstd_data *zstd_data, void *dst, size_t dst_size,
> void *src, size_t src_size)
> {
> size_t compressed;
> size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
>
> - compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
> + compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
> max_record_size, process_comp_header);
>
> - session->bytes_transferred += src_size;
> - session->bytes_compressed += compressed;
> -
> return compressed;
> }
>
> @@ -2006,8 +2026,10 @@ static int record__start_threads(struct record *rec)
>
> static int record__stop_threads(struct record *rec, unsigned long *waking)
> {
> - int t;
> + int t, tm;
> + struct mmap *map, *overwrite_map;
> struct thread_data *thread_data = rec->thread_data;
> + u64 bytes_written = 0, bytes_transferred = 0, bytes_compressed = 0;
>
> for (t = 1; t < rec->nr_threads; t++)
> record__terminate_thread(&thread_data[t]);
> @@ -2015,9 +2037,25 @@ static int record__stop_threads(struct record *rec, unsigned long *waking)
> for (t = 0; t < rec->nr_threads; t++) {
> rec->samples += thread_data[t].samples;
> *waking += thread_data[t].waking;
> - pr_debug("threads[%d]: samples=%lld, wakes=%ld, trasferred=%ld, compressed=%ld\n",
> + for (tm = 0; tm < thread_data[t].nr_mmaps; tm++) {
> + if (thread_data[t].maps) {
> + map = thread_data[t].maps[tm];
> + bytes_transferred += map->bytes_transferred;
> + bytes_compressed += map->bytes_compressed;
> + bytes_written += map->bytes_written;
> + }
> + if (thread_data[t].overwrite_maps) {
> + overwrite_map = thread_data[t].overwrite_maps[tm];
> + bytes_transferred += overwrite_map->bytes_transferred;
> + bytes_compressed += overwrite_map->bytes_compressed;
> + bytes_written += overwrite_map->bytes_written;
> + }
> + }
> + rec->session->bytes_transferred += bytes_transferred;
> + rec->session->bytes_compressed += bytes_compressed;
> + pr_debug("threads[%d]: samples=%lld, wakes=%ld, trasferred=%ld, compressed=%ld, written=%ld\n",
> thread_data[t].tid, thread_data[t].samples, thread_data[t].waking,
> - rec->session->bytes_transferred, rec->session->bytes_compressed);
> + bytes_transferred, bytes_compressed, bytes_written);
> }
>
> return 0;
> diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
> index c4aed6e89549..c04ca4b5adf5 100644
> --- a/tools/perf/util/mmap.h
> +++ b/tools/perf/util/mmap.h
> @@ -46,6 +46,9 @@ struct mmap {
> int comp_level;
> struct perf_data_file *file;
> struct zstd_data zstd_data;
> + u64 bytes_transferred;
> + u64 bytes_compressed;
> + u64 bytes_written;
> };
>
> struct mmap_params {