[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAPhsuW52Vfsx7gfPppxH_E=6Y6xtMrWzCAx0=aJk0odm_q=ZzA@mail.gmail.com>
Date: Thu, 1 Nov 2018 11:30:40 -0700
From: Song Liu <liu.song.a23@...il.com>
To: Alexey Budankov <alexey.budankov@...ux.intel.com>
Cc: Peter Zijlstra <peterz@...radead.org>, mingo@...hat.com,
acme@...nel.org, alexander.shishkin@...ux.intel.com,
jolsa@...hat.com, namhyung@...nel.org, ak@...ux.intel.com,
open list <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v14 3/3]: perf record: extend trace writing to multi AIO
On Sun, Oct 14, 2018 at 11:47 PM Alexey Budankov
<alexey.budankov@...ux.intel.com> wrote:
>
>
> Multi AIO trace writing allows caching more kernel data in userspace
> memory, postponing trace writing in order to increase overall profiling
> data throughput. It can be seen as an extension of the kernel data buffer
> into userspace memory.
>
> With an aio option value different from 0 (the default value is 1), the
> tool is able to cache more and more data in user space while
> delegating the spill to AIO.
>
> That avoids suspending in record__aio_sync() between calls to
> record__mmap_read_evlist() and increases profiling data throughput at
> the cost of userspace memory.
>
> Signed-off-by: Alexey Budankov <alexey.budankov@...ux.intel.com>
> ---
> Changes in v14:
> - fix --aio option handling
> Changes in v13:
> - preserved --aio option name avoiding complication
> Changes in v12:
> - extended --aio option to --aio-cblocks=<n>
> Changes in v10:
> - added description of aio-cblocks option into perf-record.txt
> ---
> tools/perf/Documentation/perf-record.txt | 4 +-
> tools/perf/builtin-record.c | 64 ++++++++++++++++++++++++--------
> tools/perf/util/mmap.c | 64 +++++++++++++++++++++-----------
> tools/perf/util/mmap.h | 9 +++--
> 4 files changed, 99 insertions(+), 42 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index 7efb4af88a68..d232b13ea713 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -435,8 +435,8 @@ Specify vmlinux path which has debuginfo.
> --buildid-all::
> Record build-id of all DSOs regardless whether it's actually hit or not.
>
> ---aio::
> -Enable asynchronous (Posix AIO) trace writing mode.
> +--aio[=n]::
> +Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4).
> Asynchronous mode is supported only when linking Perf tool with libc library
> providing implementation for Posix AIO API.
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 0c6105860123..c5d9288b8670 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
> return rc;
> }
>
> -static void record__aio_sync(struct perf_mmap *md)
> +static int record__aio_sync(struct perf_mmap *md, bool sync_all)
> {
> - struct aiocb *cblock = &md->aio.cblock;
> + struct aiocb **aiocb = md->aio.aiocb;
> + struct aiocb *cblocks = md->aio.cblocks;
> struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
> + int i, do_suspend;
>
> do {
> - if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock))
> - return;
> + do_suspend = 0;
> + for (i = 0; i < md->aio.nr_cblocks; ++i) {
> + if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
> + if (sync_all)
> + aiocb[i] = NULL;
> + else
> + return i;
> + } else {
> + /*
> + * Started aio write is not complete yet
> + * so it has to be waited before the
> + * next allocation.
> + */
> + aiocb[i] = &cblocks[i];
> + do_suspend = 1;
> + }
> + }
> + if (!do_suspend)
> + return -1;
>
> - while (aio_suspend((const struct aiocb**)&cblock, 1, &timeout)) {
> + while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
> if (!(errno == EAGAIN || errno == EINTR))
> pr_err("failed to sync perf data, error: %m\n");
> }
> @@ -252,28 +271,33 @@ static void record__aio_mmap_read_sync(struct record *rec)
> struct perf_mmap *map = &maps[i];
>
> if (map->base)
> - record__aio_sync(map);
> + record__aio_sync(map, true);
> }
> }
>
> static int nr_cblocks_default = 1;
>
> static int record__aio_parse(const struct option *opt,
> - const char *str __maybe_unused,
> + const char *str,
> int unset)
> {
> struct record_opts *opts = (struct record_opts *)opt->value;
>
> - if (unset)
> + if (unset) {
> opts->nr_cblocks = 0;
> - else
> - opts->nr_cblocks = nr_cblocks_default;
> + } else {
> + if (str)
> + opts->nr_cblocks = strtol(str, NULL, 0);
> + if (!opts->nr_cblocks)
> + opts->nr_cblocks = nr_cblocks_default;
> + }
>
> return 0;
> }
> #else /* HAVE_AIO_SUPPORT */
> -static void record__aio_sync(struct perf_mmap *md __maybe_unused)
> +static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
> {
> + return -1;
> }
>
> static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
> @@ -723,12 +747,13 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
> goto out;
> }
> } else {
> + int idx;
> /*
> * Call record__aio_sync() to wait till map->data buffer
> * becomes available after previous aio write request.
> */
> - record__aio_sync(map);
> - if (perf_mmap__aio_push(map, rec, record__aio_pushfn, &off) != 0) {
> + idx = record__aio_sync(map, false);
> + if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
> record__aio_set_pos(trace_fd, off);
> rc = -1;
> goto out;
> @@ -1492,6 +1517,13 @@ static int perf_record_config(const char *var, const char *value, void *cb)
> var = "call-graph.record-mode";
> return perf_default_config(var, value, cb);
> }
> +#ifdef HAVE_AIO_SUPPORT
> + if (!strcmp(var, "record.aio")) {
> + rec->opts.nr_cblocks = strtol(value, NULL, 0);
> + if (!rec->opts.nr_cblocks)
> + rec->opts.nr_cblocks = nr_cblocks_default;
> + }
> +#endif
>
> return 0;
> }
> @@ -1884,8 +1916,8 @@ static struct option __record_options[] = {
> OPT_BOOLEAN(0, "dry-run", &dry_run,
> "Parse options then exit"),
> #ifdef HAVE_AIO_SUPPORT
> - OPT_CALLBACK_NOOPT(0, "aio", &record.opts,
> - NULL, "Enable asynchronous trace writing mode",
> + OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
> + &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
> record__aio_parse),
> #endif
> OPT_END()
> @@ -2080,6 +2112,8 @@ int cmd_record(int argc, const char **argv)
> goto out;
> }
>
> + if (rec->opts.nr_cblocks > 4)
> + rec->opts.nr_cblocks = 4;
nit: I feel this logic belongs in record__aio_parse(). We should also have
static int nr_cblocks_max = 4;
Other than this,
Acked-by: Song Liu <songliubraving@...com>
Powered by blists - more mailing lists