[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ffedeac7-bb47-4fab-a0b4-23c75d133e44@linux.intel.com>
Date: Mon, 27 Jan 2025 10:50:38 -0500
From: "Liang, Kan" <kan.liang@...ux.intel.com>
To: Ian Rogers <irogers@...gle.com>, Dapeng Mi <dapeng1.mi@...ux.intel.com>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Namhyung Kim <namhyung@...nel.org>, Adrian Hunter <adrian.hunter@...el.com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Andi Kleen <ak@...ux.intel.com>, Eranian Stephane <eranian@...gle.com>,
linux-kernel@...r.kernel.org, linux-perf-users@...r.kernel.org,
Dapeng Mi <dapeng1.mi@...el.com>
Subject: Re: [PATCH 18/20] perf tools: Support to capture more vector
registers (common part)
On 2025-01-23 11:42 a.m., Ian Rogers wrote:
> On Wed, Jan 22, 2025 at 10:21 PM Dapeng Mi <dapeng1.mi@...ux.intel.com> wrote:
>>
>> Intel architectural PEBS supports capturing more vector registers, such as
>> the OPMASK/YMM/ZMM registers, in addition to the already supported XMM registers.
>>
>> Capturing arch-PEBS vector registers (VECR) in the perf core/PMU driver
>> (Intel) is supported by the previous patches. This patch adds the perf
>> tool side of that support. In detail, add support for the new
>> sample_regs_intr_ext register selector in perf_event_attr. This 32-byte
>> bitmap is used to select the new register groups OPMASK, YMMH, ZMMH and
>> ZMM in VECR. Update perf regs to introduce the new registers.
>>
>> This patch only introduces the common support; the x86/Intel-specific
>> support is added in the next patch.
>
> Could you break down what the individual changes are? I see quite a
> few, some in printing, some with functions like arch__intr_reg_mask.
> I'm sure the changes are well motivated but there is little detail in
> the commit message. Perhaps there is some chance to separate each
> change into its own patch. By detail I mean something like, "change
> arch__intr_reg_mask to take a pointer so that REG_MASK and array
> initialization are possible."
>
> It is a shame arch__intr_reg_mask doesn't match arch__user_reg_mask
> following this change. Perhaps update them both for the sake of
> consistency.
Yes, that sounds cleaner. Both would use the same size but a different mask.
It may waste some space, but it should be OK.
>
> Out of scope here, I wonder in general how we can get this code out of
> the arch directory? For example, it would be nice if we have say an
> arm perf command running on qemu-user on an x86 that we perhaps want
> to do the appropriate reg_mask for x86.
Each architecture has a different pt_regs. It seems hard to generate a
generic reg list.
Thanks,
Kan
>
> Thanks,
> Ian
>
>> Co-developed-by: Kan Liang <kan.liang@...ux.intel.com>
>> Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
>> Signed-off-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
>> ---
>> tools/include/uapi/linux/perf_event.h | 13 +++++++++
>> tools/perf/arch/arm/util/perf_regs.c | 5 +---
>> tools/perf/arch/arm64/util/perf_regs.c | 5 +---
>> tools/perf/arch/csky/util/perf_regs.c | 5 +---
>> tools/perf/arch/loongarch/util/perf_regs.c | 5 +---
>> tools/perf/arch/mips/util/perf_regs.c | 5 +---
>> tools/perf/arch/powerpc/util/perf_regs.c | 9 ++++---
>> tools/perf/arch/riscv/util/perf_regs.c | 5 +---
>> tools/perf/arch/s390/util/perf_regs.c | 5 +---
>> tools/perf/arch/x86/util/perf_regs.c | 9 ++++---
>> tools/perf/builtin-script.c | 19 ++++++++++---
>> tools/perf/util/evsel.c | 14 +++++++---
>> tools/perf/util/parse-regs-options.c | 23 +++++++++-------
>> tools/perf/util/perf_regs.c | 5 ----
>> tools/perf/util/perf_regs.h | 18 +++++++++++--
>> tools/perf/util/record.h | 2 +-
>> tools/perf/util/sample.h | 6 ++++-
>> tools/perf/util/session.c | 31 +++++++++++++---------
>> tools/perf/util/synthetic-events.c | 7 +++--
>> 19 files changed, 116 insertions(+), 75 deletions(-)
>>
>> diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
>> index 4842c36fdf80..02d8f55f6247 100644
>> --- a/tools/include/uapi/linux/perf_event.h
>> +++ b/tools/include/uapi/linux/perf_event.h
>> @@ -379,6 +379,13 @@ enum perf_event_read_format {
>> #define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
>> #define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
>> #define PERF_ATTR_SIZE_VER8 136 /* add: config3 */
>> +#define PERF_ATTR_SIZE_VER9 168 /* add: sample_regs_intr_ext[PERF_EXT_REGS_ARRAY_SIZE] */
>> +
>> +#define PERF_EXT_REGS_ARRAY_SIZE 4
>> +#define PERF_NUM_EXT_REGS (PERF_EXT_REGS_ARRAY_SIZE * 64)
>> +
>> +#define PERF_NUM_INTR_REGS (PERF_EXT_REGS_ARRAY_SIZE + 1)
>> +#define PERF_NUM_INTR_REGS_SIZE ((PERF_NUM_INTR_REGS) * 64)
>>
>> /*
>> * Hardware event_id to monitor via a performance monitoring event:
>> @@ -522,6 +529,12 @@ struct perf_event_attr {
>> __u64 sig_data;
>>
>> __u64 config3; /* extension of config2 */
>> +
>> + /*
>> + * Extension sets of regs to dump for each sample.
>> + * See asm/perf_regs.h for details.
>> + */
>> + __u64 sample_regs_intr_ext[PERF_EXT_REGS_ARRAY_SIZE];
>> };
>>
>> /*
>> diff --git a/tools/perf/arch/arm/util/perf_regs.c b/tools/perf/arch/arm/util/perf_regs.c
>> index f94a0210c7b7..3a3c2779efd4 100644
>> --- a/tools/perf/arch/arm/util/perf_regs.c
>> +++ b/tools/perf/arch/arm/util/perf_regs.c
>> @@ -6,10 +6,7 @@ static const struct sample_reg sample_reg_masks[] = {
>> SMPL_REG_END
>> };
>>
>> -uint64_t arch__intr_reg_mask(void)
>> -{
>> - return PERF_REGS_MASK;
>> -}
>> +void arch__intr_reg_mask(unsigned long *mask) {}
>>
>> uint64_t arch__user_reg_mask(void)
>> {
>> diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
>> index 09308665e28a..754bb8423733 100644
>> --- a/tools/perf/arch/arm64/util/perf_regs.c
>> +++ b/tools/perf/arch/arm64/util/perf_regs.c
>> @@ -140,10 +140,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
>> return SDT_ARG_VALID;
>> }
>>
>> -uint64_t arch__intr_reg_mask(void)
>> -{
>> - return PERF_REGS_MASK;
>> -}
>> +void arch__intr_reg_mask(unsigned long *mask) {}
>>
>> uint64_t arch__user_reg_mask(void)
>> {
>> diff --git a/tools/perf/arch/csky/util/perf_regs.c b/tools/perf/arch/csky/util/perf_regs.c
>> index 6b1665f41180..9d132150ecb6 100644
>> --- a/tools/perf/arch/csky/util/perf_regs.c
>> +++ b/tools/perf/arch/csky/util/perf_regs.c
>> @@ -6,10 +6,7 @@ static const struct sample_reg sample_reg_masks[] = {
>> SMPL_REG_END
>> };
>>
>> -uint64_t arch__intr_reg_mask(void)
>> -{
>> - return PERF_REGS_MASK;
>> -}
>> +void arch__intr_reg_mask(unsigned long *mask) {}
>>
>> uint64_t arch__user_reg_mask(void)
>> {
>> diff --git a/tools/perf/arch/loongarch/util/perf_regs.c b/tools/perf/arch/loongarch/util/perf_regs.c
>> index f94a0210c7b7..3a3c2779efd4 100644
>> --- a/tools/perf/arch/loongarch/util/perf_regs.c
>> +++ b/tools/perf/arch/loongarch/util/perf_regs.c
>> @@ -6,10 +6,7 @@ static const struct sample_reg sample_reg_masks[] = {
>> SMPL_REG_END
>> };
>>
>> -uint64_t arch__intr_reg_mask(void)
>> -{
>> - return PERF_REGS_MASK;
>> -}
>> +void arch__intr_reg_mask(unsigned long *mask) {}
>>
>> uint64_t arch__user_reg_mask(void)
>> {
>> diff --git a/tools/perf/arch/mips/util/perf_regs.c b/tools/perf/arch/mips/util/perf_regs.c
>> index 6b1665f41180..9d132150ecb6 100644
>> --- a/tools/perf/arch/mips/util/perf_regs.c
>> +++ b/tools/perf/arch/mips/util/perf_regs.c
>> @@ -6,10 +6,7 @@ static const struct sample_reg sample_reg_masks[] = {
>> SMPL_REG_END
>> };
>>
>> -uint64_t arch__intr_reg_mask(void)
>> -{
>> - return PERF_REGS_MASK;
>> -}
>> +void arch__intr_reg_mask(unsigned long *mask) {}
>>
>> uint64_t arch__user_reg_mask(void)
>> {
>> diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
>> index e8e6e6fc6f17..08ab9ed692fb 100644
>> --- a/tools/perf/arch/powerpc/util/perf_regs.c
>> +++ b/tools/perf/arch/powerpc/util/perf_regs.c
>> @@ -186,7 +186,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
>> return SDT_ARG_VALID;
>> }
>>
>> -uint64_t arch__intr_reg_mask(void)
>> +void arch__intr_reg_mask(unsigned long *mask)
>> {
>> struct perf_event_attr attr = {
>> .type = PERF_TYPE_HARDWARE,
>> @@ -198,7 +198,9 @@ uint64_t arch__intr_reg_mask(void)
>> };
>> int fd;
>> u32 version;
>> - u64 extended_mask = 0, mask = PERF_REGS_MASK;
>> + u64 extended_mask = 0;
>> +
>> + *(u64 *)mask = PERF_REGS_MASK;
>>
>> /*
>> * Get the PVR value to set the extended
>> @@ -223,9 +225,8 @@ uint64_t arch__intr_reg_mask(void)
>> fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
>> if (fd != -1) {
>> close(fd);
>> - mask |= extended_mask;
>> + *(u64 *)mask |= extended_mask;
>> }
>> - return mask;
>> }
>>
>> uint64_t arch__user_reg_mask(void)
>> diff --git a/tools/perf/arch/riscv/util/perf_regs.c b/tools/perf/arch/riscv/util/perf_regs.c
>> index 6b1665f41180..9d132150ecb6 100644
>> --- a/tools/perf/arch/riscv/util/perf_regs.c
>> +++ b/tools/perf/arch/riscv/util/perf_regs.c
>> @@ -6,10 +6,7 @@ static const struct sample_reg sample_reg_masks[] = {
>> SMPL_REG_END
>> };
>>
>> -uint64_t arch__intr_reg_mask(void)
>> -{
>> - return PERF_REGS_MASK;
>> -}
>> +void arch__intr_reg_mask(unsigned long *mask) {}
>>
>> uint64_t arch__user_reg_mask(void)
>> {
>> diff --git a/tools/perf/arch/s390/util/perf_regs.c b/tools/perf/arch/s390/util/perf_regs.c
>> index 6b1665f41180..9d132150ecb6 100644
>> --- a/tools/perf/arch/s390/util/perf_regs.c
>> +++ b/tools/perf/arch/s390/util/perf_regs.c
>> @@ -6,10 +6,7 @@ static const struct sample_reg sample_reg_masks[] = {
>> SMPL_REG_END
>> };
>>
>> -uint64_t arch__intr_reg_mask(void)
>> -{
>> - return PERF_REGS_MASK;
>> -}
>> +void arch__intr_reg_mask(unsigned long *mask) {}
>>
>> uint64_t arch__user_reg_mask(void)
>> {
>> diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
>> index 9f492568f3b4..52f08498d005 100644
>> --- a/tools/perf/arch/x86/util/perf_regs.c
>> +++ b/tools/perf/arch/x86/util/perf_regs.c
>> @@ -283,7 +283,7 @@ const struct sample_reg *arch__sample_reg_masks(void)
>> return sample_reg_masks;
>> }
>>
>> -uint64_t arch__intr_reg_mask(void)
>> +void arch__intr_reg_mask(unsigned long *mask)
>> {
>> struct perf_event_attr attr = {
>> .type = PERF_TYPE_HARDWARE,
>> @@ -295,6 +295,9 @@ uint64_t arch__intr_reg_mask(void)
>> .exclude_kernel = 1,
>> };
>> int fd;
>> +
>> + *(u64 *)mask = PERF_REGS_MASK;
>> +
>> /*
>> * In an unnamed union, init it here to build on older gcc versions
>> */
>> @@ -320,10 +323,8 @@ uint64_t arch__intr_reg_mask(void)
>> fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
>> if (fd != -1) {
>> close(fd);
>> - return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK);
>> + *(u64 *)mask |= PERF_REG_EXTENDED_MASK;
>> }
>> -
>> - return PERF_REGS_MASK;
>> }
>>
>> uint64_t arch__user_reg_mask(void)
>> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
>> index 9e47905f75a6..66d3923e4040 100644
>> --- a/tools/perf/builtin-script.c
>> +++ b/tools/perf/builtin-script.c
>> @@ -704,10 +704,11 @@ static int perf_session__check_output_opt(struct perf_session *session)
>> }
>>
>> static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, const char *arch,
>> - FILE *fp)
>> + unsigned long *mask_ext, FILE *fp)
>> {
>> unsigned i = 0, r;
>> int printed = 0;
>> + u64 val;
>>
>> if (!regs || !regs->regs)
>> return 0;
>> @@ -715,7 +716,15 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, cons
>> printed += fprintf(fp, " ABI:%" PRIu64 " ", regs->abi);
>>
>> for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
>> - u64 val = regs->regs[i++];
>> + val = regs->regs[i++];
>> + printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r, arch), val);
>> + }
>> +
>> + if (!mask_ext)
>> + return printed;
>> +
>> + for_each_set_bit(r, mask_ext, PERF_NUM_EXT_REGS) {
>> + val = regs->regs[i++];
>> printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r, arch), val);
>> }
>>
>> @@ -776,14 +785,16 @@ static int perf_sample__fprintf_iregs(struct perf_sample *sample,
>> struct perf_event_attr *attr, const char *arch, FILE *fp)
>> {
>> return perf_sample__fprintf_regs(&sample->intr_regs,
>> - attr->sample_regs_intr, arch, fp);
>> + attr->sample_regs_intr, arch,
>> + (unsigned long *)attr->sample_regs_intr_ext,
>> + fp);
>> }
>>
>> static int perf_sample__fprintf_uregs(struct perf_sample *sample,
>> struct perf_event_attr *attr, const char *arch, FILE *fp)
>> {
>> return perf_sample__fprintf_regs(&sample->user_regs,
>> - attr->sample_regs_user, arch, fp);
>> + attr->sample_regs_user, arch, NULL, fp);
>> }
>>
>> static int perf_sample__fprintf_start(struct perf_script *script,
>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>> index f745723d486b..297b960ac446 100644
>> --- a/tools/perf/util/evsel.c
>> +++ b/tools/perf/util/evsel.c
>> @@ -1314,9 +1314,11 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
>> if (callchain && callchain->enabled && !evsel->no_aux_samples)
>> evsel__config_callchain(evsel, opts, callchain);
>>
>> - if (opts->sample_intr_regs && !evsel->no_aux_samples &&
>> - !evsel__is_dummy_event(evsel)) {
>> - attr->sample_regs_intr = opts->sample_intr_regs;
>> + if (bitmap_weight(opts->sample_intr_regs, PERF_NUM_INTR_REGS_SIZE) &&
>> + !evsel->no_aux_samples && !evsel__is_dummy_event(evsel)) {
>> + attr->sample_regs_intr = opts->sample_intr_regs[0];
>> + memcpy(attr->sample_regs_intr_ext, &opts->sample_intr_regs[1],
>> + PERF_NUM_EXT_REGS / 8);
>> evsel__set_sample_bit(evsel, REGS_INTR);
>> }
>>
>> @@ -3097,10 +3099,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>>
>> if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
>> u64 mask = evsel->core.attr.sample_regs_intr;
>> + unsigned long *mask_ext =
>> + (unsigned long *)evsel->core.attr.sample_regs_intr_ext;
>> + u64 *intr_regs_mask;
>>
>> sz = hweight64(mask) * sizeof(u64);
>> + sz += bitmap_weight(mask_ext, PERF_NUM_EXT_REGS) * sizeof(u64);
>> OVERFLOW_CHECK(array, sz, max_size);
>> data->intr_regs.mask = mask;
>> + intr_regs_mask = (u64 *)&data->intr_regs.mask_ext;
>> + memcpy(&intr_regs_mask[1], mask_ext, PERF_NUM_EXT_REGS);
>> data->intr_regs.regs = (u64 *)array;
>> array = (void *)array + sz;
>> }
>> diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
>> index cda1c620968e..666c2a172ef2 100644
>> --- a/tools/perf/util/parse-regs-options.c
>> +++ b/tools/perf/util/parse-regs-options.c
>> @@ -12,11 +12,13 @@
>> static int
>> __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>> {
>> + unsigned int size = intr ? PERF_NUM_INTR_REGS * 64 : 64;
>> uint64_t *mode = (uint64_t *)opt->value;
>> const struct sample_reg *r = NULL;
>> char *s, *os = NULL, *p;
>> int ret = -1;
>> - uint64_t mask;
>> + DECLARE_BITMAP(mask, size);
>> + DECLARE_BITMAP(mask_tmp, size);
>>
>> if (unset)
>> return 0;
>> @@ -24,13 +26,14 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>> /*
>> * cannot set it twice
>> */
>> - if (*mode)
>> + if (bitmap_weight((unsigned long *)mode, size))
>> return -1;
>>
>> + bitmap_zero(mask, size);
>> if (intr)
>> - mask = arch__intr_reg_mask();
>> + arch__intr_reg_mask(mask);
>> else
>> - mask = arch__user_reg_mask();
>> + *(uint64_t *)mask = arch__user_reg_mask();
>>
>> /* str may be NULL in case no arg is passed to -I */
>> if (str) {
>> @@ -47,7 +50,8 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>> if (!strcmp(s, "?")) {
>> fprintf(stderr, "available registers: ");
>> for (r = arch__sample_reg_masks(); r->name; r++) {
>> - if (r->mask & mask)
>> + bitmap_and(mask_tmp, mask, r->mask_ext, size);
>> + if (bitmap_weight(mask_tmp, size))
>> fprintf(stderr, "%s ", r->name);
>> }
>> fputc('\n', stderr);
>> @@ -55,7 +59,8 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>> goto error;
>> }
>> for (r = arch__sample_reg_masks(); r->name; r++) {
>> - if ((r->mask & mask) && !strcasecmp(s, r->name))
>> + bitmap_and(mask_tmp, mask, r->mask_ext, size);
>> + if (bitmap_weight(mask_tmp, size) && !strcasecmp(s, r->name))
>> break;
>> }
>> if (!r || !r->name) {
>> @@ -64,7 +69,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>> goto error;
>> }
>>
>> - *mode |= r->mask;
>> + bitmap_or((unsigned long *)mode, (unsigned long *)mode, r->mask_ext, size);
>>
>> if (!p)
>> break;
>> @@ -75,8 +80,8 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>> ret = 0;
>>
>> /* default to all possible regs */
>> - if (*mode == 0)
>> - *mode = mask;
>> + if (!bitmap_weight((unsigned long *)mode, size))
>> + bitmap_or((unsigned long *)mode, (unsigned long *)mode, mask, size);
>> error:
>> free(os);
>> return ret;
>> diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
>> index 44b90bbf2d07..b36eafc10e84 100644
>> --- a/tools/perf/util/perf_regs.c
>> +++ b/tools/perf/util/perf_regs.c
>> @@ -11,11 +11,6 @@ int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
>> return SDT_ARG_SKIP;
>> }
>>
>> -uint64_t __weak arch__intr_reg_mask(void)
>> -{
>> - return 0;
>> -}
>> -
>> uint64_t __weak arch__user_reg_mask(void)
>> {
>> return 0;
>> diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
>> index f2d0736d65cc..5018b8d040ee 100644
>> --- a/tools/perf/util/perf_regs.h
>> +++ b/tools/perf/util/perf_regs.h
>> @@ -4,18 +4,32 @@
>>
>> #include <linux/types.h>
>> #include <linux/compiler.h>
>> +#include <linux/bitmap.h>
>> +#include <linux/perf_event.h>
>> +#include "util/record.h"
>>
>> struct regs_dump;
>>
>> struct sample_reg {
>> const char *name;
>> - uint64_t mask;
>> + union {
>> + uint64_t mask;
>> + DECLARE_BITMAP(mask_ext, PERF_NUM_INTR_REGS * 64);
>> + };
>> };
>>
>> #define SMPL_REG_MASK(b) (1ULL << (b))
>> #define SMPL_REG(n, b) { .name = #n, .mask = SMPL_REG_MASK(b) }
>> #define SMPL_REG2_MASK(b) (3ULL << (b))
>> #define SMPL_REG2(n, b) { .name = #n, .mask = SMPL_REG2_MASK(b) }
>> +#define SMPL_REG_EXT(n, b) \
>> + { .name = #n, .mask_ext[b / __BITS_PER_LONG] = 0x1ULL << (b % __BITS_PER_LONG) }
>> +#define SMPL_REG2_EXT(n, b) \
>> + { .name = #n, .mask_ext[b / __BITS_PER_LONG] = 0x3ULL << (b % __BITS_PER_LONG) }
>> +#define SMPL_REG4_EXT(n, b) \
>> + { .name = #n, .mask_ext[b / __BITS_PER_LONG] = 0xfULL << (b % __BITS_PER_LONG) }
>> +#define SMPL_REG8_EXT(n, b) \
>> + { .name = #n, .mask_ext[b / __BITS_PER_LONG] = 0xffULL << (b % __BITS_PER_LONG) }
>> #define SMPL_REG_END { .name = NULL }
>>
>> enum {
>> @@ -24,7 +38,7 @@ enum {
>> };
>>
>> int arch_sdt_arg_parse_op(char *old_op, char **new_op);
>> -uint64_t arch__intr_reg_mask(void);
>> +void arch__intr_reg_mask(unsigned long *mask);
>> uint64_t arch__user_reg_mask(void);
>> const struct sample_reg *arch__sample_reg_masks(void);
>>
>> diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
>> index a6566134e09e..16e44a640e57 100644
>> --- a/tools/perf/util/record.h
>> +++ b/tools/perf/util/record.h
>> @@ -57,7 +57,7 @@ struct record_opts {
>> unsigned int auxtrace_mmap_pages;
>> unsigned int user_freq;
>> u64 branch_stack;
>> - u64 sample_intr_regs;
>> + u64 sample_intr_regs[PERF_NUM_INTR_REGS];
>> u64 sample_user_regs;
>> u64 default_interval;
>> u64 user_interval;
>> diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
>> index 70b2c3135555..98c9c4260de6 100644
>> --- a/tools/perf/util/sample.h
>> +++ b/tools/perf/util/sample.h
>> @@ -4,13 +4,17 @@
>>
>> #include <linux/perf_event.h>
>> #include <linux/types.h>
>> +#include <linux/bitmap.h>
>>
>> /* number of register is bound by the number of bits in regs_dump::mask (64) */
>> #define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64))
>>
>> struct regs_dump {
>> u64 abi;
>> - u64 mask;
>> + union {
>> + u64 mask;
>> + DECLARE_BITMAP(mask_ext, PERF_NUM_INTR_REGS * 64);
>> + };
>> u64 *regs;
>>
>> /* Cached values/mask filled by first register access. */
>> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
>> index 507e6cba9545..995f5c2963bc 100644
>> --- a/tools/perf/util/session.c
>> +++ b/tools/perf/util/session.c
>> @@ -909,12 +909,13 @@ static void branch_stack__printf(struct perf_sample *sample,
>> }
>> }
>>
>> -static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
>> +static void regs_dump__printf(bool intr, struct regs_dump *regs, const char *arch)
>> {
>> + unsigned int size = intr ? PERF_NUM_INTR_REGS * 64 : 64;
>> unsigned rid, i = 0;
>>
>> - for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
>> - u64 val = regs[i++];
>> + for_each_set_bit(rid, regs->mask_ext, size) {
>> + u64 val = regs->regs[i++];
>>
>> printf(".... %-5s 0x%016" PRIx64 "\n",
>> perf_reg_name(rid, arch), val);
>> @@ -935,16 +936,22 @@ static inline const char *regs_dump_abi(struct regs_dump *d)
>> return regs_abi[d->abi];
>> }
>>
>> -static void regs__printf(const char *type, struct regs_dump *regs, const char *arch)
>> +static void regs__printf(bool intr, struct regs_dump *regs, const char *arch)
>> {
>> - u64 mask = regs->mask;
>> + if (intr) {
>> + u64 *mask = (u64 *)&regs->mask_ext;
>>
>> - printf("... %s regs: mask 0x%" PRIx64 " ABI %s\n",
>> - type,
>> - mask,
>> - regs_dump_abi(regs));
>> + printf("... intr regs: mask 0x");
>> + for (int i = 0; i < PERF_NUM_INTR_REGS; i++)
>> + printf("%" PRIx64 "", mask[i]);
>> + printf(" ABI %s\n", regs_dump_abi(regs));
>> + } else {
>> + printf("... user regs: mask 0x%" PRIx64 " ABI %s\n",
>> + regs->mask,
>> + regs_dump_abi(regs));
>> + }
>>
>> - regs_dump__printf(mask, regs->regs, arch);
>> + regs_dump__printf(intr, regs, arch);
>> }
>>
>> static void regs_user__printf(struct perf_sample *sample, const char *arch)
>> @@ -952,7 +959,7 @@ static void regs_user__printf(struct perf_sample *sample, const char *arch)
>> struct regs_dump *user_regs = &sample->user_regs;
>>
>> if (user_regs->regs)
>> - regs__printf("user", user_regs, arch);
>> + regs__printf(false, user_regs, arch);
>> }
>>
>> static void regs_intr__printf(struct perf_sample *sample, const char *arch)
>> @@ -960,7 +967,7 @@ static void regs_intr__printf(struct perf_sample *sample, const char *arch)
>> struct regs_dump *intr_regs = &sample->intr_regs;
>>
>> if (intr_regs->regs)
>> - regs__printf("intr", intr_regs, arch);
>> + regs__printf(true, intr_regs, arch);
>> }
>>
>> static void stack_user__printf(struct stack_dump *dump)
>> diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
>> index a58444c4aed1..35c5d58aa45f 100644
>> --- a/tools/perf/util/synthetic-events.c
>> +++ b/tools/perf/util/synthetic-events.c
>> @@ -1538,7 +1538,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
>> if (type & PERF_SAMPLE_REGS_INTR) {
>> if (sample->intr_regs.abi) {
>> result += sizeof(u64);
>> - sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
>> + sz = bitmap_weight(sample->intr_regs.mask_ext,
>> + PERF_NUM_INTR_REGS * 64) *
>> + sizeof(u64);
>> result += sz;
>> } else {
>> result += sizeof(u64);
>> @@ -1741,7 +1743,8 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
>> if (type & PERF_SAMPLE_REGS_INTR) {
>> if (sample->intr_regs.abi) {
>> *array++ = sample->intr_regs.abi;
>> - sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
>> + sz = bitmap_weight(sample->intr_regs.mask_ext,
>> + PERF_NUM_INTR_REGS * 64) * sizeof(u64);
>> memcpy(array, sample->intr_regs.regs, sz);
>> array = (void *)array + sz;
>> } else {
>> --
>> 2.40.1
>>
>
Powered by blists - more mailing lists