[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAM9d7cgbgjz-X9Hv1o+kik+-QBAyDTShO4ChRiHehNFW+d_gOA@mail.gmail.com>
Date: Thu, 1 Sep 2022 22:29:12 -0700
From: Namhyung Kim <namhyung@...nel.org>
To: Kan Liang <kan.liang@...ux.intel.com>
Cc: Peter Zijlstra <peterz@...radead.org>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Ingo Molnar <mingo@...hat.com>,
Stephane Eranian <eranian@...gle.com>,
Michael Ellerman <mpe@...erman.id.au>,
linux-kernel <linux-kernel@...r.kernel.org>,
Andi Kleen <ak@...ux.intel.com>, andreas.kogler.0x@...il.com,
Athira Rajeev <atrajeev@...ux.vnet.ibm.com>,
Ravi Bangoria <ravi.bangoria@....com>
Subject: Re: [PATCH V2 4/6] perf: Use sample_flags for weight
On Thu, Sep 1, 2022 at 6:10 AM <kan.liang@...ux.intel.com> wrote:
>
> From: Kan Liang <kan.liang@...ux.intel.com>
>
> Use the new sample_flags to indicate whether the weight field is filled
> by the PMU driver.
>
> Remove the weight field from the perf_sample_data_init() to minimize the
> number of cache lines touched.
>
> Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
> ---
> arch/powerpc/perf/core-book3s.c | 5 +++--
> arch/x86/events/intel/ds.c | 10 +++++++---
> include/linux/perf_event.h | 3 +--
> kernel/events/core.c | 3 +++
> 4 files changed, 14 insertions(+), 7 deletions(-)
>
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 1ad1efdb33f9..a5c95a2006ea 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -2305,9 +2305,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
> ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
>
> if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
> - ppmu->get_mem_weight)
> + ppmu->get_mem_weight) {
> ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
> -
> + data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
> + }
> if (perf_event_overflow(event, &data, regs))
> power_pmu_stop(event, 0);
> } else if (period) {
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index 5dcfd2de6ef8..f66a4905cc87 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -1535,8 +1535,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
> /*
> * Use latency for weight (only avail with PEBS-LL)
> */
> - if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
> + if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
> data->weight.full = pebs->lat;
> + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
> + }
>
> /*
> * data.data_src encodes the data source
> @@ -1628,9 +1630,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
>
> if (x86_pmu.intel_cap.pebs_format >= 2) {
> /* Only set the TSX weight when no memory weight. */
> - if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
> + if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
> data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
> -
> + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
> + }
> if (sample_type & PERF_SAMPLE_TRANSACTION)
> data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
> pebs->ax);
> @@ -1772,6 +1775,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
> data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
> intel_get_tsx_weight(meminfo->tsx_tuning);
> }
> + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
I was thinking about splitting PERF_SAMPLE_WEIGHT and
PERF_SAMPLE_WEIGHT_STRUCT but it'd just add complexity
unnecessarily?
Thanks,
Namhyung
> }
>
> if (sample_type & PERF_SAMPLE_DATA_SRC)
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 1e12e79454e0..06a587b5faa9 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1012,7 +1012,6 @@ struct perf_sample_data {
> u64 addr;
> struct perf_raw_record *raw;
> u64 period;
> - union perf_sample_weight weight;
> u64 txn;
> union perf_mem_data_src data_src;
>
> @@ -1021,6 +1020,7 @@ struct perf_sample_data {
> * perf_{prepare,output}_sample().
> */
> struct perf_branch_stack *br_stack;
> + union perf_sample_weight weight;
>
> u64 type;
> u64 ip;
> @@ -1063,7 +1063,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
> data->addr = addr;
> data->raw = NULL;
> data->period = period;
> - data->weight.full = 0;
> data->data_src.val = PERF_MEM_NA;
> data->txn = 0;
> }
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 104c0c9f4e6f..f0af45db02b3 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7408,6 +7408,9 @@ void perf_prepare_sample(struct perf_event_header *header,
> header->size += size;
> }
>
> + if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE)
> + data->weight.full = 0;
> +
> if (sample_type & PERF_SAMPLE_REGS_INTR) {
> /* regs dump ABI info */
> int size = sizeof(u64);
> --
> 2.35.1
>
Powered by blists - more mailing lists