lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241025144303.GC359792@e132581.arm.com>
Date: Fri, 25 Oct 2024 15:43:03 +0100
From: Leo Yan <leo.yan@....com>
To: Ilkka Koskinen <ilkka@...amperecomputing.com>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
	Arnaldo Carvalho de Melo <acme@...nel.org>,
	Namhyung Kim <namhyung@...nel.org>,
	Mark Rutland <mark.rutland@....com>,
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
	Jiri Olsa <jolsa@...nel.org>, Ian Rogers <irogers@...gle.com>,
	Adrian Hunter <adrian.hunter@...el.com>,
	"Liang, Kan" <kan.liang@...ux.intel.com>,
	John Garry <john.g.garry@...cle.com>, Will Deacon <will@...nel.org>,
	James Clark <james.clark@...aro.org>,
	Mike Leach <mike.leach@...aro.org>, Leo Yan <leo.yan@...ux.dev>,
	linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org,
	linux-arm-kernel@...ts.infradead.org
Subject: Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on
 AmpereOne

On Thu, Oct 24, 2024 at 11:30:35PM +0000, Ilkka Koskinen wrote:
> 
> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
> 
> Signed-off-by: Ilkka Koskinen <ilkka@...amperecomputing.com>
> ---
>  .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
>  tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
>  2 files changed, 70 insertions(+)
> 
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 1443c28545a9..e4115b1e92b2 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
>         ARM_SPE_NV_DRAM          = 0xe,
>  };
> 
> +enum arm_spe_ampereone_data_source {
> +       ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE    = 0x0,
> +       ARM_SPE_AMPEREONE_SLC                           = 0x3,
> +       ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE             = 0x5,
> +       ARM_SPE_AMPEREONE_DDR                           = 0x7,
> +       ARM_SPE_AMPEREONE_L1D                           = 0x8,
> +       ARM_SPE_AMPEREONE_L2D                           = 0x9,
> +};
> +
>  struct arm_spe_record {
>         enum arm_spe_sample_type type;
>         int err;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 138ffc71b32d..04bd21ad7ea8 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
>                 data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
>  }
> 
> +static const struct midr_range ampereone_source_spe[] = {
> +       MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
> +       {},
> +};
> +
> +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
> +                                                union perf_mem_data_src *data_src,
> +                                                u64 midr)
> +{
> +       if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
> +               arm_spe__synth_data_source_generic(record, data_src);
> +               return;
> +       }

With James' suggestion, I don't think we need to check the CPU
variant again here.  All generic data source generation should be done
in the arm_spe__synth_data_source() function.

> +
> +       if (record->op & ARM_SPE_OP_ST) {
> +               data_src->mem_lvl = PERF_MEM_LVL_NA;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_NA;
> +               return;
> +       }
> +
> +       switch (record->source) {
> +       case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
> +               data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
> +               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +               break;
> +       case ARM_SPE_AMPEREONE_SLC:
> +               data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
> +               break;
> +       case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
> +               data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> +               data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> +               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +               break;
> +       case ARM_SPE_AMPEREONE_DDR:
> +               data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +               break;
> +       case ARM_SPE_AMPEREONE_L1D:
> +               data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +               break;
> +       case ARM_SPE_AMPEREONE_L2D:
> +               data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +               break;

There is another way to do this: convert the SoC-specific data source
values to the common data source values, e.g.

  ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE -> ARM_SPE_NV_PEER_CORE
  ARM_SPE_AMPEREONE_SLC -> ARM_SPE_NV_SYS_CACHE
  ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE -> ARM_SPE_NV_REMOTE
  ARM_SPE_AMPEREONE_DDR -> ARM_SPE_NV_DRAM
  ...

Then we don't need to maintain two functions with almost the same settings.

I have no strong opinion on this. A dedicated function for Ampere CPUs
might give a bit more flexibility for later tweaking. It is up to you.

Last thing, please work on the latest perf-tools-next branch:

  https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git
  branch: perf-tools-next

We recently refactored the Arm SPE data source handling; please rebase on it.

Thanks,
Leo

> +       default:
> +               break;
> +       }
> +}
> +
>  static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
>  {
>         union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
>         bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
> +       bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
> 
>         if (record->op & ARM_SPE_OP_LD)
>                 data_src.mem_op = PERF_MEM_OP_LOAD;
> @@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m
> 
>         if (is_neoverse)
>                 arm_spe__synth_data_source_neoverse(record, &data_src);
> +       else if (is_ampereone)
> +               arm_spe__synth_data_source_ampereone(record, &data_src, midr);
>         else
>                 arm_spe__synth_data_source_generic(record, &data_src);
> 
> --
> 2.47.0
> 
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ