[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240229001806.4158429-20-irogers@google.com>
Date: Wed, 28 Feb 2024 16:18:04 -0800
From: Ian Rogers <irogers@...gle.com>
To: Perry Taylor <perry.taylor@...el.com>, Samantha Alt <samantha.alt@...el.com>,
Caleb Biggers <caleb.biggers@...el.com>, Weilin Wang <weilin.wang@...el.com>,
Edward Baker <edward.baker@...el.com>, Andi Kleen <ak@...ux.intel.com>,
Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Ian Rogers <irogers@...gle.com>, Adrian Hunter <adrian.hunter@...el.com>,
John Garry <john.g.garry@...cle.com>, Kan Liang <kan.liang@...ux.intel.com>,
Jing Zhang <renyu.zj@...ux.alibaba.com>, Thomas Richter <tmricht@...ux.ibm.com>,
James Clark <james.clark@....com>, linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org, Stephane Eranian <eranian@...gle.com>
Subject: [PATCH v1 19/20] perf jevents: Add local/remote miss latency metrics
for Intel
Derive the average local and remote miss latency from CBOX/CHA TOR
occupancy and inserts, as described in Intel's uncore performance monitoring reference.
Signed-off-by: Ian Rogers <irogers@...gle.com>
---
tools/perf/pmu-events/intel_metrics.py | 59 ++++++++++++++++++++++++++
1 file changed, 59 insertions(+)
diff --git a/tools/perf/pmu-events/intel_metrics.py b/tools/perf/pmu-events/intel_metrics.py
index 1b9f7cd3b789..cdeb58e17c5e 100755
--- a/tools/perf/pmu-events/intel_metrics.py
+++ b/tools/perf/pmu-events/intel_metrics.py
@@ -617,6 +617,64 @@ def IntelL2() -> Optional[MetricGroup]:
], description = "L2 data cache analysis")
+def IntelMissLat() -> Optional[MetricGroup]:
+ try:
+ ticks = Event("UNC_CHA_CLOCKTICKS", "UNC_C_CLOCKTICKS")
+ data_rd_loc_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL",
+ "UNC_CHA_TOR_OCCUPANCY.IA_MISS",
+ "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE",
+ "UNC_C_TOR_OCCUPANCY.MISS_OPCODE")
+ data_rd_loc_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL",
+ "UNC_CHA_TOR_INSERTS.IA_MISS",
+ "UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE",
+ "UNC_C_TOR_INSERTS.MISS_OPCODE")
+ data_rd_rem_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE",
+ "UNC_CHA_TOR_OCCUPANCY.IA_MISS",
+ "UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE",
+ "UNC_C_TOR_OCCUPANCY.NID_MISS_OPCODE")
+ data_rd_rem_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE",
+ "UNC_CHA_TOR_INSERTS.IA_MISS",
+ "UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE",
+ "UNC_C_TOR_INSERTS.NID_MISS_OPCODE")
+ except:
+ return None
+
+ if (data_rd_loc_occ.name == "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE" or
+ data_rd_loc_occ.name == "UNC_C_TOR_OCCUPANCY.MISS_OPCODE"):
+ data_rd = 0x182
+ for e in [data_rd_loc_occ, data_rd_loc_ins, data_rd_rem_occ, data_rd_rem_ins]:
+ e.name += f"/filter_opc={hex(data_rd)}/"
+ elif data_rd_loc_occ.name == "UNC_CHA_TOR_OCCUPANCY.IA_MISS":
+ # Demand Data Read - Full cache-line read requests from core for
+ # lines to be cached in S or E, typically for data
+ demand_data_rd = 0x202
+ # LLC Prefetch Data - Uncore will first look up the line in the
+ # LLC; for a cache hit, the LRU will be updated, on a miss, the
+ # DRd will be initiated
+ llc_prefetch_data = 0x25a
+ local_filter = (f"/filter_opc0={hex(demand_data_rd)},"
+ f"filter_opc1={hex(llc_prefetch_data)},"
+ "filter_loc,filter_nm,filter_not_nm/")
+ remote_filter = (f"/filter_opc0={hex(demand_data_rd)},"
+ f"filter_opc1={hex(llc_prefetch_data)},"
+ "filter_rem,filter_nm,filter_not_nm/")
+ for e in [data_rd_loc_occ, data_rd_loc_ins]:
+ e.name += local_filter
+ for e in [data_rd_rem_occ, data_rd_rem_ins]:
+ e.name += remote_filter
+ else:
+ assert data_rd_loc_occ.name == "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL", data_rd_loc_occ
+
+ loc_lat = interval_sec * 1e9 * data_rd_loc_occ / (ticks * data_rd_loc_ins)
+ rem_lat = interval_sec * 1e9 * data_rd_rem_occ / (ticks * data_rd_rem_ins)
+ return MetricGroup("miss_lat", [
+ Metric("miss_lat_loc", "Local to a socket miss latency in nanoseconds",
+ loc_lat, "ns"),
+ Metric("miss_lat_rem", "Remote to a socket miss latency in nanoseconds",
+ rem_lat, "ns"),
+ ])
+
+
def IntelMlp() -> Optional[Metric]:
try:
l1d = Event("L1D_PEND_MISS.PENDING")
@@ -960,6 +1018,7 @@ all_metrics = MetricGroup("", [
IntelIlp(),
IntelL2(),
IntelLdSt(),
+ IntelMissLat(),
IntelMlp(),
IntelPorts(),
IntelSwpf(),
--
2.44.0.278.ge034bb2e1d-goog
Powered by blists - more mailing lists