[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240229001806.4158429-9-irogers@google.com>
Date: Wed, 28 Feb 2024 16:17:53 -0800
From: Ian Rogers <irogers@...gle.com>
To: Perry Taylor <perry.taylor@...el.com>, Samantha Alt <samantha.alt@...el.com>,
Caleb Biggers <caleb.biggers@...el.com>, Weilin Wang <weilin.wang@...el.com>,
Edward Baker <edward.baker@...el.com>, Andi Kleen <ak@...ux.intel.com>,
Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Ian Rogers <irogers@...gle.com>, Adrian Hunter <adrian.hunter@...el.com>,
John Garry <john.g.garry@...cle.com>, Kan Liang <kan.liang@...ux.intel.com>,
Jing Zhang <renyu.zj@...ux.alibaba.com>, Thomas Richter <tmricht@...ux.ibm.com>,
James Clark <james.clark@....com>, linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org, Stephane Eranian <eranian@...gle.com>
Subject: [PATCH v1 08/20] perf jevents: Add L2 metrics for Intel
Give a breakdown of various L2 counters as metrics, including totals,
reads, hardware prefetcher, RFO, code and evictions.
Signed-off-by: Ian Rogers <irogers@...gle.com>
---
tools/perf/pmu-events/intel_metrics.py | 158 +++++++++++++++++++++++++
1 file changed, 158 insertions(+)
diff --git a/tools/perf/pmu-events/intel_metrics.py b/tools/perf/pmu-events/intel_metrics.py
index 63d46ee1dca9..d22a1abca8d9 100755
--- a/tools/perf/pmu-events/intel_metrics.py
+++ b/tools/perf/pmu-events/intel_metrics.py
@@ -271,6 +271,163 @@ def IntelBr():
description="breakdown of retired branch instructions")
+def IntelL2() -> Optional[MetricGroup]:
+ try:
+ DC_HIT = Event("L2_RQSTS.DEMAND_DATA_RD_HIT")
+ except:
+ return None
+ try:
+ DC_MISS = Event("L2_RQSTS.DEMAND_DATA_RD_MISS")
+ l2_dmnd_miss = DC_MISS
+ l2_dmnd_rd_all = DC_MISS + DC_HIT
+ except:
+ DC_ALL = Event("L2_RQSTS.ALL_DEMAND_DATA_RD")
+ l2_dmnd_miss = DC_ALL - DC_HIT
+ l2_dmnd_rd_all = DC_ALL
+ l2_dmnd_mrate = d_ratio(l2_dmnd_miss, interval_sec)
+ l2_dmnd_rrate = d_ratio(l2_dmnd_rd_all, interval_sec)
+
+ DC_PFH = None
+ DC_PFM = None
+ l2_pf_all = None
+ l2_pf_mrate = None
+ l2_pf_rrate = None
+ try:
+ DC_PFH = Event("L2_RQSTS.PF_HIT")
+ DC_PFM = Event("L2_RQSTS.PF_MISS")
+ l2_pf_all = DC_PFH + DC_PFM
+ l2_pf_mrate = d_ratio(DC_PFM, interval_sec)
+ l2_pf_rrate = d_ratio(l2_pf_all, interval_sec)
+ except:
+ pass
+
+ DC_RFOH = Event("L2_RQSTS.RFO_HIT")
+ DC_RFOM = Event("L2_RQSTS.RFO_MISS")
+ l2_rfo_all = DC_RFOH + DC_RFOM
+ l2_rfo_mrate = d_ratio(DC_RFOM, interval_sec)
+ l2_rfo_rrate = d_ratio(l2_rfo_all, interval_sec)
+
+ DC_CH = Event("L2_RQSTS.CODE_RD_HIT")
+ DC_CM = Event("L2_RQSTS.CODE_RD_MISS")
+ DC_IN = Event("L2_LINES_IN.ALL")
+ DC_OUT_NS = None
+ DC_OUT_S = None
+ l2_lines_out = None
+ l2_out_rate = None
+ wbn = None
+ isd = None
+ try:
+ DC_OUT_NS = Event("L2_LINES_OUT.NON_SILENT",
+ "L2_LINES_OUT.DEMAND_DIRTY",
+ "L2_LINES_IN.S")
+ DC_OUT_S = Event("L2_LINES_OUT.SILENT",
+ "L2_LINES_OUT.DEMAND_CLEAN",
+ "L2_LINES_IN.I")
+ if DC_OUT_S.name == "L2_LINES_OUT.SILENT" and (
+ args.model.startswith("skylake") or
+ args.model == "cascadelakex"):
+ DC_OUT_S.name = "L2_LINES_OUT.SILENT/any/"
+ # bring is back to per-CPU
+ l2_s = Select(DC_OUT_S / 2, Literal("#smt_on"), DC_OUT_S)
+ l2_ns = DC_OUT_NS
+ l2_lines_out = l2_s + l2_ns;
+ l2_out_rate = d_ratio(l2_lines_out, interval_sec);
+ nlr = max(l2_ns - DC_WB_U - DC_WB_D, 0)
+ wbn = d_ratio(nlr, interval_sec)
+ isd = d_ratio(l2_s, interval_sec)
+ except:
+ pass
+ DC_OUT_U = None
+ l2_pf_useless = None
+ l2_useless_rate = None
+ try:
+ DC_OUT_U = Event("L2_LINES_OUT.USELESS_HWPF")
+ l2_pf_useless = DC_OUT_U
+ l2_useless_rate = d_ratio(l2_pf_useless, interval_sec)
+ except:
+ pass
+ DC_WB_U = None
+ DC_WB_D = None
+ wbu = None
+ wbd = None
+ try:
+ DC_WB_U = Event("IDI_MISC.WB_UPGRADE")
+ DC_WB_D = Event("IDI_MISC.WB_DOWNGRADE")
+ wbu = d_ratio(DC_WB_U, interval_sec)
+ wbd = d_ratio(DC_WB_D, interval_sec)
+ except:
+ pass
+
+ l2_lines_in = DC_IN
+ l2_code_all = DC_CH + DC_CM
+ l2_code_rate = d_ratio(l2_code_all, interval_sec)
+ l2_code_miss_rate = d_ratio(DC_CM, interval_sec)
+ l2_in_rate = d_ratio(l2_lines_in, interval_sec)
+
+ return MetricGroup("l2", [
+ MetricGroup("l2_totals", [
+ Metric("l2_totals_in", "L2 cache total in per second",
+ l2_in_rate, "In/s"),
+ Metric("l2_totals_out", "L2 cache total out per second",
+ l2_out_rate, "Out/s") if l2_out_rate else None,
+ ]),
+ MetricGroup("l2_rd", [
+ Metric("l2_rd_hits", "L2 cache data read hits",
+ d_ratio(DC_HIT, l2_dmnd_rd_all), "100%"),
+ Metric("l2_rd_hits", "L2 cache data read hits",
+ d_ratio(l2_dmnd_miss, l2_dmnd_rd_all), "100%"),
+ Metric("l2_rd_requests", "L2 cache data read requests per second",
+ l2_dmnd_rrate, "requests/s"),
+ Metric("l2_rd_misses", "L2 cache data read misses per second",
+ l2_dmnd_mrate, "misses/s"),
+ ]),
+ MetricGroup("l2_hwpf", [
+ Metric("l2_hwpf_hits", "L2 cache hardware prefetcher hits",
+ d_ratio(DC_PFH, l2_pf_all), "100%"),
+ Metric("l2_hwpf_misses", "L2 cache hardware prefetcher misses",
+ d_ratio(DC_PFM, l2_pf_all), "100%"),
+ Metric("l2_hwpf_useless", "L2 cache hardware prefetcher useless prefetches per second",
+ l2_useless_rate, "100%") if l2_useless_rate else None,
+ Metric("l2_hwpf_requests", "L2 cache hardware prefetcher requests per second",
+ l2_pf_rrate, "100%"),
+ Metric("l2_hwpf_misses", "L2 cache hardware prefetcher misses per second",
+ l2_pf_mrate, "100%"),
+ ]) if DC_PFH else None,
+ MetricGroup("l2_rfo", [
+ Metric("l2_rfo_hits", "L2 cache request for ownership (RFO) hits",
+ d_ratio(DC_RFOH, l2_rfo_all), "100%"),
+ Metric("l2_rfo_misses", "L2 cache request for ownership (RFO) misses",
+ d_ratio(DC_RFOM, l2_rfo_all), "100%"),
+ Metric("l2_rfo_requests", "L2 cache request for ownership (RFO) requests per second",
+ l2_rfo_rrate, "requests/s"),
+ Metric("l2_rfo_misses", "L2 cache request for ownership (RFO) misses per second",
+ l2_rfo_mrate, "misses/s"),
+ ]),
+ MetricGroup("l2_code", [
+ Metric("l2_code_hits", "L2 cache code hits",
+ d_ratio(DC_CH, l2_code_all), "100%"),
+ Metric("l2_code_misses", "L2 cache code misses",
+ d_ratio(DC_CM, l2_code_all), "100%"),
+ Metric("l2_code_requests", "L2 cache code requests per second",
+ l2_code_rate, "requests/s"),
+ Metric("l2_code_misses", "L2 cache code misses per second",
+ l2_code_miss_rate, "misses/s"),
+ ]),
+ MetricGroup("l2_evict", [
+ MetricGroup("l2_evict_mef_lines", [
+ Metric("l2_evict_mef_lines_l3_hot_lru", "L2 evictions M/E/F lines L3 hot LRU per second",
+ wbu, "HotLRU/s") if wbu else None,
+ Metric("l2_evict_mef_lines_l3_norm_lru", "L2 evictions M/E/F lines L3 normal LRU per second",
+ wbn, "NormLRU/s") if wbn else None,
+ Metric("l2_evict_mef_lines_dropped", "L2 evictions M/E/F lines dropped per second",
+ wbd, "dropped/s") if wbd else None,
+ Metric("l2_evict_is_lines_dropped", "L2 evictions I/S lines dropped per second",
+ isd, "dropped/s") if isd else None,
+ ]),
+ ]),
+ ], description = "L2 data cache analysis")
+
+
def IntelPorts() -> Optional[MetricGroup]:
pipeline_events = json.load(open(f"{os.path.dirname(os.path.realpath(__file__))}"
f"/arch/x86/{args.model}/pipeline.json"))
@@ -363,6 +520,7 @@ all_metrics = MetricGroup("", [
Smi(),
Tsx(),
IntelBr(),
+ IntelL2(),
IntelPorts(),
IntelSwpf(),
])
--
2.44.0.278.ge034bb2e1d-goog
Powered by blists - more mailing lists