[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250612000224.780337-8-irogers@google.com>
Date: Wed, 11 Jun 2025 17:02:16 -0700
From: Ian Rogers <irogers@...gle.com>
To: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Ian Rogers <irogers@...gle.com>, Adrian Hunter <adrian.hunter@...el.com>,
Kan Liang <kan.liang@...ux.intel.com>,
"Andreas Färber" <afaerber@...e.de>, Manivannan Sadhasivam <mani@...nel.org>,
Caleb Biggers <caleb.biggers@...el.com>, linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org, Edward Baker <edward.baker@...el.com>
Subject: [PATCH v1 07/15] perf vendor events: Update GraniteRapids events
Update events from v1.08 to v1.10.
Bring in the event updates v1.10
https://github.com/intel/perfmon/commit/96259a932e2ce5f70ed7d347ca92fdeb78f83aa5
https://github.com/intel/perfmon/commit/19e315c8d2e0b44e170a6e60de44c9359062a6aa
Signed-off-by: Ian Rogers <irogers@...gle.com>
---
.../arch/x86/graniterapids/cache.json | 9 +++++
.../arch/x86/graniterapids/counter.json | 10 +++---
.../arch/x86/graniterapids/gnr-metrics.json | 36 +++++++++++++++++++
.../arch/x86/graniterapids/pipeline.json | 2 +-
.../graniterapids/uncore-interconnect.json | 19 ----------
.../arch/x86/graniterapids/uncore-io.json | 27 +++++++++++++-
tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +-
7 files changed, 78 insertions(+), 27 deletions(-)
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/cache.json b/tools/perf/pmu-events/arch/x86/graniterapids/cache.json
index 32f99a8a3871..dbdeade6fe6f 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/cache.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/cache.json
@@ -977,6 +977,15 @@
"SampleAfterValue": "100003",
"UMask": "0x4"
},
+ {
+ "BriefDescription": "Offcore Uncacheable memory data read transactions.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.MEM_UC",
+ "PublicDescription": "This event counts noncacheable memory data read transactions. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
+ },
{
"BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
"Counter": "0,1,2,3",
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/counter.json b/tools/perf/pmu-events/arch/x86/graniterapids/counter.json
index 5d3b202eadd3..d97211a0227e 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/counter.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/counter.json
@@ -59,6 +59,11 @@
"CountersNumFixed": "0",
"CountersNumGeneric": 4
},
+ {
+ "Unit": "UBOX",
+ "CountersNumFixed": "0",
+ "CountersNumGeneric": "2"
+ },
{
"Unit": "PCU",
"CountersNumFixed": "0",
@@ -73,10 +78,5 @@
"Unit": "MDF",
"CountersNumFixed": "0",
"CountersNumGeneric": "4"
- },
- {
- "Unit": "UBOX",
- "CountersNumFixed": "0",
- "CountersNumGeneric": "2"
}
]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json b/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json
index af527f7f9d0c..9a620e1b8de8 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json
@@ -95,6 +95,12 @@
"MetricName": "io_bandwidth_read",
"ScaleUnit": "1MB/s"
},
+ {
+ "BriefDescription": "Bandwidth of inbound IO reads that are initiated by end device controllers that are requesting memory from the CPU and miss the L3 cache",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_read_l3_miss",
+ "ScaleUnit": "1MB/s"
+ },
{
"BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1e6 / duration_time",
@@ -113,6 +119,12 @@
"MetricName": "io_bandwidth_write",
"ScaleUnit": "1MB/s"
},
+ {
+ "BriefDescription": "Bandwidth of inbound IO writes that are initiated by end device controllers that are writing memory to the CPU",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_write_l3_miss",
+ "ScaleUnit": "1MB/s"
+ },
{
"BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket",
"MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL) * 64 / 1e6 / duration_time",
@@ -125,6 +137,30 @@
"MetricName": "io_bandwidth_write_remote",
"ScaleUnit": "1MB/s"
},
+ {
+ "BriefDescription": "The percent of inbound full cache line writes initiated by IO that miss the L3 cache",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM",
+ "MetricName": "io_full_write_l3_miss",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Message Signaled Interrupts (MSI) per second sent by the integrated I/O traffic controller (IIO) to System Configuration Controller (Ubox)",
+ "MetricExpr": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED / duration_time",
+ "MetricName": "io_msi",
+ "ScaleUnit": "1per_sec"
+ },
+ {
+ "BriefDescription": "The percent of inbound partial writes initiated by IO that miss the L3 cache",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) / (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)",
+ "MetricName": "io_partial_write_l3_miss",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "The percent of inbound reads initiated by IO that miss the L3 cache",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR",
+ "MetricName": "io_read_l3_miss",
+ "ScaleUnit": "100%"
+ },
{
"BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions",
"MetricExpr": "ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json b/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json
index 1edfdad1600d..27af3bd6bacf 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json
@@ -738,7 +738,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x4c",
"EventName": "LOAD_HIT_PREFETCH.SWPF",
- "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json
index e5bd11b27bcd..6667fbc50452 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json
@@ -1915,24 +1915,6 @@
"UMask": "0x4",
"Unit": "UPI"
},
- {
- "BriefDescription": "Tx Flit Buffer Allocations : Number of allocations into the UPI Tx Flit Buffer. Generally, when data is transmitted across UPI, it will bypass the TxQ and pass directly to the link. However, the TxQ will be used with L0p and when LLR occurs, increasing latency to transfer out to the link. This event can be used in conjunction with the Flit Buffer Occupancy event in order to calculate the average flit buffer lifetime.",
- "Counter": "0,1,2,3",
- "EventCode": "0x40",
- "EventName": "UNC_UPI_TxL_INSERTS",
- "Experimental": "1",
- "PerPkg": "1",
- "Unit": "UPI"
- },
- {
- "BriefDescription": "Tx Flit Buffer Occupancy : Accumulates the number of flits in the TxQ. Generally, when data is transmitted across UPI, it will bypass the TxQ and pass directly to the link. However, the TxQ will be used with L0p and when LLR occurs, increasing latency to transfer out to the link. This can be used with the cycles not empty event to track average occupancy, or the allocations event to track average lifetime in the TxQ.",
- "Counter": "0,1,2,3",
- "EventCode": "0x42",
- "EventName": "UNC_UPI_TxL_OCCUPANCY",
- "Experimental": "1",
- "PerPkg": "1",
- "Unit": "UPI"
- },
{
"BriefDescription": "Message Received : Doorbell",
"Counter": "0,1",
@@ -1970,7 +1952,6 @@
"Counter": "0,1",
"EventCode": "0x42",
"EventName": "UNC_U_EVENT_MSG.MSI_RCVD",
- "Experimental": "1",
"PerPkg": "1",
"PublicDescription": "Message Received : MSI : Message Signaled Interrupts - interrupts sent by devices (including PCIe via IOxAPIC) (Socket Mode only)",
"UMask": "0x2",
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json
index 886b99a971be..f4f956966e16 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json
@@ -1121,8 +1121,9 @@
"Unit": "IIO"
},
{
- "BriefDescription": "Occupancy of outbound request queue : To device : Counts number of outbound requests/completions IIO is currently processing",
+ "BriefDescription": "This event is deprecated. [This event is alias to UNC_IIO_NUM_OUTSTANDING_REQ_FROM_CPU.TO_IO]",
"Counter": "2,3",
+ "Deprecated": "1",
"EventCode": "0xc5",
"EventName": "UNC_IIO_NUM_OUSTANDING_REQ_FROM_CPU.TO_IO",
"Experimental": "1",
@@ -1132,6 +1133,18 @@
"UMask": "0x8",
"Unit": "IIO"
},
+ {
+ "BriefDescription": "Occupancy of outbound request queue : To device : Counts number of outbound requests/completions IIO is currently processing [This event is alias to UNC_IIO_NUM_OUSTANDING_REQ_FROM_CPU.TO_IO]",
+ "Counter": "2,3",
+ "EventCode": "0xc5",
+ "EventName": "UNC_IIO_NUM_OUTSTANDING_REQ_FROM_CPU.TO_IO",
+ "Experimental": "1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x0FF",
+ "UMask": "0x8",
+ "Unit": "IIO"
+ },
{
"BriefDescription": "Passing data to be written",
"Counter": "0,1,2,3",
@@ -1300,6 +1313,18 @@
"UMask": "0x4",
"Unit": "IIO"
},
+ {
+ "BriefDescription": "Posted requests sent by the integrated IO (IIO) controller to the Ubox, useful for counting message signaled interrupts (MSI).",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x8e",
+ "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED",
+ "FCMask": "0x01",
+ "PerPkg": "1",
+ "PortMask": "0x0FF",
+ "PublicDescription": "-",
+ "UMask": "0x4",
+ "Unit": "IIO"
+ },
{
"BriefDescription": "All 9 bits of Page Walk Tracker Occupancy",
"Counter": "0,1,2,3",
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index f3fe686b6630..960076e3f66f 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -13,7 +13,7 @@ GenuineIntel-6-CF,v1.14,emeraldrapids,core
GenuineIntel-6-5[CF],v13,goldmont,core
GenuineIntel-6-7A,v1.01,goldmontplus,core
GenuineIntel-6-B6,v1.09,grandridge,core
-GenuineIntel-6-A[DE],v1.08,graniterapids,core
+GenuineIntel-6-A[DE],v1.10,graniterapids,core
GenuineIntel-6-(3C|45|46),v36,haswell,core
GenuineIntel-6-3F,v29,haswellx,core
GenuineIntel-6-7[DE],v1.24,icelake,core
--
2.50.0.rc1.591.g9c95f17f64-goog
Powered by blists - more mailing lists