[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250529123456.1801-3-ravi.bangoria@amd.com>
Date: Thu, 29 May 2025 12:34:54 +0000
From: Ravi Bangoria <ravi.bangoria@....com>
To: Peter Zijlstra <peterz@...radead.org>, Arnaldo Carvalho de Melo
<acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>
CC: Ravi Bangoria <ravi.bangoria@....com>, Ingo Molnar <mingo@...hat.com>,
Stephane Eranian <eranian@...gle.com>, Ian Rogers <irogers@...gle.com>, "Kan
Liang" <kan.liang@...ux.intel.com>, James Clark <james.clark@...aro.org>,
"Leo Yan" <leo.yan@....com>, Joe Mario <jmario@...hat.com>,
<linux-kernel@...r.kernel.org>, <linux-perf-users@...r.kernel.org>, "Santosh
Shukla" <santosh.shukla@....com>, Ananth Narayan <ananth.narayan@....com>,
Sandipan Das <sandipan.das@....com>
Subject: [PATCH 2/4] perf mem/c2c amd: Wire IBS OP PMU load/store SW filter
Currently, the perf mem/c2c tools on AMD record the ibs_op// PMU event, which
has no inherent load/store filtering capability. With the addition
of a SW-based load/store filtering capability in the IBS OP PMU, the perf mem/
c2c tools can now record load-only, store-only and load-store-only
events. Wire them accordingly.
Before:
$ perf mem record -v -e list
mem-ldst ibs_op// : available
After:
$ perf mem record -v -e list
mem-load ibs_op/swfilt=1,ldop=1/ : available
mem-store ibs_op/swfilt=1,stop=1/ : available
mem-ldst ibs_op/swfilt=1,ldop=1,stop=1/: available
Signed-off-by: Ravi Bangoria <ravi.bangoria@....com>
---
tools/perf/arch/x86/util/mem-events.c | 12 ++++++++++
tools/perf/arch/x86/util/mem-events.h | 2 ++
tools/perf/arch/x86/util/pmu.c | 17 +++++++++-----
tools/perf/tests/shell/test_data_symbol.sh | 26 +++++++++++++---------
tools/perf/util/mem-events.c | 2 +-
5 files changed, 42 insertions(+), 17 deletions(-)
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
index b38f519020ff..0a034317b55c 100644
--- a/tools/perf/arch/x86/util/mem-events.c
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -32,3 +32,15 @@ struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX] = {
E(NULL, NULL, NULL, false, 0),
E("mem-ldst", "%s/ldlat=%u/", NULL, true, 0),
};
+
+struct perf_mem_event perf_mem_events_amd_swfilt[PERF_MEM_EVENTS__MAX] = {
+ E("mem-load", "%s/swfilt=1,ldop=1/", NULL, false, 0),
+ E("mem-store", "%s/swfilt=1,stop=1/", NULL, false, 0),
+ E("mem-ldst", "%s/swfilt=1,ldop=1,stop=1/", NULL, false, 0),
+};
+
+struct perf_mem_event perf_mem_events_amd_ldlat_swfilt[PERF_MEM_EVENTS__MAX] = {
+ E("mem-load", "%s/ldlat=%u,swfilt=1,ldop=1/", NULL, true, 0),
+ E("mem-store", "%s/swfilt=1,stop=1/", NULL, false, 0),
+ E("mem-ldst", "%s/ldlat=%u,swfilt=1,ldop=1,stop=1/", NULL, true, 0),
+};
diff --git a/tools/perf/arch/x86/util/mem-events.h b/tools/perf/arch/x86/util/mem-events.h
index 11e09a256f5b..bde1e51f639c 100644
--- a/tools/perf/arch/x86/util/mem-events.h
+++ b/tools/perf/arch/x86/util/mem-events.h
@@ -7,5 +7,7 @@ extern struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX];
extern struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX];
extern struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX];
+extern struct perf_mem_event perf_mem_events_amd_swfilt[PERF_MEM_EVENTS__MAX];
+extern struct perf_mem_event perf_mem_events_amd_ldlat_swfilt[PERF_MEM_EVENTS__MAX];
#endif /* _X86_MEM_EVENTS_H */
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 58113482654b..34f25ec9cca6 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -271,7 +271,7 @@ static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool
void perf_pmu__arch_init(struct perf_pmu *pmu)
{
- struct perf_pmu_caps *ldlat_cap;
+ struct perf_pmu_caps *ldlat_cap, *swfilt_ldst_cap;
#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
@@ -295,11 +295,18 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
return;
ldlat_cap = perf_pmu__get_cap(pmu, "ldlat");
- if (!ldlat_cap || strcmp(ldlat_cap->value, "1"))
- return;
+ swfilt_ldst_cap = perf_pmu__get_cap(pmu, "swfilt_ldst");
+
+ if (ldlat_cap && !strcmp(ldlat_cap->value, "1")) {
+ perf_mem_events__loads_ldlat = 0;
- perf_mem_events__loads_ldlat = 0;
- pmu->mem_events = perf_mem_events_amd_ldlat;
+ if (swfilt_ldst_cap && !strcmp(swfilt_ldst_cap->value, "1"))
+ pmu->mem_events = perf_mem_events_amd_ldlat_swfilt;
+ else
+ pmu->mem_events = perf_mem_events_amd_ldlat;
+ } else if (swfilt_ldst_cap && !strcmp(swfilt_ldst_cap->value, "1")) {
+ pmu->mem_events = perf_mem_events_amd_swfilt;
+ }
} else {
if (pmu->is_core) {
if (perf_pmu__have_event(pmu, "mem-loads-aux"))
diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh
index d61b5659a46d..e2156c9a4bc9 100755
--- a/tools/perf/tests/shell/test_data_symbol.sh
+++ b/tools/perf/tests/shell/test_data_symbol.sh
@@ -57,7 +57,7 @@ echo "Recording workload..."
is_amd=$(grep -E -c 'vendor_id.*AuthenticAMD' /proc/cpuinfo)
if (($is_amd >= 1)); then
mem_events="$(perf mem record -v -e list 2>&1)"
- if ! [[ "$mem_events" =~ ^mem\-ldst.*ibs_op/(.*)/.*available ]]; then
+ if ! [[ "$mem_events" =~ mem\-ldst.*ibs_op/(.*)/.*available ]]; then
echo "ERROR: mem-ldst event is not matching"
exit 1
fi
@@ -65,18 +65,22 @@ if (($is_amd >= 1)); then
# --ldlat on AMD:
# o Zen4 and earlier uarch does not support ldlat
# o Even on supported platforms, it's disabled (--ldlat=0) by default.
- ldlat=${BASH_REMATCH[1]}
- if [[ -n $ldlat ]]; then
- if ! [[ "$ldlat" =~ ldlat=0 ]]; then
- echo "ERROR: ldlat not initialized to 0?"
- exit 1
+ format=${BASH_REMATCH[1]}
+ if [[ $format =~ ldlat=(\d*) ]]; then
+ ldlat=${BASH_REMATCH[1]}
+ if [[ -n $ldlat ]]; then
+ if ! [[ "$ldlat" =~ ldlat=0 ]]; then
+ echo "ERROR: ldlat not initialized to 0?"
+ exit 1
+ fi
+
+ mem_events="$(perf mem record -v --ldlat=150 -e list 2>&1)"
+ if ! [[ "$mem_events" =~ ^mem-ldst.*ibs_op/ldlat=150/.*available ]]; then
+ echo "ERROR: --ldlat not honored?"
+ exit 1
+ fi
fi
- mem_events="$(perf mem record -v --ldlat=150 -e list 2>&1)"
- if ! [[ "$mem_events" =~ ^mem-ldst.*ibs_op/ldlat=150/.*available ]]; then
- echo "ERROR: --ldlat not honored?"
- exit 1
- fi
fi
# perf mem/c2c internally uses IBS PMU on AMD CPU which doesn't
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 80b3069427bc..24afdd12bed2 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -93,7 +93,7 @@ static const char *perf_pmu__mem_events_name(struct perf_pmu *pmu, int i,
if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) {
if (e->ldlat) {
if (!e->aux_event) {
- /* ARM and Most of Intel */
+ /* ARM, AMD and Most of Intel */
scnprintf(buf, buf_size,
e->name, pmu->name,
perf_mem_events__loads_ldlat);
--
2.43.0
Powered by blists - more mailing lists