lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250529123456.1801-3-ravi.bangoria@amd.com>
Date: Thu, 29 May 2025 12:34:54 +0000
From: Ravi Bangoria <ravi.bangoria@....com>
To: Peter Zijlstra <peterz@...radead.org>, Arnaldo Carvalho de Melo
	<acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>
CC: Ravi Bangoria <ravi.bangoria@....com>, Ingo Molnar <mingo@...hat.com>,
	Stephane Eranian <eranian@...gle.com>, Ian Rogers <irogers@...gle.com>, "Kan
 Liang" <kan.liang@...ux.intel.com>, James Clark <james.clark@...aro.org>,
	"Leo Yan" <leo.yan@....com>, Joe Mario <jmario@...hat.com>,
	<linux-kernel@...r.kernel.org>, <linux-perf-users@...r.kernel.org>, "Santosh
 Shukla" <santosh.shukla@....com>, Ananth Narayan <ananth.narayan@....com>,
	Sandipan Das <sandipan.das@....com>
Subject: [PATCH 2/4] perf mem/c2c amd: Wire IBS OP PMU load/store SW filter

Currently, the perf mem/c2c tools on AMD record the ibs_op// PMU event,
which has no inherent load/store filtering capability. With the addition
of a software-based load/store filtering capability in the IBS OP PMU,
the perf mem/c2c tools can now record load-only, store-only and
load-store-only events. Wire them up accordingly.

Before:
  $ perf mem record -v -e list
  mem-ldst     ibs_op//          : available

After:
  $ perf mem record -v -e list
  mem-load     ibs_op/swfilt=1,ldop=1/  : available
  mem-store    ibs_op/swfilt=1,stop=1/  : available
  mem-ldst     ibs_op/swfilt=1,ldop=1,stop=1/: available

Signed-off-by: Ravi Bangoria <ravi.bangoria@....com>
---
 tools/perf/arch/x86/util/mem-events.c      | 12 ++++++++++
 tools/perf/arch/x86/util/mem-events.h      |  2 ++
 tools/perf/arch/x86/util/pmu.c             | 17 +++++++++-----
 tools/perf/tests/shell/test_data_symbol.sh | 26 +++++++++++++---------
 tools/perf/util/mem-events.c               |  2 +-
 5 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
index b38f519020ff..0a034317b55c 100644
--- a/tools/perf/arch/x86/util/mem-events.c
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -32,3 +32,15 @@ struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX] = {
 	E(NULL,		NULL,		NULL,	false,	0),
 	E("mem-ldst",	"%s/ldlat=%u/",	NULL,	true,	0),
 };
+
+struct perf_mem_event perf_mem_events_amd_swfilt[PERF_MEM_EVENTS__MAX] = {
+	E("mem-load",	"%s/swfilt=1,ldop=1/",		NULL,	false,	0),
+	E("mem-store",	"%s/swfilt=1,stop=1/",		NULL,	false,	0),
+	E("mem-ldst",	"%s/swfilt=1,ldop=1,stop=1/",	NULL,	false,	0),
+};
+
+struct perf_mem_event perf_mem_events_amd_ldlat_swfilt[PERF_MEM_EVENTS__MAX] = {
+	E("mem-load",	"%s/ldlat=%u,swfilt=1,ldop=1/",		NULL,	true,	0),
+	E("mem-store",	"%s/swfilt=1,stop=1/",			NULL,	false,	0),
+	E("mem-ldst",	"%s/ldlat=%u,swfilt=1,ldop=1,stop=1/",	NULL,	true,	0),
+};
diff --git a/tools/perf/arch/x86/util/mem-events.h b/tools/perf/arch/x86/util/mem-events.h
index 11e09a256f5b..bde1e51f639c 100644
--- a/tools/perf/arch/x86/util/mem-events.h
+++ b/tools/perf/arch/x86/util/mem-events.h
@@ -7,5 +7,7 @@ extern struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX];
 
 extern struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX];
 extern struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX];
+extern struct perf_mem_event perf_mem_events_amd_swfilt[PERF_MEM_EVENTS__MAX];
+extern struct perf_mem_event perf_mem_events_amd_ldlat_swfilt[PERF_MEM_EVENTS__MAX];
 
 #endif /* _X86_MEM_EVENTS_H */
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 58113482654b..34f25ec9cca6 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -271,7 +271,7 @@ static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool
 
 void perf_pmu__arch_init(struct perf_pmu *pmu)
 {
-	struct perf_pmu_caps *ldlat_cap;
+	struct perf_pmu_caps *ldlat_cap, *swfilt_ldst_cap;
 
 #ifdef HAVE_AUXTRACE_SUPPORT
 	if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
@@ -295,11 +295,18 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
 			return;
 
 		ldlat_cap = perf_pmu__get_cap(pmu, "ldlat");
-		if (!ldlat_cap || strcmp(ldlat_cap->value, "1"))
-			return;
+		swfilt_ldst_cap = perf_pmu__get_cap(pmu, "swfilt_ldst");
+
+		if (ldlat_cap && !strcmp(ldlat_cap->value, "1")) {
+			perf_mem_events__loads_ldlat = 0;
 
-		perf_mem_events__loads_ldlat = 0;
-		pmu->mem_events = perf_mem_events_amd_ldlat;
+			if (swfilt_ldst_cap && !strcmp(swfilt_ldst_cap->value, "1"))
+				pmu->mem_events = perf_mem_events_amd_ldlat_swfilt;
+			else
+				pmu->mem_events = perf_mem_events_amd_ldlat;
+		} else if (swfilt_ldst_cap && !strcmp(swfilt_ldst_cap->value, "1")) {
+			pmu->mem_events = perf_mem_events_amd_swfilt;
+		}
 	} else {
 		if (pmu->is_core) {
 			if (perf_pmu__have_event(pmu, "mem-loads-aux"))
diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh
index d61b5659a46d..e2156c9a4bc9 100755
--- a/tools/perf/tests/shell/test_data_symbol.sh
+++ b/tools/perf/tests/shell/test_data_symbol.sh
@@ -57,7 +57,7 @@ echo "Recording workload..."
 is_amd=$(grep -E -c 'vendor_id.*AuthenticAMD' /proc/cpuinfo)
 if (($is_amd >= 1)); then
 	mem_events="$(perf mem record -v -e list 2>&1)"
-	if ! [[ "$mem_events" =~ ^mem\-ldst.*ibs_op/(.*)/.*available ]]; then
+	if ! [[ "$mem_events" =~ mem\-ldst.*ibs_op/(.*)/.*available ]]; then
 		echo "ERROR: mem-ldst event is not matching"
 		exit 1
 	fi
@@ -65,18 +65,22 @@ if (($is_amd >= 1)); then
 	# --ldlat on AMD:
 	# o Zen4 and earlier uarch does not support ldlat
 	# o Even on supported platforms, it's disabled (--ldlat=0) by default.
-	ldlat=${BASH_REMATCH[1]}
-	if [[ -n $ldlat ]]; then
-		if ! [[ "$ldlat" =~ ldlat=0 ]]; then
-			echo "ERROR: ldlat not initialized to 0?"
-			exit 1
+	format=${BASH_REMATCH[1]}
+	if [[ $format =~ ldlat=(\d*) ]]; then
+		ldlat=${BASH_REMATCH[1]}
+		if [[ -n $ldlat ]]; then
+			if ! [[ "$ldlat" =~ ldlat=0 ]]; then
+				echo "ERROR: ldlat not initialized to 0?"
+				exit 1
+			fi
+
+			mem_events="$(perf mem record -v --ldlat=150 -e list 2>&1)"
+			if ! [[ "$mem_events" =~ ^mem-ldst.*ibs_op/ldlat=150/.*available ]]; then
+				echo "ERROR: --ldlat not honored?"
+				exit 1
+			fi
 		fi
 
-		mem_events="$(perf mem record -v --ldlat=150 -e list 2>&1)"
-		if ! [[ "$mem_events" =~ ^mem-ldst.*ibs_op/ldlat=150/.*available ]]; then
-			echo "ERROR: --ldlat not honored?"
-			exit 1
-		fi
 	fi
 
 	# perf mem/c2c internally uses IBS PMU on AMD CPU which doesn't
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 80b3069427bc..24afdd12bed2 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -93,7 +93,7 @@ static const char *perf_pmu__mem_events_name(struct perf_pmu *pmu, int i,
 	if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) {
 		if (e->ldlat) {
 			if (!e->aux_event) {
-				/* ARM and Most of Intel */
+				/* ARM, AMD and Most of Intel */
 				scnprintf(buf, buf_size,
 					  e->name, pmu->name,
 					  perf_mem_events__loads_ldlat);
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ