[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220718164312.3994191-2-irogers@google.com>
Date: Mon, 18 Jul 2022 09:43:10 -0700
From: Ian Rogers <irogers@...gle.com>
To: perry.taylor@...el.com, caleb.biggers@...el.com,
kshipra.bopardikar@...el.com,
Kan Liang <kan.liang@...ux.intel.com>,
Zhengjun Xing <zhengjun.xing@...ux.intel.com>,
Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...hat.com>,
Namhyung Kim <namhyung@...nel.org>,
Maxime Coquelin <mcoquelin.stm32@...il.com>,
Alexandre Torgue <alexandre.torgue@...s.st.com>,
Andi Kleen <ak@...ux.intel.com>,
James Clark <james.clark@....com>,
John Garry <john.garry@...wei.com>,
linux-kernel@...r.kernel.org, linux-perf-users@...r.kernel.org
Cc: Stephane Eranian <eranian@...gle.com>,
Ian Rogers <irogers@...gle.com>
Subject: [PATCH v4 1/3] perf tsc: Add arch TSC frequency information
From: Kan Liang <kan.liang@...ux.intel.com>
The TSC frequency information is required for the event metrics with
the literal, system_tsc_freq. For the newer Intel platform, the TSC
frequency information can be retrieved from the CPUID leaf 0x15.
If the TSC frequency information isn't present the /proc/cpuinfo
approach is used.
Refactor cpuid for this use. Note, the previous stack pushing/popping
approach was broken on x86-64 that has stack red zones that would be
clobbered.
Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
Signed-off-by: Ian Rogers <irogers@...gle.com>
---
tools/perf/arch/x86/util/cpuid.h | 34 +++++++++++++++++++++++++++++++
tools/perf/arch/x86/util/header.c | 27 ++++++++++--------------
tools/perf/arch/x86/util/tsc.c | 33 ++++++++++++++++++++++++++++++
tools/perf/util/expr.c | 13 ++++++++++++
tools/perf/util/tsc.h | 1 +
5 files changed, 92 insertions(+), 16 deletions(-)
create mode 100644 tools/perf/arch/x86/util/cpuid.h
diff --git a/tools/perf/arch/x86/util/cpuid.h b/tools/perf/arch/x86/util/cpuid.h
new file mode 100644
index 000000000000..0a3ae0ace7e9
--- /dev/null
+++ b/tools/perf/arch/x86/util/cpuid.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_CPUID_H
+#define PERF_CPUID_H 1
+
+
+static inline void
+cpuid(unsigned int op, unsigned int op2, unsigned int *a, unsigned int *b,
+ unsigned int *c, unsigned int *d)
+{
+ /*
+ * Preserve %ebx/%rbx register by either placing it in %rdi or saving it
+ * on the stack - x86-64 needs to avoid the stack red zone. In PIC
+ * compilations %ebx contains the address of the global offset
+ * table. %rbx is occasionally used to address stack variables in
+ * presence of dynamic allocas.
+ */
+ asm(
+#if defined(__x86_64__)
+ "mov %%rbx, %%rdi\n"
+ "cpuid\n"
+ "xchg %%rdi, %%rbx\n"
+#else
+ "pushl %%ebx\n"
+ "cpuid\n"
+ "movl %%ebx, %%edi\n"
+ "popl %%ebx\n"
+#endif
+ : "=a"(*a), "=D"(*b), "=c"(*c), "=d"(*d)
+ : "a"(op), "2"(op2));
+}
+
+void get_cpuid_0(char *vendor, unsigned int *lvl);
+
+#endif
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index 578c8c568ffd..a51444a77a5f 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -9,18 +9,17 @@
#include "../../../util/debug.h"
#include "../../../util/header.h"
+#include "cpuid.h"
-static inline void
-cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
- unsigned int *d)
+void get_cpuid_0(char *vendor, unsigned int *lvl)
{
- __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t"
- "movl %%ebx, %%esi\n\t.byte 0x5b"
- : "=a" (*a),
- "=S" (*b),
- "=c" (*c),
- "=d" (*d)
- : "a" (op));
+ unsigned int b, c, d;
+
+ cpuid(0, 0, lvl, &b, &c, &d);
+ strncpy(&vendor[0], (char *)(&b), 4);
+ strncpy(&vendor[4], (char *)(&d), 4);
+ strncpy(&vendor[8], (char *)(&c), 4);
+ vendor[12] = '\0';
}
static int
@@ -31,14 +30,10 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt)
int nb;
char vendor[16];
- cpuid(0, &lvl, &b, &c, &d);
- strncpy(&vendor[0], (char *)(&b), 4);
- strncpy(&vendor[4], (char *)(&d), 4);
- strncpy(&vendor[8], (char *)(&c), 4);
- vendor[12] = '\0';
+ get_cpuid_0(vendor, &lvl);
if (lvl >= 1) {
- cpuid(1, &a, &b, &c, &d);
+ cpuid(1, 0, &a, &b, &c, &d);
family = (a >> 8) & 0xf; /* bits 11 - 8 */
model = (a >> 4) & 0xf; /* Bits 7 - 4 */
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index 559365f8fe52..b69144f22489 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
+#include <string.h>
#include "../../../util/tsc.h"
+#include "cpuid.h"
u64 rdtsc(void)
{
@@ -11,3 +13,34 @@ u64 rdtsc(void)
return low | ((u64)high) << 32;
}
+
+double arch_get_tsc_freq(void)
+{
+ unsigned int a, b, c, d, lvl;
+ static bool cached;
+ static double tsc;
+ char vendor[16];
+
+ if (cached)
+ return tsc;
+
+ cached = true;
+ get_cpuid_0(vendor, &lvl);
+ if (!strstr(vendor, "Intel"))
+ return 0;
+
+ /*
+ * Don't support Time Stamp Counter and
+ * Nominal Core Crystal Clock Information Leaf.
+ */
+ if (lvl < 0x15)
+ return 0;
+
+ cpuid(0x15, 0, &a, &b, &c, &d);
+ /* TSC frequency is not enumerated */
+ if (!a || !b || !c)
+ return 0;
+
+ tsc = (double)c * (double)b / (double)a;
+ return tsc;
+}
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index 675f318ce7c1..c15a9852fa41 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -12,6 +12,7 @@
#include "expr-bison.h"
#include "expr-flex.h"
#include "smt.h"
+#include "tsc.h"
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
@@ -402,6 +403,13 @@ double expr_id_data__source_count(const struct expr_id_data *data)
return data->val.source_count;
}
+#if !defined(__i386__) && !defined(__x86_64__)
+double arch_get_tsc_freq(void)
+{
+ return 0.0;
+}
+#endif
+
double expr__get_literal(const char *literal)
{
static struct cpu_topology *topology;
@@ -417,6 +425,11 @@ double expr__get_literal(const char *literal)
goto out;
}
+ if (!strcasecmp("#system_tsc_freq", literal)) {
+ result = arch_get_tsc_freq();
+ goto out;
+ }
+
/*
* Assume that topology strings are consistent, such as CPUs "0-1"
* wouldn't be listed as "0,1", and so after deduplication the number of
diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h
index 7d83a31732a7..88fd1c4c1cb8 100644
--- a/tools/perf/util/tsc.h
+++ b/tools/perf/util/tsc.h
@@ -25,6 +25,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
u64 rdtsc(void);
+double arch_get_tsc_freq(void);
size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp);
--
2.37.0.170.g444d1eabd0-goog
Powered by blists - more mailing lists