lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20211015204713.2855996-2-goldstein.w.n@gmail.com>
Date:   Fri, 15 Oct 2021 15:47:17 -0500
From:   Noah Goldstein <goldstein.w.n@...il.com>
To:     unlisted-recipients:; (no To-header on input)
Cc:     goldstein.w.n@...il.com, tglx@...utronix.de, mingo@...hat.com,
        bp@...en8.de, x86@...nel.org, hpa@...or.com, luto@...nel.org,
        linux-kernel@...r.kernel.org
Subject: [PATCH v2 2/2] x86/xstate: Make AVX512 status tracking more accurate

This patch splits the timestamps for tracking the AVX512 status into
'avx512_ZMM_Hi256_timestamp' and 'avx512_Hi16_ZMM_timestamp'. They are
used for tracking XFEATURE_ZMM_Hi256 and XFEATURE_Hi16_ZMM use
respectively.

The purpose of tracking the AVX512 status is to convey information
about possible frequency throttling. The current implementation has
false positives on XFEATURE_OPMASK use and on any use of the hi16 xmm
and ymm registers (which are included in the XFEATURE_Hi16_ZMM set),
as neither will cause frequency throttling.

This patch's implementation avoids these false positives where it can,
to add more clarity to the output. The 'avx512_ZMM_Hi256_timestamp'
will not have false positives, so its value will at least be indicative
of frequency throttling. Since 'avx512_Hi16_ZMM_timestamp' can still
indicate frequency throttling from zmm16...zmm31 use, but has false
positives, it is tracked separately.

Signed-off-by: Noah Goldstein <goldstein.w.n@...il.com>
---
Because (Hi16_ZMM | ZMM_Hi256) will likely be full of false positives
on any machine that supports avx512, I split the avx512_timestamp.

One to track Hi16_ZMM and one for ZMM_Hi256. My thought is that it's
not good to add more holes where the status doesn't report, but we
should also have fields that are useful and not burdened by false
positives.

This might be overdoing it. If so we can either use a combination of
both or just ZMM_Hi256.

 arch/x86/include/asm/fpu/types.h | 16 +++++++++---
 arch/x86/kernel/fpu/xstate.c     | 45 ++++++++++++++++++++++++++------
 2 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index f5a38a5f3ae1..cb10909fa3da 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -330,11 +330,21 @@ struct fpu {
 	unsigned int			last_cpu;
 
 	/*
-	 * @avx512_timestamp:
+	 * @avx512_ZMM_Hi256_timestamp:
 	 *
-	 * Records the timestamp of AVX512 use during last context switch.
+	 * Records the timestamp of AVX512 use in the ZMM_Hi256 xfeature
+	 * set. This includes zmm0...zmm15.
 	 */
-	unsigned long			avx512_timestamp;
+	unsigned long			avx512_ZMM_Hi256_timestamp;
+
+	/*
+	 * @avx512_Hi16_ZMM_timestamp:
+	 *
+	 * Records the timestamp of AVX512 use in the Hi16_ZMM xfeature
+	 * set. This includes usage of any of the hi16 xmm, ymm, or zmm
+	 * registers.
+	 */
+	unsigned long			avx512_Hi16_ZMM_timestamp;
 
 	/*
 	 * @state:
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 00b495914be2..5b0ff609af2f 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1246,7 +1246,7 @@ void xrstors(struct xregs_state *xstate, u64 mask)
 }
 
 /*
- * Track of the state of desired avx architecture features.
+ * Keep track of the state of desired avx related xfeatures.
  */
 void fpu_update_avx_timestamp(struct fpu *fpu)
 {
@@ -1254,18 +1254,28 @@ void fpu_update_avx_timestamp(struct fpu *fpu)
 	 * AVX512 state is tracked here because its use is known to slow
 	 * the max clock speed of the core.
 	 */
-	if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
-		fpu->avx512_timestamp = jiffies;
+
+	/*
+	 * Store a separate state for ZMM_Hi256 and Hi16_ZMM xfeature use.
+	 * If ZMM_Hi256 is used the machine has certainly used a zmm
+	 * register.  Hi16_ZMM, however, has false positives on usage of
+	 * hi16 xmm and ymm registers.
+	 */
+	if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_ZMM_Hi256)
+		fpu->avx512_ZMM_Hi256_timestamp = jiffies;
+	if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_Hi16_ZMM)
+		fpu->avx512_Hi16_ZMM_timestamp = jiffies;
 }
 
+
 #ifdef CONFIG_PROC_PID_ARCH_STATUS
+
 /*
- * Report the amount of time elapsed in millisecond since last AVX512
- * use in the task.
+ * Helper function for computing proper output for avx512_status
+ * timestamp.
  */
-static void avx512_status(struct seq_file *m, struct task_struct *task)
+static long avx_status_compute_delta(unsigned long timestamp)
 {
-	unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
 	long delta;
 
 	if (!timestamp) {
@@ -1282,8 +1292,27 @@ static void avx512_status(struct seq_file *m, struct task_struct *task)
 			delta = LONG_MAX;
 		delta = jiffies_to_msecs(delta);
 	}
+	return delta;
+}
 
-	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
+/*
+ * Report the amount of time elapsed in milliseconds since last AVX512
+ * use in the task.
+ */
+static void avx512_status(struct seq_file *m, struct task_struct *task)
+{
+	unsigned long timestamp;
+	long delta_ZMM_Hi256, delta_Hi16_ZMM;
+
+	timestamp = READ_ONCE(task->thread.fpu.avx512_ZMM_Hi256_timestamp);
+	delta_ZMM_Hi256 = avx_status_compute_delta(timestamp);
+
+	timestamp = READ_ONCE(task->thread.fpu.avx512_Hi16_ZMM_timestamp);
+	delta_Hi16_ZMM = avx_status_compute_delta(timestamp);
+
+	seq_put_decimal_ll(m, "AVX512_ZMM_Hi256_elapsed_ms:\t", delta_ZMM_Hi256);
+	seq_putc(m, '\n');
+	seq_put_decimal_ll(m, "AVX512_Hi16_ZMM_elapsed_ms:\t", delta_Hi16_ZMM);
 	seq_putc(m, '\n');
 }
 
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ