lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20241112073729.66059-1-qiuxu.zhuo@intel.com>
Date: Tue, 12 Nov 2024 15:37:29 +0800
From: Qiuxu Zhuo <qiuxu.zhuo@...el.com>
To: ak@...ux.intel.com,
	tony.luck@...el.com
Cc: linux-kernel@...r.kernel.org,
	qiuxu.zhuo@...el.com
Subject: [PATCH 1/1] mce-inject: Add an MCE injection case with the yellow status set

Intel processors with the 'threshold-based error status' capability
use tracking hardware to monitor corrected errors of certain hardware
components (e.g., CPU caches). If the tracking hardware overflows, a
'yellow' flag is set in MCI_STATUS[54:53] to indicate that the
corrected errors of the associated hardware component have exceeded the
predefined threshold. In that case, the system may need to be scheduled
for servicing within a few weeks.

Add an MCE injection case with the yellow status set to test whether
threshold-based corrected error events are handled by the Linux kernel
and/or user-space tools such as mcelog or rasdaemon.

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@...el.com>
---
 mce.h                 | 2 ++
 mce.lex               | 2 ++
 mce.y                 | 8 ++++----
 test/corrected-yellow | 4 ++++
 4 files changed, 12 insertions(+), 4 deletions(-)
 create mode 100644 test/corrected-yellow

diff --git a/mce.h b/mce.h
index d941dce85326..b23232ec6273 100644
--- a/mce.h
+++ b/mce.h
@@ -15,6 +15,7 @@
 #define MCG_STATUS_EIPV  (1ULL<<1)   /* ip points to correct instruction */
 #define MCG_STATUS_MCIP  (1ULL<<2)   /* machine check in progress */
 #define MCG_STATUS_LMCES  (1ULL<<3)   /* local machine check exception signaled */
+#define MCG_STATUS_TES_P  (1ULL<<11)  /* Threshold-based error status supported */
 #define MCG_STATUS_SEAM_NR (1ULL<<12) /* SEAM NON-ROOT */
 
 #define MCI_STATUS_VAL   (1ULL<<63)  /* valid error */
@@ -26,6 +27,7 @@
 #define MCI_STATUS_PCC   (1ULL<<57)  /* processor context corrupt */
 #define MCI_STATUS_S	 (1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */
+#define MCI_STATUS_TES_YELLOW (1ULL<<54)  /* Threshold-based errors above threshold */
 
 /* MISC register defines */
 #define MCM_ADDR_SEGOFF  0	/* segment offset */
diff --git a/mce.lex b/mce.lex
index 81e950c16cf6..5c8cd51d7109 100644
--- a/mce.lex
+++ b/mce.lex
@@ -96,6 +96,7 @@ static struct key {
 	KEYVAL(MCIP, MCG_STATUS_MCIP),
 	KEYVAL(LMCES, MCG_STATUS_LMCES),
 	KEYVAL(SEAM_NR, MCG_STATUS_SEAM_NR),
+	KEYVAL(TES_P, MCG_STATUS_TES_P),
 	KEYVAL(VAL, MCI_STATUS_VAL),
 	KEYVAL(OVER, MCI_STATUS_OVER),
 	KEYVAL(UC, MCI_STATUS_UC),
@@ -103,6 +104,7 @@ static struct key {
 	KEYVAL(PCC, MCI_STATUS_PCC),
 	KEYVAL(S, MCI_STATUS_S),
 	KEYVAL(AR, MCI_STATUS_AR),
+	KEYVAL(TES_YELLOW, MCI_STATUS_TES_YELLOW),
 	KEYVAL(UCNA, 0),
 	KEYVAL(SRAO, MCI_STATUS_S),
 	KEYVAL(SRAR, MCI_STATUS_S|MCI_STATUS_AR),
diff --git a/mce.y b/mce.y
index 90149e046589..9a28fa77a41b 100644
--- a/mce.y
+++ b/mce.y
@@ -49,8 +49,8 @@ static void init(void);
 %token SYMBOL
 %token MACHINE CHECK EXCEPTION
 
-%token RIPV EIPV MCIP LMCES SEAM_NR
-%token VAL OVER UC EN PCC S AR UCNA SRAO SRAR
+%token RIPV EIPV MCIP LMCES TES_P SEAM_NR
+%token VAL OVER UC EN PCC S AR TES_YELLOW UCNA SRAO SRAR
 
 %%
 
@@ -103,13 +103,13 @@ mcgstatus_list:  /* empty */ { $$ = 0; }
      | mcgstatus_list mcgstatus { $$ = $1 | $2; } 
      ;
 
-mcgstatus : RIPV | EIPV | MCIP | LMCES | SEAM_NR | NUMBER ;
+mcgstatus : RIPV | EIPV | MCIP | LMCES | TES_P | SEAM_NR | NUMBER ;
 
 status_list: 	 /* empty */ { $$ = 0; } 
      | status_list status { $$ = $1 | $2; } 
 
 status: UC | EN | VAL | OVER | PCC | NUMBER | CORRECTED | UNCORRECTED | 
-     FATAL | S | AR | UCNA | SRAO | SRAR
+     FATAL | S | AR | TES_YELLOW | UCNA | SRAO | SRAR
      ; 
 
 %% 
diff --git a/test/corrected-yellow b/test/corrected-yellow
new file mode 100644
index 000000000000..15c8374cc41b
--- /dev/null
+++ b/test/corrected-yellow
@@ -0,0 +1,4 @@
+# Log corrected errors with yellow status (corrected errors above the predefined threshold).
+CPU 0 BANK 3
+MCGSTATUS TES_P
+STATUS CORRECTED TES_YELLOW
-- 
2.17.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ