lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <55d0c9167fc30dcb6458467d01f657d74499b00c.1351705248.git.mchehab@redhat.com>
Date:	Wed, 31 Oct 2012 15:44:54 -0200
From:	Mauro Carvalho Chehab <mchehab@...hat.com>
To:	unlisted-recipients:; (no To-header on input)
Cc:	Mauro Carvalho Chehab <mchehab@...hat.com>,
	Linux Edac Mailing List <linux-edac@...r.kernel.org>,
	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: [RFC EDAC/GHES 2/3] edac: add support for raw error reports

That allows APEI GHES driver to report errors directly, using
the EDAC error report API.

Signed-off-by: Mauro Carvalho Chehab <mchehab@...hat.com>
---
 drivers/edac/edac_core.h |  17 ++++++++
 drivers/edac/edac_mc.c   | 109 +++++++++++++++++++++++++++++++++++------------
 2 files changed, 99 insertions(+), 27 deletions(-)

diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index db30694..d936958 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -453,6 +453,23 @@ extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
 extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
 extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
 				      unsigned long page);
+
+void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
+			  struct mem_ctl_info *mci,
+			  long grain,
+			  const u16 error_count,
+			  const int top_layer,
+			  const int mid_layer,
+			  const int low_layer,
+			  const unsigned long page_frame_number,
+			  const unsigned long offset_in_page,
+			  const unsigned long syndrome,
+			  const char *msg,
+			  const char *location,
+			  const char *label,
+			  const char *other_detail,
+			  const bool enable_per_layer_report);
+
 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  struct mem_ctl_info *mci,
 			  const u16 error_count,
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index d4d891a..d3e9341 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -1079,6 +1079,81 @@ static void edac_ue_error(struct mem_ctl_info *mci,
 #define OTHER_LABEL " or "
 
 /**
+ * edac_raw_mc_handle_error - reports a memory event to userspace without doing
+ *			      anything to discover the error location
+ *
+ * @type:		severity of the error (CE/UE/Fatal)
+ * @mci:		a struct mem_ctl_info pointer
+ * @grain:		error granularity
+ * @error_count:	Number of errors of the same type
+ * @top_layer:		Memory layer[0] position
+ * @mid_layer:		Memory layer[1] position
+ * @low_layer:		Memory layer[2] position
+ * @page_frame_number:	mem page where the error occurred
+ * @offset_in_page:	offset of the error inside the page
+ * @syndrome:		ECC syndrome
+ * @msg:		Message meaningful to the end users that
+ *			explains the event\
+ * @location:		location of the error, like "csrow:0 channel:1"
+ * @label:		DIMM labels for the affected memory(ies)
+ * @other_detail:	Technical details about the event that
+ *			may help hardware manufacturers and
+ *			EDAC developers to analyse the event
+ * @enable_per_layer_report: should it increment per-layer error counts?
+ *
+ * This raw function is used internally by edac_mc_handle_error(). It should
+ * only be called directly when the hardware error come directly from BIOS,
+ * like in the case of APEI GHES driver.
+ */
+void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
+			  struct mem_ctl_info *mci,
+			  long grain,
+			  const u16 error_count,
+			  const int top_layer,
+			  const int mid_layer,
+			  const int low_layer,
+			  const unsigned long page_frame_number,
+			  const unsigned long offset_in_page,
+			  const unsigned long syndrome,
+			  const char *msg,
+			  const char *location,
+			  const char *label,
+			  const char *other_detail,
+			  const bool enable_per_layer_report)
+{
+	char detail[80];
+	u8 grain_bits;
+	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
+
+	/* Report the error via the trace interface */
+
+	grain_bits = fls_long(grain) + 1;
+	trace_mc_event(type, msg, label, error_count,
+		       mci->mc_idx, top_layer, mid_layer, low_layer,
+		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
+		       grain_bits, syndrome, other_detail);
+
+	/* Memory type dependent details about the error */
+	if (type == HW_EVENT_ERR_CORRECTED) {
+		snprintf(detail, sizeof(detail),
+			 "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
+			 page_frame_number, offset_in_page,
+			 grain, syndrome);
+		edac_ce_error(mci, error_count, pos, msg, location, label,
+			      detail, other_detail, enable_per_layer_report,
+			      page_frame_number, offset_in_page, grain);
+	} else {
+		snprintf(detail, sizeof(detail),
+			"page:0x%lx offset:0x%lx grain:%ld",
+			page_frame_number, offset_in_page, grain);
+
+		edac_ue_error(mci, error_count, pos, msg, location, label,
+			      detail, other_detail, enable_per_layer_report);
+	}
+}
+EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
+
+/**
  * edac_mc_handle_error - reports a memory event to userspace
  *
  * @type:		severity of the error (CE/UE/Fatal)
@@ -1109,7 +1184,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  const char *other_detail)
 {
 	/* FIXME: too much for stack: move it to some pre-alocated area */
-	char detail[80], location[80];
+	char location[80];
 	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
 	char *p;
 	int row = -1, chan = -1;
@@ -1117,7 +1192,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	int i;
 	long grain;
 	bool enable_per_layer_report = false;
-	u8 grain_bits;
 
 	edac_dbg(3, "MC%d\n", mci->mc_idx);
 
@@ -1242,30 +1316,11 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	if (p > location)
 		*(p - 1) = '\0';
 
-	/* Report the error via the trace interface */
-
-	grain_bits = fls_long(grain) + 1;
-	trace_mc_event(type, msg, label, error_count,
-		       mci->mc_idx, top_layer, mid_layer, low_layer,
-		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
-		       grain_bits, syndrome, other_detail);
-
-	/* Memory type dependent details about the error */
-	if (type == HW_EVENT_ERR_CORRECTED) {
-		snprintf(detail, sizeof(detail),
-			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
-			page_frame_number, offset_in_page,
-			grain, syndrome);
-		edac_ce_error(mci, error_count, pos, msg, location, label,
-			      detail, other_detail, enable_per_layer_report,
-			      page_frame_number, offset_in_page, grain);
-	} else {
-		snprintf(detail, sizeof(detail),
-			"page:0x%lx offset:0x%lx grain:%ld",
-			page_frame_number, offset_in_page, grain);
-
-		edac_ue_error(mci, error_count, pos, msg, location, label,
-			      detail, other_detail, enable_per_layer_report);
-	}
+	edac_raw_mc_handle_error(type, mci, grain, error_count,
+				 top_layer, mid_layer, low_layer,
+				 page_frame_number, offset_in_page,
+				 syndrome,
+				 msg, location, label, other_detail,
+				 enable_per_layer_report);
 }
 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ