lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 19 Aug 2015 00:44:16 +0800
From:	fu.wei@...aro.org
To:	linaro-acpi@...ts.linaro.org, linux-kernel@...r.kernel.org,
	zjzhang@...eaurora.org, tony.luck@...el.com, gong.chen@...el.com,
	ying.huang@...el.com, tomasz.nowicki@...aro.org
Cc:	tekkamanninja@...il.com, graeme.gregory@...aro.org,
	al.stone@...aro.org, hanjun.guo@...aro.org, jcm@...hat.com,
	mark.rutland@....com, catalin.marinas@....com, will.deacon@....com,
	rjw@...ysocki.net, bp@...en8.de, matt.fleming@...el.com,
	"Chen, Gong" <gong.chen@...ux.intel.com>,
	Fu Wei <fu.wei@...aro.org>
Subject: [PATCH v2 1/2] acpi, apei: add Boot Error Record Table (BERT) support

From: Huang Ying <ying.huang@...el.com>

Under normal circumstances, when a hardware error occurs, kernel will
be notified via NMI, MCE or some other method, then kernel will
process the error condition, report it, and recover it if possible.
But sometime, the situation is so bad, so that firmware may choose to
reset directly without notifying Linux kernel.

Linux kernel can use the Boot Error Record Table (BERT) to get the
un-notified hardware errors that occurred in a previous boot. In this
patch, the error information is reported via printk.

For more information about BERT, please refer to ACPI Specification
version 6.0, section 18.3.1:
  http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf

[Tony: Applied some cleanups suggested by Fu Wei]
[Fu Wei: delete EXPORT_SYMBOL_GPL(bert_disable), and do some cleanups]

Signed-off-by: Huang Ying <ying.huang@...el.com>
Signed-off-by: Tomasz Nowicki <tomasz.nowicki@...aro.org>
Signed-off-by: Chen, Gong <gong.chen@...ux.intel.com>
Tested-by: Jonathan (Zhixiong) Zhang <zjzhang@...eaurora.org>
Tested-by: Fu Wei <fu.wei@...aro.org>
Signed-off-by: Tony Luck <tony.luck@...el.com>
Signed-off-by: Fu Wei <fu.wei@...aro.org>
---
 Documentation/kernel-parameters.txt |   3 +
 drivers/acpi/apei/Makefile          |   2 +-
 drivers/acpi/apei/bert.c            | 162 ++++++++++++++++++++++++++++++++++++
 include/acpi/apei.h                 |   1 +
 4 files changed, 167 insertions(+), 1 deletion(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1d6f045..7c6402c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -554,6 +554,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	bootmem_debug	[KNL] Enable bootmem allocator debug messages.
 
+	bert_disable	[ACPI]
+			Disable Boot Error Record Table (BERT) support.
+
 	bttv.card=	[HW,V4L] bttv (bt848 + bt878 based grabber cards)
 	bttv.radio=	Most important insmod options are available as
 			kernel args too.
diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile
index 5d575a9..e50573d 100644
--- a/drivers/acpi/apei/Makefile
+++ b/drivers/acpi/apei/Makefile
@@ -3,4 +3,4 @@ obj-$(CONFIG_ACPI_APEI_GHES)	+= ghes.o
 obj-$(CONFIG_ACPI_APEI_EINJ)	+= einj.o
 obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o
 
-apei-y := apei-base.o hest.o erst.o
+apei-y := apei-base.o hest.o erst.o bert.o
diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
new file mode 100644
index 0000000..1426227
--- /dev/null
+++ b/drivers/acpi/apei/bert.c
@@ -0,0 +1,162 @@
+/*
+ * APEI Boot Error Record Table (BERT) support
+ *
+ * Copyright 2011 Intel Corp.
+ *   Author: Huang Ying <ying.huang@...el.com>
+ *
+ * Under normal circumstances, when a hardware error occurs, kernel
+ * will be notified via NMI, MCE or some other method, then kernel
+ * will process the error condition, report it, and recover it if
+ * possible. But sometime, the situation is so bad, so that firmware
+ * may choose to reset directly without notifying Linux kernel.
+ *
+ * Linux kernel can use the Boot Error Record Table (BERT) to get the
+ * un-notified hardware errors that occurred in a previous boot.
+ *
+ * For more information about BERT, please refer to ACPI Specification
+ * version 4.0, section 17.3.1
+ *
+ * This file is licensed under GPLv2.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+
+#include "apei-internal.h"
+
+#define BERT_PFX "BERT: "
+
+static int bert_disable;
+
+static void __init bert_print_all(struct acpi_hest_generic_status *region,
+				  unsigned int region_len)
+{
+	struct acpi_hest_generic_status *estatus = region;
+	int remain = region_len;
+	u32 estatus_len;
+	int first = 1;
+
+	while (remain > sizeof(struct acpi_hest_generic_status)) {
+		/* No more error record */
+		if (!estatus->block_status)
+			break;
+
+		estatus_len = cper_estatus_len(estatus);
+		if (estatus_len < sizeof(struct acpi_hest_generic_status) ||
+		    remain < estatus_len) {
+			pr_err(FW_BUG BERT_PFX
+			       "Invalid error status block with length %u\n",
+			       estatus_len);
+			return;
+		}
+
+		if (cper_estatus_check(estatus)) {
+			pr_err(FW_BUG BERT_PFX "Invalid error status block\n");
+			goto next;
+		}
+
+		if (first) {
+			pr_info(HW_ERR "Error record from previous boot:\n");
+			first = 0;
+		}
+
+		cper_estatus_print(KERN_INFO HW_ERR, estatus);
+next:
+		estatus = (void *)estatus + estatus_len;
+		remain -= estatus_len;
+	}
+}
+
+static int __init setup_bert_disable(char *str)
+{
+	bert_disable = 1;
+
+	return 0;
+}
+__setup("bert_disable", setup_bert_disable);
+
+static int __init bert_check_table(struct acpi_table_bert *bert_tab)
+{
+	if (bert_tab->header.length < sizeof(struct acpi_table_bert))
+		return -EINVAL;
+	if (bert_tab->region_length &&
+	    bert_tab->region_length < sizeof(struct acpi_bert_region))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __init bert_init(void)
+{
+	struct acpi_hest_generic_status *bert_region;
+	struct acpi_table_bert *bert_tab;
+	unsigned int region_len;
+	acpi_status status;
+	struct resource *r;
+	int rc = -EINVAL;
+
+	if (acpi_disabled)
+		goto out;
+
+	if (bert_disable) {
+		pr_info(BERT_PFX "Boot Error Record Table (BERT) support is disabled.\n");
+		goto out;
+	}
+
+	status = acpi_get_table(ACPI_SIG_BERT, 0,
+				(struct acpi_table_header **)&bert_tab);
+	if (status == AE_NOT_FOUND) {
+		pr_err(BERT_PFX "Table is not found!\n");
+		goto out;
+	} else if (ACPI_FAILURE(status)) {
+		const char *msg = acpi_format_exception(status);
+
+		pr_err(BERT_PFX "Failed to get table, %s\n", msg);
+		goto out;
+	}
+
+	rc = bert_check_table(bert_tab);
+	if (rc) {
+		pr_err(FW_BUG BERT_PFX "BERT table is invalid\n");
+		goto out;
+	}
+
+	region_len = bert_tab->region_length;
+	if (!region_len) {
+		rc = 0;
+		goto out;
+	}
+
+	r = request_mem_region(bert_tab->address, region_len, "APEI BERT");
+	if (!r) {
+		pr_err(BERT_PFX "Can't request iomem region <%016llx-%016llx>\n",
+		       (unsigned long long)bert_tab->address,
+		       (unsigned long long)bert_tab->address + region_len - 1);
+		rc = -EIO;
+		goto out;
+	}
+
+	bert_region = ioremap_cache(bert_tab->address, region_len);
+	if (!bert_region) {
+		rc = -ENOMEM;
+		goto out_release;
+	}
+
+	bert_print_all(bert_region, region_len);
+
+	iounmap(bert_region);
+
+out_release:
+	release_mem_region(bert_tab->address, region_len);
+out:
+	if (rc)
+		bert_disable = 1;
+
+	return rc;
+}
+
+late_initcall(bert_init);
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index 76284bb..284801a 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -23,6 +23,7 @@ extern bool ghes_disable;
 #else
 #define ghes_disable 1
 #endif
+extern int bert_disable;
 
 #ifdef CONFIG_ACPI_APEI
 void __init acpi_hest_init(void);
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ