lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20220415223325.21953-1-vgomonovych@marvell.com>
Date:   Fri, 15 Apr 2022 15:33:21 -0700
From:   Vasyl Gomonovych <vgomonovych@...vell.com>
To:     <lenb@...nel.org>, <james.morse@....com>, <tony.luck@...el.com>,
        <bp@...en8.de>, <mchehab@...nel.org>, <rric@...nel.org>,
        <robert.moore@...el.com>, <rdunlap@...radead.org>,
        <ying.huang@...el.com>, <vgomonovych@...vell.com>
CC:     "Rafael J. Wysocki" <rafael@...nel.org>,
        <linux-kernel@...r.kernel.org>, <linux-acpi@...r.kernel.org>,
        <linux-edac@...r.kernel.org>, <devel@...ica.org>
Subject: [PATCH] EDAC: OcteonTX: Add EDAC driver OcteonTX2/Octeon10

Marvell OcteonTX2/Octeon10 (CN10K) SoCs have EDAC-capable
on-chip peripherals, namely the DRAM controllers and the
internal memory controllers used for TAD/cache diagnostics.
The platforms correct single-bit errors and detect double-bit errors.
The driver is notified via SDEI after firmware-first handling in TF-ARM.
Make apei_hest_parse() available again; the driver uses it to
enumerate error sources and to adapt the HEST error status
block addresses after firmware has modified them to match the
DRAM properties and layout. This reverts
commit 06606646af97 ("ACPI: APEI: mark apei_hest_parse() static")

Signed-off-by: Vasyl Gomonovych <vgomonovych@...vell.com>
---
 drivers/acpi/apei/hest.c     |   5 +-
 drivers/edac/Kconfig         |  11 +
 drivers/edac/Makefile        |   1 +
 drivers/edac/octeontx_edac.c | 598 +++++++++++++++++++++++++++++++++++
 drivers/edac/octeontx_edac.h | 102 ++++++
 include/acpi/apei.h          |   3 +
 6 files changed, 717 insertions(+), 3 deletions(-)
 create mode 100644 drivers/edac/octeontx_edac.c
 create mode 100644 drivers/edac/octeontx_edac.h

diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 6aef1ee5e1bd..317bba602ad5 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -86,9 +86,7 @@ static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
 	return len;
 };
 
-typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data);
-
-static int apei_hest_parse(apei_hest_func_t func, void *data)
+int apei_hest_parse(apei_hest_func_t func, void *data)
 {
 	struct acpi_hest_header *hest_hdr;
 	int i, rc, len;
@@ -123,6 +121,7 @@ static int apei_hest_parse(apei_hest_func_t func, void *data)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(apei_hest_parse);
 
 /*
  * Check if firmware advertises firmware first mode. We need FF bit to be set
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 58ab63642e72..bb42d5227954 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -539,4 +539,15 @@ config EDAC_DMC520
 	  Support for error detection and correction on the
 	  SoCs with ARM DMC-520 DRAM controller.
 
+config EDAC_OCTEONTX
+	bool "Marvell OcteonTX2 Octeon10 SoC EDAC"
+	select ARM_SDE_INTERFACE
+	select ACPI_APEI_GHES
+	help
+	  Support for error detection and correction on the
+	  Marvell OcteonTX2 and Octeon10 (CN10K) SoCs.
+	  This driver handles SECDED errors that are detected in
+	  the firmware-first model and reported via SDEI callbacks.
+	  The EDAC driver reports Single Bit Errors and Double Bit Errors.
+
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 2d1641a27a28..dab4438a6c1a 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -84,3 +84,4 @@ obj-$(CONFIG_EDAC_QCOM)			+= qcom_edac.o
 obj-$(CONFIG_EDAC_ASPEED)		+= aspeed_edac.o
 obj-$(CONFIG_EDAC_BLUEFIELD)		+= bluefield_edac.o
 obj-$(CONFIG_EDAC_DMC520)		+= dmc520_edac.o
+obj-$(CONFIG_EDAC_OCTEONTX)		+= octeontx_edac.o
diff --git a/drivers/edac/octeontx_edac.c b/drivers/edac/octeontx_edac.c
new file mode 100644
index 000000000000..1b4c15931d3b
--- /dev/null
+++ b/drivers/edac/octeontx_edac.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/of_address.h>
+#include <linux/arm_sdei.h>
+#include "edac_mc.h"
+#include "edac_device.h"
+#include "octeontx_edac.h"
+
+#define DRIVER_NAME	"octeontx-edac-ghes"
+/* Logging helpers; the ## lets otx_debug() be used without variadic arguments. */
+#define otx_printk(level, fmt, arg...) edac_printk(level, DRIVER_NAME, fmt, ##arg)
+#define otx_debug(fmt, ...) pr_devel(DRIVER_NAME ":" fmt, ##__VA_ARGS__)
+
+static const struct of_device_id octeontx_edac_ghes_of_match[] = {
+	{ .compatible = "marvell,sdei-ghes", },
+	{ },	/* sentinel */
+};
+MODULE_DEVICE_TABLE(of, octeontx_edac_ghes_of_match);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id octeontx_edac_ghes_acpi_match[] = {
+	{ "MRVLECC1", 0 },
+	{ },	/* sentinel */
+};
+MODULE_DEVICE_TABLE(acpi, octeontx_edac_ghes_acpi_match);
+#endif
+/* PCI devices whose MSI-X enable bit is set by octeontx_edac_msix_init() */
+static const struct pci_device_id octeontx_edac_ghes_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_OCTEONTX2_LMC) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_OCTEONTX2_MCC) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_OCTEONTX2_MDC) },
+	{ 0, },	/* sentinel */
+};
+
+/* SDEI event handler: defer processing of the source passed in @arg to a work item. */
+static int octeontx_mc_sdei_callback(u32 event_id, struct pt_regs *regs, void *arg)
+{
+	struct octeontx_edac_ghes *gsrc = arg;
+
+	if (gsrc) {
+		schedule_work(&gsrc->mc_work);
+		return 0;
+	}
+
+	otx_printk(KERN_DEBUG, "%s null ghes", __func__);
+
+	return -EINVAL;
+}
+
+extern void cper_estatus_print(const char *pfx,
+		const struct acpi_hest_generic_status *estatus);
+
+/* Work item: report the ring record at tail to EDAC, then advance tail by one. */
+static void octeontx_edac_mc_sdei_wq(struct work_struct *work)
+{
+	struct octeontx_edac_ghes_ring_record *ring;
+	struct octeontx_edac_mc_record rec;
+	enum hw_event_mc_err_type type;
+	struct mem_ctl_info *mci;
+	struct octeontx_edac_ghes *gsrc = container_of(work, struct octeontx_edac_ghes, mc_work);
+	u32 head = gsrc->ring->head;
+	u32 tail = gsrc->ring->tail;
+
+	otx_printk(KERN_DEBUG, "%p, head=%u (%p), tail=%u (%p), size=%u, sign=%x\n",
+			gsrc->esb_va, head, &gsrc->ring->head, tail, &gsrc->ring->tail,
+			gsrc->ring->size, gsrc->ring->sig);
+
+	/* Order the head/tail reads above before the record reads below */
+	rmb();
+	/* Nothing queued, or a tail that would index past the end of the ring */
+	if (head == tail || tail >= gsrc->ring->size)
+		return;
+
+	ring = &gsrc->ring->records[tail];
+	mci = gsrc->mci;
+
+	gsrc->esb_va->estatus.error_severity = ring->error_severity;
+	gsrc->esb_va->estatus.block_status = 1;
+	gsrc->esb_va->gdata.error_severity = ring->error_severity;
+	memcpy_fromio(&rec, gsrc->esb_va, sizeof(rec) - sizeof(rec.cper_mem));
+	memcpy_fromio(&rec.gdata.fru_text, ring->fru_text, sizeof(rec.gdata.fru_text));
+	memcpy_fromio(&rec.cper_mem, &ring->u, sizeof(rec.cper_mem));
+
+	type = (ring->error_severity == CPER_SEV_CORRECTED) ?
+			HW_EVENT_ERR_CORRECTED : HW_EVENT_ERR_FATAL;
+
+	edac_mc_handle_error(type, mci, 1, PHYS_PFN(rec.cper_mem.physical_addr),
+			offset_in_page(rec.cper_mem.physical_addr),
+			0, -1, -1, -1, ring->fru_text, mci->ctl_name);
+
+	if (acpi_disabled) {
+		cper_estatus_print(HW_ERR, &rec.estatus);
+	} else {
+		memcpy_toio(gsrc->esb_va->gdata.fru_text, rec.gdata.fru_text,
+				sizeof(rec.gdata.fru_text));
+		memcpy_toio(&gsrc->esb_va->cper_mem, &rec.cper_mem, sizeof(rec.cper_mem));
+	}
+
+	if (++tail >= gsrc->ring->size)
+		tail = 0;
+	gsrc->ring->tail = tail;
+
+	/* Order the tail store before any later stores */
+	wmb();
+}
+
+/* Set the MSI-X enable bit in @pdev's config space unless MSI or MSI-X is already on. */
+static void octeontx_edac_enable_msix(struct pci_dev *pdev)
+{
+	u16 ctrl;
+
+	if (pdev->msi_enabled || pdev->msix_enabled) {
+		dev_err(&pdev->dev, "MSI(%d) or MSIX(%d) already enabled\n",
+			pdev->msi_enabled, pdev->msix_enabled);
+		return;
+	}
+
+	pdev->msix_cap = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
+	if (!pdev->msix_cap) {
+		dev_err(&pdev->dev, "PCI dev %04d:%02d.%d missing MSIX capabilities\n",
+			pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+		return;
+	}
+	pci_read_config_word(pdev, pdev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
+	ctrl |= PCI_MSIX_FLAGS_ENABLE;
+	pci_write_config_word(pdev, pdev->msix_cap + PCI_MSIX_FLAGS, ctrl);
+	otx_printk(KERN_DEBUG, "Set MSI-X Enable for PCI dev %04d:%02d.%d\n",
+		pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+}
+
+/* Enable MSI-X on every device listed in the PCI table; does nothing on Octeon10. */
+static void octeontx_edac_msix_init(void)
+{
+	const struct pci_device_id *pdevid = octeontx_edac_ghes_pci_tbl;
+	struct pci_dev *pdev;
+
+	if (MIDR_PARTNUM(read_cpuid_id()) == OCTEON10_CPU_MODEL)
+		return;
+
+	/* Stop at the zeroed sentinel entry instead of looking it up. */
+	for (; pdevid->vendor; pdevid++) {
+		pdev = NULL;
+		while ((pdev = pci_get_device(pdevid->vendor, pdevid->device, pdev)))
+			octeontx_edac_enable_msix(pdev);
+	}
+}
+
+/* Register and enable each source's SDEI event, then write the signature to ring->reg. */
+static void octeontx_edac_ghes_driver_init(struct platform_device *pdev)
+{
+	struct octeontx_edac_driver *ghes_drv = platform_get_drvdata(pdev);
+	struct device *dev = &pdev->dev;
+	struct octeontx_edac_ghes *gsrc;
+	size_t n;
+	int ret;
+
+	for (n = 0; n < ghes_drv->source_count; n++) {
+		gsrc = &ghes_drv->source_list[n];
+
+		ret = sdei_event_register(gsrc->id, octeontx_mc_sdei_callback, gsrc);
+		if (ret < 0) {
+			dev_err(dev, "Error %d registering ghes 0x%x (%s)\n",
+				ret, gsrc->id, gsrc->name);
+			continue;
+		}
+
+		ret = sdei_event_enable(gsrc->id);
+		if (ret < 0) {
+			dev_err(dev, "Error %d enabling ghes 0x%x (%s)\n",
+				ret, gsrc->id, gsrc->name);
+			continue;
+		}
+
+		/* Only reached when both calls succeeded; failures are logged and skipped. */
+		gsrc->ring->reg = OCTEONTX_GHES_ERR_RING_SIG;
+		otx_debug("Register 0x%x (%s) %s [%p, %p, %p]\n", gsrc->id,
+			gsrc->name, "reg", gsrc->esa_va, gsrc->esb_va, gsrc->ring);
+	}
+}
+
+/* Tear down every source: stop its SDEI event, then remove its EDAC MC and device. */
+static int octentx_edac_ghes_driver_deinit(struct platform_device *pdev)
+{
+	struct octeontx_edac_driver *ghes_drv = platform_get_drvdata(pdev);
+	struct device *dev = &pdev->dev;
+	struct octeontx_edac_ghes *gsrc;
+	int ret, i;
+
+	for (i = 0; i < ghes_drv->source_count; i++) {
+		gsrc = &ghes_drv->source_list[i];
+		gsrc->ring->reg = 0;
+
+		ret = sdei_event_disable(gsrc->id);
+		if (ret < 0)
+			dev_err(dev, "Error %d disabling SDEI gsrc 0x%x (%s)\n",
+				ret, gsrc->id, gsrc->name);
+
+		ret = sdei_event_unregister(gsrc->id);
+		if (ret < 0)
+			dev_err(dev, "Error %d unregistering SDEI gsrc 0x%x (%s)\n",
+				ret, gsrc->id, gsrc->name);
+
+		if (gsrc->mci) {
+			/* The event is gone; flush the work before its mci is freed. */
+			cancel_work_sync(&gsrc->mc_work);
+			edac_mc_del_mc(&gsrc->dev);
+			edac_mc_free(gsrc->mci);
+			device_unregister(&gsrc->dev);
+		}
+	}
+
+	return 0;
+}
+
+/* Fill each source's addresses and SDEI event id from the device tree child nodes. */
+static int octeontx_edac_ghes_of_match_resource(struct platform_device *pdev)
+{
+	struct octeontx_edac_driver *ghes_drv = platform_get_drvdata(pdev);
+	struct device_node *of_node, *child_node;
+	struct octeontx_edac_ghes *gsrc;
+	struct device *dev = &pdev->dev;
+	const __be32 *res, *id;
+	u64 size, base;
+	int i = 0;
+
+	of_node = of_find_matching_node(NULL, octeontx_edac_ghes_of_match);
+	if (!of_node)
+		return -ENODEV;
+
+	for_each_available_child_of_node(of_node, child_node) {
+		gsrc = &ghes_drv->source_list[i++];
+		strscpy(gsrc->name, child_node->name, sizeof(gsrc->name));
+
+		res = of_get_address(child_node, 0, NULL, NULL);
+		base = res ? of_translate_address(child_node, res) : OF_BAD_ADDR;
+		if (base == OF_BAD_ADDR) {
+			dev_err(dev, "ghes cannot map esa addr\n");
+			goto put_nodes;
+		}
+		gsrc->esa_pa = (phys_addr_t)base;
+
+		res = of_get_address(child_node, 1, &size, NULL);
+		base = res ? of_translate_address(child_node, res) : OF_BAD_ADDR;
+		if (base == OF_BAD_ADDR) {
+			dev_err(dev, "ghes cannot map esb addr\n");
+			goto put_nodes;
+		}
+		gsrc->esb_pa = (phys_addr_t)base;
+		gsrc->esb_sz = (size_t)size;
+
+		res = of_get_address(child_node, 2, &size, NULL);
+		base = res ? of_translate_address(child_node, res) : OF_BAD_ADDR;
+		if (base == OF_BAD_ADDR) {
+			dev_err(dev, "ghes cannot map ring addr\n");
+			goto put_nodes;
+		}
+		gsrc->ring_pa = (phys_addr_t)base;
+		gsrc->ring_sz = (size_t)size;
+
+		id = of_get_property(child_node, "event-id", NULL);
+		if (!id) {
+			dev_err(dev, "ghes cannot get sdei event\n");
+			goto put_nodes;
+		}
+		gsrc->id = be32_to_cpu(*id);
+		otx_debug("%s 0x%llx/0x%llx/0x%llx, ID:0x%x)\n", gsrc->name,
+				gsrc->esa_pa, gsrc->esb_pa, gsrc->ring_pa, gsrc->id);
+	}
+
+	of_node_put(of_node);
+	return 0;
+
+put_nodes:
+	of_node_put(child_node);
+	of_node_put(of_node);
+	return -EINVAL;
+}
+
+/* apei_hest_parse() callback: store the status address of each generic (v1/v2) source. */
+static int hest_estatus_address(struct acpi_hest_header *hest_hdr, void *data)
+{
+	struct acpi_hest_generic *generic = (struct acpi_hest_generic *)hest_hdr;
+	static int i;	/* next free slot in @data; never reset */
+
+	if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR ||
+	    hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2)
+		((u64 *)data)[i++] = generic->error_status_address.address;
+	return 0;
+}
+
+/* Return the lowest error status address found in HEST, or 0 if allocation fails. */
+static phys_addr_t octeontx_edac_ghes_get_address(struct octeontx_edac_driver *ghes_drv)
+{
+	phys_addr_t ret = ~0ULL;
+	u64 *esrc;
+	size_t i;
+
+	esrc = kcalloc(ghes_drv->source_count, sizeof(*esrc), GFP_KERNEL);
+	if (!esrc)
+		return 0;
+
+	apei_hest_parse(hest_estatus_address, esrc);
+
+	for (i = 0; i < ghes_drv->source_count; i++)
+		ret = ret > esrc[i] ? esrc[i] : ret;
+	kfree(esrc);
+
+	return ret;
+}
+
+/* Fill each source from four consecutive MEM resources: esa, esb, ring and event id. */
+static int octeontx_edac_ghes_acpi_match_resource(struct platform_device *pdev)
+{
+	struct octeontx_edac_driver *ghes_drv = platform_get_drvdata(pdev);
+	struct octeontx_edac_ghes *gsrc;
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	size_t i = 0;
+	size_t idx = 0;
+	phys_addr_t base = 0;
+	bool octeon10 = (MIDR_PARTNUM(read_cpuid_id()) == OCTEON10_CPU_MODEL);
+
+	/* The resource starts below are added to the lowest HEST error status address. */
+	base = octeontx_edac_ghes_get_address(ghes_drv);
+	if (!base)
+		return -EINVAL;
+
+	for (i = 0; i < ghes_drv->source_count; i++) {
+		gsrc = &ghes_drv->source_list[i];
+
+		res = platform_get_resource(pdev, IORESOURCE_MEM, idx++);
+		if (!res) {
+			dev_err(dev, "ghes cannot map esa addr\n");
+			return -ENODEV;
+		}
+		gsrc->esa_pa = res->start + base;
+
+		res = platform_get_resource(pdev, IORESOURCE_MEM, idx++);
+		if (!res) {
+			dev_err(dev, "ghes cannot map esb addr\n");
+			return -ENODEV;
+		}
+		gsrc->esb_pa = res->start + base;
+		gsrc->esb_sz = resource_size(res);
+
+		res = platform_get_resource(pdev, IORESOURCE_MEM, idx++);
+		if (!res) {
+			dev_err(dev, "ghes cannot map ring addr\n");
+			return -ENODEV;
+		}
+		gsrc->ring_pa = res->start + base;
+		gsrc->ring_sz = resource_size(res);
+
+		/* The fourth resource carries the SDEI event id in its start field. */
+		res = platform_get_resource(pdev, IORESOURCE_MEM, idx++);
+		if (!res) {
+			dev_err(dev, "ghes cannot get event\n");
+			return -ENODEV;
+		}
+		gsrc->id = res->start;
+
+		if (gsrc->id == OCTEONTX_RAS_MDC_SDEI_EVENT)
+			sprintf(gsrc->name, "mdc");
+		else if (gsrc->id == OCTEONTX_RAS_MCC_SDEI_EVENT)
+			sprintf(gsrc->name, octeon10 ? "dss" : "mcc");
+		else if (gsrc->id == OCTEONTX_RAS_LMC_SDEI_EVENT)
+			sprintf(gsrc->name, octeon10 ? "tad" : "lmc");
+
+		otx_debug("ghes 0x%llx / 0x%llx / 0x%llx, sdei:0x%x)\n",
+				gsrc->esa_pa, gsrc->esb_pa, gsrc->ring_pa, gsrc->id);
+	}
+
+	return 0;
+}
+
+static int octeontx_edac_ghes_setup_resource(struct octeontx_edac_driver *ghes_drv)
+{
+	struct octeontx_edac_ghes *gsrc;
+	struct device *dev = ghes_drv->dev;
+	size_t i = 0;
+
+	for (i = 0; i < ghes_drv->source_count; i++) {
+		gsrc = &ghes_drv->source_list[i];
+		/* Claim and map the ESA slot; note sizeof() is taken of the pointer itself */
+		if (!devm_request_mem_region(dev, gsrc->esa_pa, sizeof(gsrc->esa_va), gsrc->name))
+			return -EBUSY;
+		gsrc->esa_va = devm_ioremap(dev, gsrc->esa_pa, sizeof(gsrc->esa_va));
+		if (!gsrc->esa_va)
+			return -ENOMEM;
+		/* ACPI: store the ESB address in the slot, unmap it (esa_va goes stale), remap */
+		if (has_acpi_companion(dev)) {
+			*gsrc->esa_va = gsrc->esb_pa;
+			devm_iounmap(ghes_drv->dev, gsrc->esa_va);
+			devm_release_mem_region(ghes_drv->dev, gsrc->esa_pa, sizeof(gsrc->esa_va));
+			acpi_os_map_iomem(gsrc->esa_pa, sizeof(gsrc->esa_pa));
+		}
+		/* Claim and map the Error Status Block */
+		if (!devm_request_mem_region(dev, gsrc->esb_pa, gsrc->esb_sz, gsrc->name))
+			return -EBUSY;
+		gsrc->esb_va = devm_ioremap(dev, gsrc->esb_pa, gsrc->esb_sz);
+		if (!gsrc->esb_va)
+			return -ENOMEM;
+		/* Claim and map the record ring */
+		if (!devm_request_mem_region(dev, gsrc->ring_pa, gsrc->ring_sz, gsrc->name))
+			return -EBUSY;
+		gsrc->ring = devm_ioremap(dev, gsrc->ring_pa, gsrc->ring_sz);
+		if (!gsrc->ring)
+			return -ENOMEM;
+
+		otx_debug("%s %s-%x %p/%p/%p\n", __func__, gsrc->name, gsrc->id,
+			gsrc->esa_va, gsrc->esb_va, gsrc->ring);
+	}
+
+	return 0;
+}
+
+/* apei_hest_parse() callback: bump the int at @data for each generic (v1/v2) source. */
+static int hest_count_ghes(struct acpi_hest_header *hest_hdr, void *data)
+{
+	int *count = data;
+
+	*count += (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR ||
+		   hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2);
+
+	return 0;
+}
+
+/* Count error sources: DT child nodes if the node exists, else HEST generic entries. */
+static size_t octeontx_edac_ghes_count(struct octeontx_edac_driver *ghes_drv)
+{
+	struct device_node *of_node, *child_node;
+	struct device *dev = ghes_drv->dev;
+	/* int, not size_t: hest_count_ghes() increments through an int pointer. */
+	int count = 0;
+
+	of_node = of_find_matching_node(NULL, octeontx_edac_ghes_of_match);
+	if (of_node) {
+		for_each_available_child_of_node(of_node, child_node)
+			count++;
+		/* of_find_matching_node() returned a reference; drop it. */
+		of_node_put(of_node);
+	} else if (has_acpi_companion(dev)) {
+		apei_hest_parse(hest_count_ghes, &count);
+	}
+
+	return count;
+}
+
+/* Create a device and register an EDAC memory controller for each MC-type source. */
+static int octeontx_edac_ghes_register_mc(struct octeontx_edac_driver *ghes_drv)
+{
+	struct mem_ctl_info *mci;
+	struct edac_mc_layer layers[1];
+	struct octeontx_edac_ghes *gsrc;
+	int idx, i, ret;
+
+	for (i = 0; i < ghes_drv->source_count; i++) {
+		gsrc = &ghes_drv->source_list[i];
+		if (IS_NOT_MC_SDEI_EVENT(gsrc->id))
+			continue;
+
+		idx = edac_device_alloc_index();
+		device_initialize(&gsrc->dev);
+		dev_set_name(&gsrc->dev, "%s", gsrc->name);
+		ret = device_add(&gsrc->dev);
+		if (ret < 0) {
+			dev_err(&gsrc->dev, "add device %s\n", dev_name(&gsrc->dev));
+			put_device(&gsrc->dev);
+			return ret;
+		}
+
+		layers[0].type = EDAC_MC_LAYER_ALL_MEM;
+		layers[0].size = 1;
+		layers[0].is_virt_csrow = false;
+
+		mci = edac_mc_alloc(idx, ARRAY_SIZE(layers), layers, 0);
+		if (!mci) {
+			device_unregister(&gsrc->dev);
+			return -ENOMEM;
+		}
+
+		mci->pdev = &gsrc->dev;
+		mci->dev_name = dev_name(&gsrc->dev);
+		mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+		mci->mod_name = DRIVER_NAME;
+		mci->ctl_name = gsrc->name;
+		mci->edac_check = NULL;
+		mci->pvt_info = gsrc;
+		INIT_WORK(&gsrc->mc_work, octeontx_edac_mc_sdei_wq);
+
+		ret = edac_mc_add_mc_with_groups(mci, NULL);
+		if (ret) {
+			/* Nothing was added, so free the mci rather than deleting it. */
+			edac_mc_free(mci);
+			device_unregister(&gsrc->dev);
+			return ret;
+		}
+
+		gsrc->mci = mci;
+	}
+
+	return 0;
+}
+
+/* Probe: size the source list, resolve and map resources, then register MCs and SDEI. */
+static int octeontx_edac_ghes_probe(struct platform_device *pdev)
+{
+	struct octeontx_edac_driver *ghes_drv = NULL;
+	struct device *dev = &pdev->dev;
+	int ret = -ENODEV;
+
+	ghes_drv = devm_kzalloc(dev, sizeof(*ghes_drv), GFP_KERNEL);
+	if (!ghes_drv)
+		return -ENOMEM;
+	ghes_drv->dev = dev;
+
+	ghes_drv->source_count = octeontx_edac_ghes_count(ghes_drv);
+	if (!ghes_drv->source_count) {
+		dev_err(dev, "Not available ghes.\n");
+		return -EINVAL;
+	}
+	otx_printk(KERN_DEBUG, "%s source count %zu\n", __func__, ghes_drv->source_count);
+
+	ghes_drv->source_list = devm_kcalloc(dev, ghes_drv->source_count,
+			sizeof(struct octeontx_edac_ghes), GFP_KERNEL);
+	if (!ghes_drv->source_list)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, ghes_drv);
+
+	if (has_acpi_companion(dev)) {
+		acpi_match_device(dev->driver->acpi_match_table, dev);
+		ret = octeontx_edac_ghes_acpi_match_resource(pdev);
+	} else {
+		acpi_permanent_mmap = true;
+		set_bit(EFI_MEMMAP, &efi.flags);
+		ret = octeontx_edac_ghes_of_match_resource(pdev);
+	}
+	if (ret < 0) {
+		dev_err(dev, "Failed match resources\n");
+		goto exit0;
+	}
+
+	ret = octeontx_edac_ghes_setup_resource(ghes_drv);
+	if (ret)
+		goto exit0;
+
+	octeontx_edac_msix_init();
+
+	/* Register the MCs first: that sets up mc_work, which the SDEI callback schedules. */
+	ret = octeontx_edac_ghes_register_mc(ghes_drv);
+	if (ret)
+		goto exit0;
+
+	octeontx_edac_ghes_driver_init(pdev);
+
+	return 0;
+
+exit0:
+	dev_err(dev, "Error edac probe\n");
+
+	return ret;
+}
+
+/* Platform remove callback: tear down every error source. */
+static int octeontx_edac_ghes_remove(struct platform_device *pdev)
+{
+	/* The deinit helper always returns 0. */
+	return octentx_edac_ghes_driver_deinit(pdev);
+}
+
+static const struct platform_device_id octeontx_edac_ghes_pdev_match[] = {
+	{ .name = DRIVER_NAME, },
+	{},	/* sentinel */
+};
+MODULE_DEVICE_TABLE(platform, octeontx_edac_ghes_pdev_match);
+/* Platform driver; carries the platform id, DT and ACPI match tables defined above */
+static struct platform_driver octeontx_edac_ghes_drv_probe = {
+	.driver = {
+		.name             = DRIVER_NAME,
+		.of_match_table   = of_match_ptr(octeontx_edac_ghes_of_match),
+		.acpi_match_table = ACPI_PTR(octeontx_edac_ghes_acpi_match),
+	},
+	.probe    = octeontx_edac_ghes_probe,
+	.remove   = octeontx_edac_ghes_remove,
+	.id_table = octeontx_edac_ghes_pdev_match,
+};
+module_platform_driver(octeontx_edac_ghes_drv_probe);
+
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_DESCRIPTION("OcteonTX2 / CN10K EDAC driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/edac/octeontx_edac.h b/drivers/edac/octeontx_edac.h
new file mode 100644
index 000000000000..f79c182ff0f8
--- /dev/null
+++ b/drivers/edac/octeontx_edac.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2022 Marvell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __OCTEONTX_EDAC_H__
+#define __OCTEONTX_EDAC_H__
+
+#define OCTEON10_CPU_MODEL		(0xd49)
+#define PCI_DEVICE_ID_OCTEONTX2_LMC	(0xa022)
+#define PCI_DEVICE_ID_OCTEONTX2_MCC	(0xa070)
+#define PCI_DEVICE_ID_OCTEONTX2_MDC	(0xa073)
+
+#define OCTEONTX_RAS_MDC_SDEI_EVENT	(0x40000000)
+#define OCTEONTX_RAS_MCC_SDEI_EVENT	(0x40000001)
+#define OCTEONTX_RAS_LMC_SDEI_EVENT	(0x40000002)
+#define OCTEONTX_RAS_DSS_SDEI_EVENT	(0x40000001)
+#define OCTEONTX_RAS_TAD_SDEI_EVENT	(0x40000002)
+
+#define OCTEONTX_GHES_NAME_MAX_LEN 16
+#define OCTEONTX_GHES_FRU_TEXT_LEN 32
+
+#define OCTEONTX_GHES_ERR_RING_SIG ((int)'M' << 24 | 'R' << 16 | 'V' << 8 | 'L')
+/* True when @id is none of the MDC/MCC/LMC SDEI event ids; @id is evaluated three times. */
+#define IS_NOT_MC_SDEI_EVENT(id) (((id) != OCTEONTX_RAS_MDC_SDEI_EVENT) && \
+	((id) != OCTEONTX_RAS_MCC_SDEI_EVENT) && \
+	((id) != OCTEONTX_RAS_LMC_SDEI_EVENT))
+
+struct octeontx_edac_mc_record {
+	struct acpi_hest_generic_status estatus;	/* severity, block_status set per event */
+	struct acpi_hest_generic_data   gdata;	/* fru_text is copied from the ring record */
+	struct cper_sec_mem_err         cper_mem;	/* copied from the ring record's union */
+};
+
+/*
+ * Describes an error source per ACPI (Generic Hardware Error Source).
+ * This produces GHES-compliant error records from data forwarded by the [ATF]
+ * firmware.
+ * There exists one of these for each error source.
+ *
+ * @name:               GHES source name
+ * @id:                 SDEI event id
+ * @esa_pa:             physical address of the Error Status Address register/iomem
+ * @esb_pa:             physical address of the Error Status Block, followed by Error Status Data
+ * @ring_pa:            physical address of the ring of error records
+ * @esa_va:             mapping of the Error Status Address, which points at the Error Status Block
+ * @esb_va:             mapping of the Error Status Block
+ * @ring:               mapping of the ring of error records
+ * @ring_sz:            size of the ring region
+ * @esb_sz:             size of the Error Status Block region
+ * @dev:                device that owns the memory controller
+ * @mci:                corresponding memory controller
+ * @mc_work:            work item scheduled by the SDEI callback
+ */
+struct octeontx_edac_ghes {
+	char                            name[OCTEONTX_GHES_NAME_MAX_LEN];
+	u32                             id;
+	phys_addr_t                     esa_pa;
+	phys_addr_t                     esb_pa;
+	phys_addr_t                     ring_pa;
+	phys_addr_t                     *esa_va;
+	struct octeontx_edac_mc_record     *esb_va;
+	struct octeontx_edac_ghes_ring  *ring;
+	size_t                          ring_sz;
+	size_t                          esb_sz;
+	struct device                   dev;
+	struct mem_ctl_info             *mci;
+	struct work_struct              mc_work;
+};
+
+struct octeontx_edac_driver {
+	struct device             *dev;	/* the platform device's struct device */
+	struct octeontx_edac_ghes *source_list;	/* array of source_count entries */
+	size_t                    source_count;
+};
+
+struct octeontx_edac_ghes_ring_record {
+	union {	/* every member is a struct cper_sec_mem_err */
+		struct cper_sec_mem_err mcc;
+		struct cper_sec_mem_err mdc;
+		struct cper_sec_mem_err lmc;
+		struct cper_sec_mem_err tad;
+		struct cper_sec_mem_err dss;
+	} u;
+	uint32_t error_severity;	/* compared with CPER_SEV_CORRECTED by the work handler */
+	char fru_text[OCTEONTX_GHES_FRU_TEXT_LEN];	/* passed to EDAC as the error message */
+};
+
+struct octeontx_edac_ghes_ring {
+	uint32_t head;
+	uint32_t tail;	/* index of the next record the driver consumes */
+	uint32_t size;	/* tail wraps to 0 when it reaches size */
+	uint32_t sig;
+	uint32_t reg;	/* driver writes OCTEONTX_GHES_ERR_RING_SIG here, 0 on teardown */
+	struct octeontx_edac_ghes_ring_record records[] __aligned(8);
+};
+
+#endif // __OCTEONTX_EDAC_H__
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index afaca3a075e8..4be95ef425c0 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -39,6 +39,9 @@ void __init acpi_hest_init(void);
 static inline void acpi_hest_init(void) { }
 #endif
 
+typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data);
+int apei_hest_parse(apei_hest_func_t func, void *data);
+
 int erst_write(const struct cper_record_header *record);
 ssize_t erst_get_record_count(void);
 int erst_get_record_id_begin(int *pos);
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ