[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Fri, 5 May 2017 19:53:33 +0200
From: Borislav Petkov <bp@...en8.de>
To: Tyler Baicar <tbaicar@...eaurora.org>
Cc: christoffer.dall@...aro.org, marc.zyngier@....com,
pbonzini@...hat.com, rkrcmar@...hat.com, linux@...linux.org.uk,
catalin.marinas@....com, will.deacon@....com, rjw@...ysocki.net,
lenb@...nel.org, matt@...eblueprint.co.uk, robert.moore@...el.com,
lv.zheng@...el.com, nkaje@...eaurora.org, zjzhang@...eaurora.org,
mark.rutland@....com, james.morse@....com,
akpm@...ux-foundation.org, eun.taik.lee@...sung.com,
sandeepa.s.prabhu@...il.com, labbott@...hat.com,
shijie.huang@....com, rruigrok@...eaurora.org,
paul.gortmaker@...driver.com, tn@...ihalf.com, fu.wei@...aro.org,
rostedt@...dmis.org, bristot@...hat.com,
linux-arm-kernel@...ts.infradead.org, kvmarm@...ts.cs.columbia.edu,
kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-acpi@...r.kernel.org, linux-efi@...r.kernel.org,
devel@...ica.org, Suzuki.Poulose@....com, punit.agrawal@....com,
astone@...hat.com, harba@...eaurora.org, hanjun.guo@...aro.org,
john.garry@...wei.com, shiju.jose@...wei.com, joe@...ches.com,
rafael@...nel.org, tony.luck@...el.com, gengdongjiu@...wei.com,
xiexiuqi@...wei.com
Subject: Re: [PATCH V15 09/11] ras: acpi / apei: generate trace event for
unrecognized CPER section
On Tue, Apr 18, 2017 at 05:05:21PM -0600, Tyler Baicar wrote:
> UEFI spec allows for non-standard section in Common Platform Error
> Record. This is defined in section N.2.3 of UEFI version 2.5.
If the spec calls it non-standard, why are we calling it an "unknown
section"?
> Currently if the CPER section's type (UUID) does not match with
> any section type that the kernel knows how to parse, trace event
> is not generated for such section. And thus user is not able to know
> happening of such hardware error, including error record of
> non-standard section.
That sentence sounds funny.
> This commit generates a trace event which contains raw error data
> for unrecognized CPER section.
Never write "This commit" or "This patch" in your commit message -
that's a given.
>
> Signed-off-by: Tyler Baicar <tbaicar@...eaurora.org>
> CC: Jonathan (Zhixiong) Zhang <zjzhang@...eaurora.org>
> Tested-by: Shiju Jose <shiju.jose@...wei.com>
> ---
> drivers/acpi/apei/ghes.c | 27 +++++++++++++++++++++++----
> drivers/ras/ras.c | 1 +
> include/ras/ras_event.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 69 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index b91123f..3d9f63b 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -45,11 +45,13 @@
> #include <linux/aer.h>
> #include <linux/nmi.h>
> #include <linux/sched/clock.h>
> +#include <linux/uuid.h>
>
> #include <acpi/actbl1.h>
> #include <acpi/ghes.h>
> #include <acpi/apei.h>
> #include <asm/tlbflush.h>
> +#include <ras/ras_event.h>
>
> #include "apei-internal.h"
>
> @@ -461,12 +463,21 @@ static void ghes_do_proc(struct ghes *ghes,
> {
> int sev, sec_sev;
> struct acpi_hest_generic_data *gdata;
> + uuid_le sec_type;
> + uuid_le *fru_id = &NULL_UUID_LE;
> + char *fru_text = "";
>
> sev = ghes_severity(estatus->error_severity);
> apei_estatus_for_each_section(estatus, gdata) {
> sec_sev = ghes_severity(gdata->error_severity);
> - if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
> - CPER_SEC_PLATFORM_MEM)) {
> + sec_type = *(uuid_le *)gdata->section_type;
> +
> + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
> + fru_id = (uuid_le *)gdata->fru_id;
> + if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
> + fru_text = gdata->fru_text;
> +
> + if (!uuid_le_cmp(sec_type, CPER_SEC_PLATFORM_MEM)) {
> struct cper_sec_mem_err *mem_err;
> mem_err = acpi_hest_get_payload(gdata);
> ghes_edac_report_mem_error(ghes, sev, mem_err);
> @@ -475,8 +486,7 @@ static void ghes_do_proc(struct ghes *ghes,
> ghes_handle_memory_failure(gdata, sev);
> }
> #ifdef CONFIG_ACPI_APEI_PCIEAER
> - else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
> - CPER_SEC_PCIE)) {
> + else if (!uuid_le_cmp(sec_type, CPER_SEC_PCIE)) {
> struct cper_sec_pcie *pcie_err;
> pcie_err = acpi_hest_get_payload(gdata);
> if (sev == GHES_SEV_RECOVERABLE &&
> @@ -507,6 +517,15 @@ static void ghes_do_proc(struct ghes *ghes,
>
> }
> #endif
> +#ifdef CONFIG_RAS
> + else if (trace_unknown_sec_event_enabled()) {
> + void *unknown_err = acpi_hest_get_payload(gdata);
> +
> + trace_unknown_sec_event(&sec_type,
> + fru_id, fru_text, sec_sev,
> + unknown_err, gdata->error_data_length);
> + }
> +#endif
Put that in a function in ras.c, along with a prototype in
include/linux/ras.h (and a stub for the !CONFIG_RAS case), so that you can
save yourself the ifdeffery in an already not really easy to read function.
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
Powered by blists - more mailing lists