[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ba3c08e4-72b5-d05d-e44f-65d11240da28@codeaurora.org>
Date: Mon, 28 Nov 2016 11:55:14 -0700
From: "Baicar, Tyler" <tbaicar@...eaurora.org>
To: James Morse <james.morse@....com>
Cc: marc.zyngier@....com, pbonzini@...hat.com, rkrcmar@...hat.com,
linux@...linux.org.uk, catalin.marinas@....com,
will.deacon@....com, rjw@...ysocki.net, lenb@...nel.org,
matt@...eblueprint.co.uk, robert.moore@...el.com,
lv.zheng@...el.com, nkaje@...eaurora.org, zjzhang@...eaurora.org,
mark.rutland@....com, akpm@...ux-foundation.org,
eun.taik.lee@...sung.com, sandeepa.s.prabhu@...il.com,
shijie.huang@....com, rruigrok@...eaurora.org,
paul.gortmaker@...driver.com, tomasz.nowicki@...aro.org,
fu.wei@...aro.org, rostedt@...dmis.org, bristot@...hat.com,
linux-arm-kernel@...ts.infradead.org, kvmarm@...ts.cs.columbia.edu,
kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-acpi@...r.kernel.org, linux-efi@...r.kernel.org,
Suzuki.Poulose@....com, punit.agrawal@....com, astone@...hat.com,
harba@...eaurora.org, hanjun.guo@...aro.org
Subject: Re: [PATCH V5 02/10] ras: acpi/apei: cper: generic error data entry
v3 per ACPI 6.1
Hello James,
On 11/25/2016 11:20 AM, James Morse wrote:
> Hi Tyler,
>
> On 21/11/16 22:35, Tyler Baicar wrote:
>> Currently when a RAS error is reported it is not timestamped.
>> The ACPI 6.1 spec adds the timestamp field to the generic error
>> data entry v3 structure. The timestamp of when the firmware
>> generated the error is now being reported.
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>> index b79abc5..9063d68 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -420,7 +420,8 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
>> int flags = -1;
>> int sec_sev = ghes_severity(gdata->error_severity);
>> struct cper_sec_mem_err *mem_err;
>> - mem_err = (struct cper_sec_mem_err *)(gdata + 1);
>> +
>> + mem_err = acpi_hest_generic_data_payload(gdata);
>>
>> if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
>> return;
>> @@ -450,14 +451,18 @@ static void ghes_do_proc(struct ghes *ghes,
>> {
>> int sev, sec_sev;
>> struct acpi_hest_generic_data *gdata;
>> + uuid_le sec_type;
> ghes.c doesn't include <linux/uuid.h>, but I see it already uses uuid_le_cmp().
> Worth fixing as part of this patch?
I can add it here, but it shouldn't be needed. ghes.c includes
<linux/cper.h> and that header
includes <linux/uuid.h>. Should it be added just to make the dependency
more clear?
>>
>> sev = ghes_severity(estatus->error_severity);
>> apei_estatus_for_each_section(estatus, gdata) {
>> sec_sev = ghes_severity(gdata->error_severity);
>> - if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
>> + sec_type = *(uuid_le *)gdata->section_type;
>> +
> You don't use sec_type again here, why change this?
> (should it be in a later patch?)
Ah, yes, this change should be moved to patch 8 in this patchset.
>> + if (!uuid_le_cmp(sec_type,
>> CPER_SEC_PLATFORM_MEM)) {
>> struct cper_sec_mem_err *mem_err;
>> - mem_err = (struct cper_sec_mem_err *)(gdata+1);
>> +
>> + mem_err = acpi_hest_generic_data_payload(gdata);
>> ghes_edac_report_mem_error(ghes, sev, mem_err);
>>
>> arch_apei_report_mem_error(sev, mem_err);
>> @@ -467,7 +472,8 @@ static void ghes_do_proc(struct ghes *ghes,
>> else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
>> CPER_SEC_PCIE)) {
>> struct cper_sec_pcie *pcie_err;
>> - pcie_err = (struct cper_sec_pcie *)(gdata+1);
>> +
>> + pcie_err = acpi_hest_generic_data_payload(gdata);
>> if (sev == GHES_SEV_RECOVERABLE &&
>> sec_sev == GHES_SEV_RECOVERABLE &&
>> pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
>> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
>> index d425374..7e2439e 100644
>> --- a/drivers/firmware/efi/cper.c
>> +++ b/drivers/firmware/efi/cper.c
>> @@ -32,6 +32,9 @@
>> #include <linux/acpi.h>
>> #include <linux/pci.h>
>> #include <linux/aer.h>
>> +#include <linux/printk.h>
>> +#include <linux/bcd.h>
>> +#include <acpi/ghes.h>
>>
>> #define INDENT_SP " "
>>
>> @@ -386,13 +389,37 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
>> pfx, pcie->bridge.secondary_status, pcie->bridge.control);
>> }
>>
>> +static void cper_estatus_print_section_v300(const char *pfx,
>> + const struct acpi_hest_generic_data_v300 *gdata)
>> +{
>> + __u8 hour, min, sec, day, mon, year, century, *timestamp;
>> +
>> + if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
>> + timestamp = (__u8 *)&(gdata->time_stamp);
>> + sec = bcd2bin(timestamp[0]);
>> + min = bcd2bin(timestamp[1]);
>> + hour = bcd2bin(timestamp[2]);
>> + day = bcd2bin(timestamp[4]);
>> + mon = bcd2bin(timestamp[5]);
>> + year = bcd2bin(timestamp[6]);
>> + century = bcd2bin(timestamp[7]);
>> + printk("%stime: %7s %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
>> + 0x01 & *(timestamp + 3) ? "precise" : "", century,
>> + year, mon, day, hour, min, sec);
>> + }
>> +}
>> +
>> static void cper_estatus_print_section(
>> - const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
>> + const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no)
>> {
>> uuid_le *sec_type = (uuid_le *)gdata->section_type;
>> __u16 severity;
>> char newpfx[64];
>>
>> + if (acpi_hest_generic_data_version(gdata) >= 3)
>> + cper_estatus_print_section_v300(pfx,
>> + (const struct acpi_hest_generic_data_v300 *)gdata);
>> +
>> severity = gdata->error_severity;
>> printk("%s""Error %d, type: %s\n", pfx, sec_no,
>> cper_severity_str(severity));
>> @@ -403,14 +430,18 @@ static void cper_estatus_print_section(
>>
>> snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
>> if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
>> - struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
>> + struct cper_sec_proc_generic *proc_err;
>> +
>> + proc_err = acpi_hest_generic_data_payload(gdata);
>> printk("%s""section_type: general processor error\n", newpfx);
>> if (gdata->error_data_length >= sizeof(*proc_err))
>> cper_print_proc_generic(newpfx, proc_err);
>> else
>> goto err_section_too_small;
>> } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
>> - struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
>> + struct cper_sec_mem_err *mem_err;
>> +
>> + mem_err = acpi_hest_generic_data_payload(gdata);
>> printk("%s""section_type: memory error\n", newpfx);
>> if (gdata->error_data_length >=
>> sizeof(struct cper_sec_mem_err_old))
>> @@ -419,7 +450,9 @@ static void cper_estatus_print_section(
>> else
>> goto err_section_too_small;
>> } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
>> - struct cper_sec_pcie *pcie = (void *)(gdata + 1);
>> + struct cper_sec_pcie *pcie;
>> +
>> + pcie = acpi_hest_generic_data_payload(gdata);
>> printk("%s""section_type: PCIe error\n", newpfx);
>> if (gdata->error_data_length >= sizeof(*pcie))
>> cper_print_pcie(newpfx, pcie, gdata);
>> @@ -438,7 +471,7 @@ void cper_estatus_print(const char *pfx,
>> const struct acpi_hest_generic_status *estatus)
>> {
>> struct acpi_hest_generic_data *gdata;
>> - unsigned int data_len, gedata_len;
>> + unsigned int data_len;
>> int sec_no = 0;
>> char newpfx[64];
>> __u16 severity;
>> @@ -451,12 +484,12 @@ void cper_estatus_print(const char *pfx,
>> printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
>> data_len = estatus->data_length;
>> gdata = (struct acpi_hest_generic_data *)(estatus + 1);
>> +
>> snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
>> - while (data_len >= sizeof(*gdata)) {
>> - gedata_len = gdata->error_data_length;
>> +
>> + while (data_len >= acpi_hest_generic_data_size(gdata)) {
>> cper_estatus_print_section(newpfx, gdata, sec_no);
>> - data_len -= gedata_len + sizeof(*gdata);
>> - gdata = (void *)(gdata + 1) + gedata_len;
>> + gdata = acpi_hest_generic_data_next(gdata);
>> sec_no++;
>> }
>> }
>> @@ -486,12 +519,13 @@ int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
>> return rc;
>> data_len = estatus->data_length;
>> gdata = (struct acpi_hest_generic_data *)(estatus + 1);
>> - while (data_len >= sizeof(*gdata)) {
>> - gedata_len = gdata->error_data_length;
>> - if (gedata_len > data_len - sizeof(*gdata))
>> +
>> + while (data_len >= acpi_hest_generic_data_size(gdata)) {
>> + gedata_len = acpi_hest_generic_data_error_length(gdata);
>> + if (gedata_len > data_len - acpi_hest_generic_data_size(gdata))
>> return -EINVAL;
>> - data_len -= gedata_len + sizeof(*gdata);
>> - gdata = (void *)(gdata + 1) + gedata_len;
>> + data_len -= gedata_len + acpi_hest_generic_data_size(gdata);
>> + gdata = acpi_hest_generic_data_next(gdata);
>> }
>> if (data_len)
>> return -EINVAL;
>> diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
>> index 68f088a..56b9679 100644
>> --- a/include/acpi/ghes.h
>> +++ b/include/acpi/ghes.h
>> @@ -73,3 +73,13 @@ static inline void ghes_edac_unregister(struct ghes *ghes)
>> {
>> }
>> #endif
>> +
>> +#define acpi_hest_generic_data_version(gdata) \
>> + (gdata->revision >> 8)
>> +
>> +static inline void *acpi_hest_generic_data_payload(struct acpi_hest_generic_data *gdata)
>> +{
>> + return acpi_hest_generic_data_version(gdata) >= 3 ?
>> + (void *)(((struct acpi_hest_generic_data_v300 *)(gdata)) + 1) :
>> + gdata + 1;
>> +}
>> diff --git a/include/linux/cper.h b/include/linux/cper.h
>> index dcacb1a..13ea41c 100644
>> --- a/include/linux/cper.h
>> +++ b/include/linux/cper.h
>> @@ -255,6 +255,18 @@ enum {
>>
>> #define CPER_PCIE_SLOT_SHIFT 3
>>
>> +#define acpi_hest_generic_data_error_length(gdata) \
>> + (((struct acpi_hest_generic_data *)(gdata))->error_data_length)
>> +#define acpi_hest_generic_data_size(gdata) \
>> + ((acpi_hest_generic_data_version(gdata) >= 3) ? \
>> + sizeof(struct acpi_hest_generic_data_v300) : \
>> + sizeof(struct acpi_hest_generic_data))
>> +#define acpi_hest_generic_data_record_size(gdata) \
>> + (acpi_hest_generic_data_size(gdata) + \
>> + acpi_hest_generic_data_error_length(gdata))
>> +#define acpi_hest_generic_data_next(gdata) \
>> + ((void *)(gdata) + acpi_hest_generic_data_record_size(gdata))
>> +
> How come these aren't in ghes.h?
It probably does make more sense to add these in ghes.h, I'll move them
there in the next set.
> Reviewed-by: James Morse <james.morse@....com>
>
Thanks!
Tyler
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.
Powered by blists - more mailing lists