lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20240619143117.00000c9c@Huawei.com>
Date: Wed, 19 Jun 2024 14:31:17 +0100
From: Jonathan Cameron <Jonathan.Cameron@...wei.com>
To: Mauro Carvalho Chehab <mchehab+huawei@...nel.org>
CC: Borislav Petkov <bp@...en8.de>, James Morse <james.morse@....com>, "Rafael
 J. Wysocki" <rafael@...nel.org>, Shiju Jose <shiju.jose@...wei.com>, Tony
 Luck <tony.luck@...el.com>, Uwe Kleine-König
	<u.kleine-koenig@...gutronix.de>, "Alison Schofield"
	<alison.schofield@...el.com>, Ard Biesheuvel <ardb@...nel.org>, Dan Williams
	<dan.j.williams@...el.com>, Dave Jiang <dave.jiang@...el.com>, Ira Weiny
	<ira.weiny@...el.com>, Len Brown <lenb@...nel.org>, Shuai Xue
	<xueshuai@...ux.alibaba.com>, <linux-acpi@...r.kernel.org>,
	<linux-edac@...r.kernel.org>, <linux-efi@...r.kernel.org>,
	<linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v2] efi/cper: align ARM CPER type with UEFI 2.9A/2.10
 specs

On Wed, 19 Jun 2024 12:52:38 +0200
Mauro Carvalho Chehab <mchehab+huawei@...nel.org> wrote:

> Up to UEFI spec, the type byte of CPER struct was defined simply
> as:
> 
> Type at byte offset 4:
> 
> 	- Cache error
> 	- TLB Error
> 	- Bus Error
> 	- Micro-architectural Error
> 	All other values are reserved
> 
> Yet, there was no information about how this would be encoded.
> 
> Spec 2.9A corrected it by defining:
> 
> 	- Bit 1 - Cache Error
> 	- Bit 2 - TLB Error
> 	- Bit 3 - Bus Error
> 	- Bit 4 - Micro-architectural Error
> 	All other values are reserved
> 
> Spec 2.10 also preserve the same encoding as 2.9A
> 
> See: https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#arm-processor-error-information
> 
> Adjust CPER handling code for ARM to properly handle UEFI 2.9A and
> 2.10 encoding.

Hi Mauro,

I'd be tempted to use "ARM Processor" throughout this patch description
as could in theory be something else and currently the link
is the only way to tell!

A few comments inline.

Good catch on the spec change btw.

> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@...nel.org>
> ---
>  drivers/acpi/apei/ghes.c        | 19 +++++++++++---
>  drivers/firmware/efi/cper-arm.c | 44 ++++++++++++++-------------------
>  include/linux/cper.h            |  9 +++----
>  3 files changed, 37 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index ed32bbecb4a3..365de4115508 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -546,9 +546,12 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
>  	p = (char *)(err + 1);
>  	for (i = 0; i < err->err_info_num; i++) {
>  		struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
> -		bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
> +		bool is_cache = (err_info->type & CPER_ARM_CACHE_ERROR);

Matches local style I guess but the () are unnecessary.

>  		bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
> -		const char *error_type = "unknown error";
> +		char error_type[120] = "";

> +		char *s = error_type;
> +		int len = 0;
> +		int i;

Shadowing i which is bad for readability.

>  
>  		/*
>  		 * The field (err_info->error_info & BIT(26)) is fixed to set to
> @@ -562,8 +565,16 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
>  			continue;
>  		}
>  
> -		if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
> -			error_type = cper_proc_error_type_strs[err_info->type];
> +		for (i = 0; i < ARRAY_SIZE(cper_proc_error_type_strs); i++) {
> +			if (!(err_info->type & (1U << i)))
> +				continue;
> +
> +			len += snprintf(s, sizeof(err_info->type) - len, "%s ", cper_proc_error_type_strs[i]);

Size of the index into the type string array?  I'm confused.
sizeof(error_type) maybe?

Also, maybe break that long line of code before cper_*


> +			s += len;
> +		}
> +
> +		if (!*error_type)
> +			strscpy(error_type, "unknown error", sizeof(error_type));

Perhaps should handle multiple bits where only one is unknown?
So maybe compare with a mask of known bits and print this on the end (perhaps
including which bit)?

>  
>  		pr_warn_ratelimited(FW_WARN GHES_PFX
>  				    "Unhandled processor error type: %s\n",
> diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c
> index fa9c1c3bf168..f57641eb548a 100644
> --- a/drivers/firmware/efi/cper-arm.c
> +++ b/drivers/firmware/efi/cper-arm.c
> @@ -93,15 +93,11 @@ static void cper_print_arm_err_info(const char *pfx, u32 type,
>  	bool proc_context_corrupt, corrected, precise_pc, restartable_pc;
>  	bool time_out, access_mode;
>  
> -	/* If the type is unknown, bail. */
> -	if (type > CPER_ARM_MAX_TYPE)
> -		return;
> -
>  	/*
>  	 * Vendor type errors have error information values that are vendor
>  	 * specific.
>  	 */
> -	if (type == CPER_ARM_VENDOR_ERROR)
> +	if (type & CPER_ARM_VENDOR_ERROR)
>  		return;
>  
>  	if (error_info & CPER_ARM_ERR_VALID_TRANSACTION_TYPE) {
> @@ -116,43 +112,38 @@ static void cper_print_arm_err_info(const char *pfx, u32 type,
>  	if (error_info & CPER_ARM_ERR_VALID_OPERATION_TYPE) {
>  		op_type = ((error_info >> CPER_ARM_ERR_OPERATION_SHIFT)
>  			   & CPER_ARM_ERR_OPERATION_MASK);
> -		switch (type) {
> -		case CPER_ARM_CACHE_ERROR:
> +		if (type & CPER_ARM_CACHE_ERROR) {
>  			if (op_type < ARRAY_SIZE(arm_cache_err_op_strs)) {
> -				printk("%soperation type: %s\n", pfx,
> +				printk("%scache error: %s\n", pfx,
>  				       arm_cache_err_op_strs[op_type]);
Can we keep that this is an operation type in print?
"%scache error, operation type: %s\n" perhaps?

>  			}
> -			break;
> -		case CPER_ARM_TLB_ERROR:
> +		}
> +		if (type & CPER_ARM_TLB_ERROR) {
>  			if (op_type < ARRAY_SIZE(arm_tlb_err_op_strs)) {
> -				printk("%soperation type: %s\n", pfx,
> +				printk("%sTLB error: %s\n", pfx,
>  				       arm_tlb_err_op_strs[op_type]);
>  			}
> -			break;
> -		case CPER_ARM_BUS_ERROR:
> +		}
> +		if (type & CPER_ARM_BUS_ERROR) {
>  			if (op_type < ARRAY_SIZE(arm_bus_err_op_strs)) {
> -				printk("%soperation type: %s\n", pfx,
> +				printk("%sbus error: %s\n", pfx,
>  				       arm_bus_err_op_strs[op_type]);
>  			}
> -			break;
>  		}
>  	}
>  
>  	if (error_info & CPER_ARM_ERR_VALID_LEVEL) {
>  		level = ((error_info >> CPER_ARM_ERR_LEVEL_SHIFT)
>  			 & CPER_ARM_ERR_LEVEL_MASK);

Not a today thing, but would be lovely to use FIELD_GET()
for all these with appropriately fixed up mask definitions.
Right now it is inconsistent as the valid entries are handled
as shifted values, and we have GENMASK(X,0) plus a shift for these.

> -		switch (type) {
> -		case CPER_ARM_CACHE_ERROR:
> +		if (type & CPER_ARM_CACHE_ERROR)
>  			printk("%scache level: %d\n", pfx, level);
> -			break;
> -		case CPER_ARM_TLB_ERROR:
> +
> +		if (type & CPER_ARM_TLB_ERROR)
>  			printk("%sTLB level: %d\n", pfx, level);
> -			break;
> -		case CPER_ARM_BUS_ERROR:
> +
> +		if (type & CPER_ARM_BUS_ERROR)
>  			printk("%saffinity level at which the bus error occurred: %d\n",
>  			       pfx, level);
> -			break;
> -		}
>  	}



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ