lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250728144031.GA33292@yaz-khff2.amd.com>
Date: Mon, 28 Jul 2025 10:40:31 -0400
From: Yazen Ghannam <yazen.ghannam@....com>
To: Avadhut Naik <avadhut.naik@....com>
Cc: linux-edac@...r.kernel.org, bp@...en8.de, john.allen@....com,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH 1/2] RAS/AMD/ATL: Translate UMC normalized address to
 DRAM address using PRM

On Thu, Jul 17, 2025 at 04:48:42PM +0000, Avadhut Naik wrote:
> Modern AMD SOCs like Zen5 provide UEFI PRM module that implements various

Minor nit: Zen5 is a core revision and doesn't represent an SoC.

You could say "Recent AMD systems...". A platform (FW/OS) interface like
PRM doesn't depend on hardware revisions.

> address translation PRM handlers.[1] These handlers can be invoked by the
> OS or hypervisor at runtime to perform address translations.
> 
> On AMD's Zen-based SOCs, Unified Memory Controller (UMC) relative
> "normalized" address is reported through MCA_ADDR of UMC SMCA bank type
> on occurrence of a DRAM ECC error. This address must be converted into
> system physical address and DRAM address to export additional information
> about the error.
> 
> Add support to convert normalized address into DRAM address through the
> appropriate PRM handler. Support for obtaining the system physical address

It's not necessary to mention that the SPA translation already exists.

> already exists. Instead of logging the translated DRAM address locally,
> register the translating function when the Address Translation library is
> initialized. Modules like amd64_edac can then invoke the PRM handler to
> add the DRAM address to their error records. Additionally, it can also be
> exported through the RAS tracepont.
> 
> [1] AMD Family 1Ah Models 00h–0Fh and Models 10h–1Fh ACPI v6.5 Porting Guide, Chapter 22

Could this be a link?
https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#links-to-documentation

> 
> Signed-off-by: Avadhut Naik <avadhut.naik@....com>
> ---
>  drivers/ras/amd/atl/core.c     |  3 ++-
>  drivers/ras/amd/atl/internal.h |  9 +++++++++
>  drivers/ras/amd/atl/prm.c      | 36 ++++++++++++++++++++++++++++++----
>  drivers/ras/amd/atl/umc.c      | 12 ++++++++++++
>  drivers/ras/ras.c              | 18 +++++++++++++++--
>  include/linux/ras.h            | 19 +++++++++++++++++-
>  6 files changed, 89 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
> index 4197e10993ac..ca1646d030ca 100644
> --- a/drivers/ras/amd/atl/core.c
> +++ b/drivers/ras/amd/atl/core.c
> @@ -207,7 +207,8 @@ static int __init amd_atl_init(void)
>  
>  	/* Increment this module's recount so that it can't be easily unloaded. */
>  	__module_get(THIS_MODULE);
> -	amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr);
> +	amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr,
> +				 convert_umc_mca_addr_to_dram_addr);
>  
>  	pr_info("AMD Address Translation Library initialized\n");
>  	return 0;
> diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
> index 2b6279d32774..53095310438c 100644
> --- a/drivers/ras/amd/atl/internal.h
> +++ b/drivers/ras/amd/atl/internal.h
> @@ -279,18 +279,27 @@ int dehash_address(struct addr_ctx *ctx);
>  
>  unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
>  unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
> +int convert_umc_mca_addr_to_dram_addr(struct atl_err *err, struct dram_addr *dram_addr);
>  
>  u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
>  u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
>  
>  #ifdef CONFIG_AMD_ATL_PRM
>  unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id, unsigned long addr);
> +int prm_umc_norm_to_dram_addr(u8 socket_id, u64 bank_id,
> +			      unsigned long addr, struct dram_addr *dram_addr);
>  #else
>  static inline unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id,
>  						     unsigned long addr)
>  {
>         return -ENODEV;
>  }
> +
> +static inline int prm_umc_norm_to_dram_addr(u8 socket_id, u64 bank_id,
> +					    unsigned long addr, struct dram_addr *dram_addr)
> +{
> +	return -ENODEV;
> +}
>  #endif
>  
>  /*
> diff --git a/drivers/ras/amd/atl/prm.c b/drivers/ras/amd/atl/prm.c
> index 0931a20d213b..9bbaf8c85da0 100644
> --- a/drivers/ras/amd/atl/prm.c
> +++ b/drivers/ras/amd/atl/prm.c
> @@ -19,10 +19,11 @@
>  #include <linux/prmt.h>
>  
>  /*
> - * PRM parameter buffer - normalized to system physical address, as described
> - * in the "PRM Parameter Buffer" section of the AMD ACPI Porting Guide.
> + * PRM parameter buffer - normalized to system physical address and normalized
> + * to DRAM address, as described in the "PRM Parameter Buffer" section of the
> + * AMD ACPI Porting Guide.
>   */
> -struct norm_to_sys_param_buf {
> +struct prm_parameter_buffer {
>  	u64 norm_addr;
>  	u8 socket;
>  	u64 bank_id;
> @@ -33,9 +34,13 @@ static const guid_t norm_to_sys_guid = GUID_INIT(0xE7180659, 0xA65D, 0x451D,
>  						 0x92, 0xCD, 0x2B, 0x56, 0xF1,
>  						 0x2B, 0xEB, 0xA6);
>  
> +static const guid_t norm_to_dram_guid = GUID_INIT(0x7626C6AE, 0xF973, 0x429C,
> +						 0xA9, 0x1C, 0x10, 0x7D, 0x7B,
> +						 0xE2, 0x98, 0xB0);
> +
>  unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 bank_id, unsigned long addr)
>  {
> -	struct norm_to_sys_param_buf p_buf;
> +	struct prm_parameter_buffer p_buf;
>  	unsigned long ret_addr;
>  	int ret;
>  
> @@ -55,3 +60,26 @@ unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 bank_id, unsigned long
>  
>  	return ret;
>  }
> +
> +int prm_umc_norm_to_dram_addr(u8 socket_id, u64 bank_id,
> +			      unsigned long addr, struct dram_addr *dram_addr)
> +{
> +	struct prm_parameter_buffer p_buf;
> +	int ret;
> +
> +	p_buf.norm_addr	= addr;
> +	p_buf.socket	= socket_id;
> +	p_buf.bank_id	= bank_id;
> +	p_buf.out_buf	= dram_addr;
> +
> +	ret = acpi_call_prm_handler(norm_to_dram_guid, &p_buf);
> +	if (!ret)
> +		return ret;
> +
> +	if (ret == -ENODEV)
> +		pr_debug("PRM module/handler not available.\n");
> +	else
> +		pr_notice_once("PRM DRAM Address Translation failed.\n");
> +
> +	return ret;
> +}
> diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c
> index 6e072b7667e9..df6accae8929 100644
> --- a/drivers/ras/amd/atl/umc.c
> +++ b/drivers/ras/amd/atl/umc.c
> @@ -427,3 +427,15 @@ unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
>  
>  	return norm_to_sys_addr(socket_id, die_id, coh_st_inst_id, addr);
>  }
> +
> +int convert_umc_mca_addr_to_dram_addr(struct atl_err *err, struct dram_addr *dram_addr)
> +{
> +	u8 socket_id = topology_physical_package_id(err->cpu);
> +	unsigned long addr = get_addr(err->addr);
> +	u64 bank_id = err->ipid;
> +	int ret;
> +
> +	ret = prm_umc_norm_to_dram_addr(socket_id, bank_id, addr, dram_addr);
> +
> +	return ret;

The 'ret' variable is not necessary. Just return the function call.

You can go even further and not use any new variables. Not sure how
it'll be aesthetically, but that was my first thought.

> +}
> diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
> index a6e4792a1b2e..cae6388d41be 100644
> --- a/drivers/ras/ras.c
> +++ b/drivers/ras/ras.c
> @@ -19,15 +19,20 @@
>   */
>  static unsigned long (*amd_atl_umc_na_to_spa)(struct atl_err *err);
>  
> -void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *))
> +static int (*amd_atl_umc_na_to_dram_addr)(struct atl_err *err, struct dram_addr *dram_addr);
> +
> +void amd_atl_register_decoder(unsigned long (*f1)(struct atl_err *),
> +			      int (*f2)(struct atl_err *, struct dram_addr *))
>  {
> -	amd_atl_umc_na_to_spa = f;
> +	amd_atl_umc_na_to_spa = f1;
> +	amd_atl_umc_na_to_dram_addr = f2;
>  }
>  EXPORT_SYMBOL_GPL(amd_atl_register_decoder);
>  
>  void amd_atl_unregister_decoder(void)
>  {
>  	amd_atl_umc_na_to_spa = NULL;
> +	amd_atl_umc_na_to_dram_addr = NULL;
>  }
>  EXPORT_SYMBOL_GPL(amd_atl_unregister_decoder);
>  
> @@ -39,6 +44,15 @@ unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
>  	return amd_atl_umc_na_to_spa(err);
>  }
>  EXPORT_SYMBOL_GPL(amd_convert_umc_mca_addr_to_sys_addr);
> +
> +int amd_convert_umc_mca_addr_to_dram_addr(struct atl_err *err, struct dram_addr *dram_addr)
> +{
> +	if (!amd_atl_umc_na_to_dram_addr)
> +		return -EINVAL;
> +
> +	return amd_atl_umc_na_to_dram_addr(err, dram_addr);
> +}
> +EXPORT_SYMBOL_GPL(amd_convert_umc_mca_addr_to_dram_addr);
>  #endif /* CONFIG_AMD_ATL */
>  
>  #define CREATE_TRACE_POINTS
> diff --git a/include/linux/ras.h b/include/linux/ras.h
> index a64182bc72ad..feb53f8470b0 100644
> --- a/include/linux/ras.h
> +++ b/include/linux/ras.h
> @@ -42,15 +42,32 @@ struct atl_err {
>  	u32 cpu;
>  };
>  
> +struct dram_addr {

There's another struct in the kernel called 'dram_addr'.

It may help to make this more unique with a prefix: atl_dram_addr.

> +	u8 chip_select;
> +	u8 bank_group;
> +	u8 bank_addr;
> +	u32 row_addr;
> +	u16 col_addr;
> +	u8 rank_mul;
> +	u8 sub_ch;
> +} __packed;
> +
>  #if IS_ENABLED(CONFIG_AMD_ATL)
> -void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *));
> +void amd_atl_register_decoder(unsigned long (*f1)(struct atl_err *),
> +			      int (*f2)(struct atl_err *, struct dram_addr *));
>  void amd_atl_unregister_decoder(void);
>  void amd_retire_dram_row(struct atl_err *err);
>  unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
> +int amd_convert_umc_mca_addr_to_dram_addr(struct atl_err *err, struct dram_addr *dram_addr);
>  #else
>  static inline void amd_retire_dram_row(struct atl_err *err) { }
>  static inline unsigned long
>  amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; }
> +static inline int amd_convert_umc_mca_addr_to_dram_addr(struct atl_err *err,
> +							struct dram_addr *dram_addr)
> +{
> +	return -EINVAL;
> +}
>  #endif /* CONFIG_AMD_ATL */
>  
>  #endif /* __RAS_H__ */
> -- 

Thanks,
Yazen

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ