linux-kernel - Re: [PATCH 1/4] EDAC, MCE, AMD: Enable error decoding of Scalable MCA errors

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20160223123744.GC3673@pd.tnic>
Date:	Tue, 23 Feb 2016 13:37:44 +0100
From:	Borislav Petkov <bp@...en8.de>
To:	Aravind Gopalakrishnan <Aravind.Gopalakrishnan@....com>
Cc:	tony.luck@...el.com, hpa@...or.com, mingo@...hat.com,
	tglx@...utronix.de, dougthompson@...ssion.com,
	mchehab@....samsung.com, x86@...nel.org,
	linux-edac@...r.kernel.org, linux-kernel@...r.kernel.org,
	ashok.raj@...el.com, gong.chen@...ux.intel.com,
	len.brown@...el.com, peterz@...radead.org, ak@...ux.intel.com,
	alexander.shishkin@...ux.intel.com
Subject: Re: [PATCH 1/4] EDAC, MCE, AMD: Enable error decoding of Scalable
 MCA errors

On Tue, Feb 16, 2016 at 03:45:08PM -0600, Aravind Gopalakrishnan wrote:
> For Scalable MCA enabled processors, errors are listed
> per IP block. And since it is not required for an IP to
> map to a particular bank, we need to use HWID and McaType
> values from the MCx_IPID register to figure out which IP
> a given bank represents.
> 
> We also have a new bit (TCC) in the MCx_STATUS register
> to indicate Task context is corrupt.
> 
> Add logic here to decode errors from all known IP
> blocks for Fam17h Model 00-0fh and to print TCC errors.
> 
> Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@....com>
> ---
>  arch/x86/include/asm/mce.h           |  50 ++++++
>  arch/x86/include/asm/msr-index.h     |   2 +
>  arch/x86/kernel/cpu/mcheck/mce_amd.c |  11 ++
>  drivers/edac/mce_amd.c               | 327 ++++++++++++++++++++++++++++++++++-
>  4 files changed, 389 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 2ea4527..2ec67ac 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -42,6 +42,17 @@
>  /* AMD-specific bits */
>  #define MCI_STATUS_DEFERRED	(1ULL<<44)  /* declare an uncorrected error */
>  #define MCI_STATUS_POISON	(1ULL<<43)  /* access poisonous data */
> +#define MCI_STATUS_TCC		(1ULL<<55)  /* Task context corrupt */

\n

> +/*
> + * McaX field if set indicates a given bank supports MCA extensions:
> + *  - Deferred error interrupt type is specifiable by bank
> + *  - BlkPtr field indicates prescence of extended MISC registers.
				^^^^^^^^^

Btw, that's MCi_MISC[BlkPtr] ?

Also, please integrate a spell checker into your patch creation
workflow.


> + *    But should not be used to determine MSR numbers
> + *  - TCC bit is present in MCx_STATUS

All sentences end with a "."

> + */
> +#define MCI_CONFIG_MCAX		0x1
> +#define MCI_IPID_MCATYPE	0xFFFF0000
> +#define MCI_IPID_HWID		0xFFF
>  
>  /*
>   * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
> @@ -287,4 +298,43 @@ struct cper_sec_mem_err;
>  extern void apei_mce_report_mem_error(int corrected,
>  				      struct cper_sec_mem_err *mem_err);
>  
> +/*
> + * Enumerating new IP types and HWID values

Please stop using gerund, i.e., -ing, in comments and commit messages.

"Enumerate new IP ..." is just fine.

> + * in ScalableMCA enabled AMD processors
> + */
> +#ifdef CONFIG_X86_MCE_AMD
> +enum ip_types {

AMD-specific so "amd_ip_types"

> +	F17H_CORE = 0,	/* Core errors */
> +	DF,		/* Data Fabric */
> +	UMC,		/* Unified Memory Controller */
> +	FUSE,		/* FUSE subsystem */

What's FUSE subsystem?

In any case, this could use a different name in order not to confuse
with Linux's filesystem in userspace.

> +	PSP,		/* Platform Security Processor */
> +	SMU,		/* System Management Unit */
> +	N_IP_TYPES
> +};
> +
> +struct hwid {

amd_hwid and so on. All below should have the "amd_" prefix so that it
is obvious.

> +	const char *ipname;
> +	unsigned int hwid_value;
> +};
> +
> +extern struct hwid hwid_mappings[N_IP_TYPES];


> +
> +enum core_mcatypes {
> +	LS = 0,		/* Load Store */
> +	IF,		/* Instruction Fetch */
> +	L2_CACHE,	/* L2 cache */
> +	DE,		/* Decoder unit */
> +	RES,		/* Reserved */
> +	EX,		/* Execution unit */
> +	FP,		/* Floating Point */
> +	L3_CACHE	/* L3 cache */
> +};
> +
> +enum df_mcatypes {
> +	CS = 0,		/* Coherent Slave */
> +	PIE		/* Power management, Interrupts, etc */
> +};
> +#endif

So all those are defined here but we have a header for exactly that
drivers/edac/mce_amd.h. And then you define and export hwid_mappings in
arch/x86/kernel/cpu/mcheck/mce_amd.c to not use it there.

Why isn't all this in drivers/edac/mce_amd.[ch] ?

And if it is there, then you obviously don't need the "amd_" prefix.

> +
>  #endif /* _ASM_X86_MCE_H */
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 5523465..93bccbc 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -266,7 +266,9 @@
>  
>  /* 'SMCA': AMD64 Scalable MCA */
>  #define MSR_AMD64_SMCA_MC0_CONFIG	0xc0002004
> +#define MSR_AMD64_SMCA_MC0_IPID		0xc0002005
>  #define MSR_AMD64_SMCA_MCx_CONFIG(x)	(MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
> +#define MSR_AMD64_SMCA_MCx_IPID(x)	(MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))

Are those MSRs used in multiple files? If not, -> mce.h.

>  #define MSR_P6_PERFCTR0			0x000000c1
>  #define MSR_P6_PERFCTR1			0x000000c2
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> index 88de27b..8169103 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> @@ -71,6 +71,17 @@ static const char * const th_names[] = {
>  	"execution_unit",
>  };
>  
> +/* Defining HWID to IP type mappings for Scalable MCA */

" Define ..."

> +struct hwid hwid_mappings[] = {
> +	[F17H_CORE]	= { "f17h_core", 0xB0 },
> +	[DF]		= { "df", 0x2E },
> +	[UMC]		= { "umc", 0x96 },
> +	[FUSE]		= { "fuse", 0x5 },
> +	[PSP]		= { "psp", 0xFF },
> +	[SMU]		= { "smu", 0x1 },
> +};
> +EXPORT_SYMBOL_GPL(hwid_mappings);
> +
>  static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
>  static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
>  
> diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
> index e3a945c..6e6b327 100644
> --- a/drivers/edac/mce_amd.c
> +++ b/drivers/edac/mce_amd.c
> @@ -147,6 +147,136 @@ static const char * const mc6_mce_desc[] = {
>  	"Status Register File",

...

> +static void decode_f17hcore_errors(u8 xec, unsigned int mca_type)
> +{
> +	switch (mca_type) {
> +	case LS:
> +		if (xec == 0x4 || xec > (ARRAY_SIZE(f17h_ls_mce_desc) - 1))
> +			goto wrong_f17hcore_error;
> +
> +		pr_cont("%s.\n", f17h_ls_mce_desc[xec]);
> +		break;
> +
> +	case IF:
> +		if (xec > (ARRAY_SIZE(f17h_if_mce_desc) - 1))
> +			goto wrong_f17hcore_error;
> +
> +		pr_cont("%s.\n", f17h_if_mce_desc[xec]);
> +		break;
> +
> +	case L2_CACHE:
> +		if (xec > (ARRAY_SIZE(f17h_l2_mce_desc) - 1))
> +			goto wrong_f17hcore_error;
> +
> +		pr_cont("%s.\n", f17h_l2_mce_desc[xec]);
> +		break;
> +
> +	case DE:
> +		if (xec > (ARRAY_SIZE(f17h_de_mce_desc) - 1))
> +			goto wrong_f17hcore_error;
> +
> +		pr_cont("%s.\n", f17h_de_mce_desc[xec]);
> +		break;
> +
> +	case EX:
> +		if (xec > (ARRAY_SIZE(f17h_ex_mce_desc) - 1))
> +			goto wrong_f17hcore_error;
> +
> +		pr_cont("%s.\n", f17h_ex_mce_desc[xec]);
> +		break;
> +
> +	case FP:
> +		if (xec > (ARRAY_SIZE(f17h_fp_mce_desc) - 1))
> +			goto wrong_f17hcore_error;
> +
> +		pr_cont("%s.\n", f17h_fp_mce_desc[xec]);
> +		break;
> +
> +	case L3_CACHE:
> +		if (xec > (ARRAY_SIZE(f17h_l3_mce_desc) - 1))
> +			goto wrong_f17hcore_error;
> +
> +		pr_cont("%s.\n", f17h_l3_mce_desc[xec]);
> +		break;
> +
> +	default:
> +		goto wrong_f17hcore_error;

That's a lot of repeated code. You can assign the desc array to a temp
variable depending on mca_type and do the if and pr_cont only once using
that temp variable.

> +	}
> +
> +	return;
> +
> +wrong_f17hcore_error:
> +	pr_cont("Unrecognized error code from %s MCA bank\n",
> +		(mca_type == L3_CACHE) ? "L3 Cache" : "F17h Core");
> +}
> +
> +static void decode_df_errors(u8 xec, unsigned int mca_type)
> +{
> +	switch (mca_type) {
> +	case  CS:
> +		if (xec > (ARRAY_SIZE(f17h_cs_mce_desc) - 1))
> +			goto wrong_df_error;
> +
> +		pr_cont("%s.\n", f17h_cs_mce_desc[xec]);
> +		break;
> +
> +	case PIE:
> +		if (xec > (ARRAY_SIZE(f17h_pie_mce_desc) - 1))
> +			goto wrong_df_error;
> +
> +		pr_cont("%s.\n", f17h_pie_mce_desc[xec]);
> +		break;

Ditto.

> +
> +	default:
> +		goto wrong_df_error;
> +	}
> +
> +	return;
> +
> +wrong_df_error:
> +	pr_cont("Unrecognized error code from DF MCA bank\n");
> +}
> +
> +/* Decode errors according to Scalable MCA specification */
> +static void decode_smca_errors(struct mce *m)
> +{
> +	u32 low, high;
> +	u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
> +	unsigned int hwid, mca_type, i;
> +	u8 xec = XEC(m->status, xec_mask);
> +
> +	if (rdmsr_safe(addr, &low, &high)) {
> +		pr_emerg("Unable to decode errors from banks\n");

That statement is not very useful in its current form.

> +		return;
> +	}
> +
> +	hwid = high & MCI_IPID_HWID;
> +	mca_type = (high & MCI_IPID_MCATYPE) >> 16;
> +
> +	pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n",
> +		 m->bank, high, low);
> +
> +	/*
> +	 * Based on hwid and mca_type values,
> +	 * decode errors from respective IPs.
> +	 * Note: mca_type values make sense only
> +	 * in the context of an hwid
> +	 */
> +	for (i = 0; i < ARRAY_SIZE(hwid_mappings); i++)

So you use those hwid_mappings here. Why aren't they defined here too?

> +		if (hwid_mappings[i].hwid_value == hwid)
> +			break;
> +
> +	switch (i) {
> +	case F17H_CORE:
> +		pr_emerg(HW_ERR "%s Error: ",
> +			 (mca_type == L3_CACHE) ? "L3 Cache" : "F17h Core");
> +		decode_f17hcore_errors(xec, mca_type);
> +		break;
> +
> +	case DF:
> +		pr_emerg(HW_ERR "DF Error: ");
> +		decode_df_errors(xec, mca_type);
> +		break;
> +
> +	case UMC:
> +		pr_emerg(HW_ERR "UMC Error: ");
> +		if (xec > (ARRAY_SIZE(f17h_umc_mce_desc) - 1)) {
> +			pr_cont("Unrecognized error code from UMC MCA bank\n");
> +			return;
> +		}
> +		pr_cont("%s.\n", f17h_umc_mce_desc[xec]);
> +		break;
> +
> +	case FUSE:
> +		pr_emerg(HW_ERR "FUSE Error: ");
> +		if (xec > (ARRAY_SIZE(f17h_fuse_mce_desc) - 1)) {
> +			pr_cont("Unrecognized error code from FUSE MCA bank\n");
> +			return;
> +		}
> +		pr_cont("%s.\n", f17h_fuse_mce_desc[xec]);
> +		break;
> +
> +	case PSP:
> +		pr_emerg(HW_ERR "PSP Error: ");
> +		if (xec > (ARRAY_SIZE(f17h_psp_mce_desc) - 1)) {
> +			pr_cont("Unrecognized error code from PSP MCA bank\n");
> +			return;
> +		}
> +		pr_cont("%s.\n", f17h_psp_mce_desc[xec]);
> +		break;
> +
> +	case SMU:
> +		pr_emerg(HW_ERR "SMU Error: ");
> +		if (xec > (ARRAY_SIZE(f17h_smu_mce_desc) - 1)) {
> +			pr_cont("Unrecognized error code from SMU MCA bank\n");
> +			return;
> +		}
> +		pr_cont("%s.\n", f17h_smu_mce_desc[xec]);
> +		break;

Also a lot of repeated code which could be simplified.

> +
> +	default:
> +		pr_emerg(HW_ERR "HWID:%d does not match any existing IPs\n", hwid);
> +	}
> +}
> +
>  static const char *decode_error_status(struct mce *m)
>  {
>  	if (m->status & MCI_STATUS_UC) {
> @@ -769,11 +1071,21 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
>  		((m->status & MCI_STATUS_PCC)	? "PCC"	  : "-"),
>  		((m->status & MCI_STATUS_ADDRV)	? "AddrV" : "-"));
>  
> -	if (c->x86 == 0x15 || c->x86 == 0x16)
> +	if (c->x86 >= 0x15)
>  		pr_cont("|%s|%s",
>  			((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
>  			((m->status & MCI_STATUS_POISON)   ? "Poison"   : "-"));
>  
> +	if (mce_flags.smca) {

ERROR: "mce_flags" [drivers/edac/edac_mce_amd.ko] undefined!
make[1]: *** [__modpost] Error 1
make: *** [modules] Error 2
make: *** Waiting for unfinished jobs....

Just read CPUID again here instead of exporting mce_flags.

> +		u32 smca_low, smca_high;


> +		u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);

s/smca_//

Also, all those MSRs should be defined in drivers/edac/mce_amd.h if not
used outside.

> +
> +		if (!rdmsr_safe(smca_addr, &smca_low, &smca_high) &&
> +		    (smca_low & MCI_CONFIG_MCAX))
> +			pr_cont("|%s",

No need for that break here.

> +				((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
> +	}
> +
>  	/* do the two bits[14:13] together */
>  	ecc = (m->status >> 45) & 0x3;
>  	if (ecc)
> @@ -784,6 +1096,11 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
>  	if (m->status & MCI_STATUS_ADDRV)
>  		pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
>  
> +	if (mce_flags.smca) {
> +		decode_smca_errors(m);
> +		goto err_code;
> +	}
> +
>  	if (!fam_ops)
>  		goto err_code;
>  
> @@ -888,6 +1205,14 @@ static int __init mce_amd_init(void)
>  		fam_ops->mc2_mce = f16h_mc2_mce;
>  		break;
>  
> +	case 0x17:
> +		xec_mask = 0x3f;
> +		if (!mce_flags.smca) {
> +			printk(KERN_WARNING "Decoding supported only on Scalable MCA enabled processors\n");

What is that supposed to do? I thought all new ones will have SMCA...

> +			return 0;
> +		}
> +		break;
> +
>  	default:
>  		printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
>  		kfree(fam_ops);
> -- 
> 2.7.0
> 
> 

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.