[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251027133542.GA8279@yaz-khff2.amd.com>
Date: Mon, 27 Oct 2025 09:35:42 -0400
From: Yazen Ghannam <yazen.ghannam@....com>
To: Borislav Petkov <bp@...en8.de>
Cc: x86@...nel.org, Tony Luck <tony.luck@...el.com>,
"Rafael J. Wysocki" <rafael@...nel.org>,
Len Brown <lenb@...nel.org>, linux-kernel@...r.kernel.org,
linux-edac@...r.kernel.org, Smita.KoralahalliChannabasappa@....com,
Qiuxu Zhuo <qiuxu.zhuo@...el.com>,
Nikolay Borisov <nik.borisov@...e.com>,
Bert Karwatzki <spasswolf@....de>, linux-acpi@...r.kernel.org
Subject: Re: [PATCH v7 2/8] x86/mce: Unify AMD DFR handler with MCA Polling
On Sat, Oct 25, 2025 at 05:03:04PM +0200, Borislav Petkov wrote:
> On Fri, Oct 24, 2025 at 11:27:23PM +0200, Borislav Petkov wrote:
> > On Fri, Oct 24, 2025 at 04:30:12PM -0400, Yazen Ghannam wrote:
> > > Should I send another revision?
> >
> > Nah, I'm not done simplifying this yet. :-P
>
> Yeah, no, looks ok now:
>
> ---
> From: Yazen Ghannam <yazen.ghannam@....com>
> Date: Thu, 16 Oct 2025 16:37:47 +0000
> Subject: [PATCH] x86/mce: Unify AMD DFR handler with MCA Polling
>
> AMD systems optionally support a deferred error interrupt. The interrupt
> should be used as another signal to trigger MCA polling. This is similar to
> how other MCA interrupts are handled.
>
> Deferred errors do not require any special handling related to the interrupt,
> e.g. resetting or rearming the interrupt.
>
> However, Scalable MCA systems include a pair of registers, MCA_DESTAT and
> MCA_DEADDR, that should be checked for valid errors. This check should be done
> whenever MCA registers are polled. Currently, the deferred error interrupt
> does this check, but the MCA polling function does not.
>
> Call the MCA polling function when handling the deferred error interrupt. This
> keeps all "polling" cases in a common function.
>
> Add an SMCA status check helper. This will do the same status check and
> register clearing that the interrupt handler has done, and it extends the
> common polling flow to find AMD deferred errors.
>
> Clear the MCA_DESTAT register at the end of the handler rather than the
> beginning. This maintains the procedure that the 'status' register must be
> cleared as the final step.
>
> [ bp: Zap commit message pieces explaining what the patch does;
> zap unnecessary special-casing of deferred errors. ]
>
> Signed-off-by: Yazen Ghannam <yazen.ghannam@....com>
> Signed-off-by: Borislav Petkov (AMD) <bp@...en8.de>
> Link: https://lore.kernel.org/all/20251016-wip-mca-updates-v7-0-5c139a4062cb@amd.com
> ---
> arch/x86/include/asm/mce.h | 6 ++
> arch/x86/kernel/cpu/mce/amd.c | 111 ++++-----------------------------
> arch/x86/kernel/cpu/mce/core.c | 44 ++++++++++++-
> 3 files changed, 62 insertions(+), 99 deletions(-)
>
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 31e3cb550fb3..7d6588195d56 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -165,6 +165,12 @@
> */
> #define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
>
> +/*
> + * Indicates that handler should check and clear Deferred error registers
> + * rather than common ones.
> + */
> +#define MCE_CHECK_DFR_REGS BIT_ULL(8)
> +
> /*
> * This structure contains all data related to the MCE log. Also
> * carries a signature to make it easier to find from external
> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
> index ac6a98aa7bc2..d9f9ee7db5c8 100644
> --- a/arch/x86/kernel/cpu/mce/amd.c
> +++ b/arch/x86/kernel/cpu/mce/amd.c
> @@ -56,6 +56,7 @@ static bool thresholding_irq_en;
>
> struct mce_amd_cpu_data {
> mce_banks_t thr_intr_banks;
> + mce_banks_t dfr_intr_banks;
> };
>
> static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data);
> @@ -300,8 +301,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
> * APIC based interrupt. First, check that no interrupt has been
> * set.
> */
> - if ((low & BIT(5)) && !((high >> 5) & 0x3))
> + if ((low & BIT(5)) && !((high >> 5) & 0x3)) {
> + __set_bit(bank, this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
> high |= BIT(5);
> + }
>
> this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));
>
> @@ -792,37 +795,6 @@ bool amd_mce_usable_address(struct mce *m)
> return false;
> }
>
> -static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
> -{
> - struct mce_hw_err err;
> - struct mce *m = &err.m;
> -
> - mce_prep_record(&err);
> -
> - m->status = status;
> - m->misc = misc;
> - m->bank = bank;
> - m->tsc = rdtsc();
> -
> - if (m->status & MCI_STATUS_ADDRV) {
> - m->addr = addr;
> -
> - smca_extract_err_addr(m);
> - }
> -
> - if (mce_flags.smca) {
> - rdmsrq(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
> -
> - if (m->status & MCI_STATUS_SYNDV) {
> - rdmsrq(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
> - rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
> - rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
> - }
> - }
> -
> - mce_log(&err);
> -}
> -
> DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
> {
> trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
> @@ -832,75 +804,10 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
> apic_eoi();
> }
>
> -/*
> - * Returns true if the logged error is deferred. False, otherwise.
> - */
> -static inline bool
> -_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
> -{
> - u64 status, addr = 0;
> -
> - rdmsrq(msr_stat, status);
> - if (!(status & MCI_STATUS_VAL))
> - return false;
> -
> - if (status & MCI_STATUS_ADDRV)
> - rdmsrq(msr_addr, addr);
> -
> - __log_error(bank, status, addr, misc);
> -
> - wrmsrq(msr_stat, 0);
> -
> - return status & MCI_STATUS_DEFERRED;
> -}
> -
> -static bool _log_error_deferred(unsigned int bank, u32 misc)
> -{
> - if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
> - mca_msr_reg(bank, MCA_ADDR), misc))
> - return false;
> -
> - /*
> - * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers.
> - * Return true here to avoid accessing these registers.
> - */
> - if (!mce_flags.smca)
> - return true;
> -
> - /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
> - wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
> - return true;
> -}
> -
> -/*
> - * We have three scenarios for checking for Deferred errors:
> - *
> - * 1) Non-SMCA systems check MCA_STATUS and log error if found.
> - * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
> - * clear MCA_DESTAT.
> - * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
> - * log it.
> - */
> -static void log_error_deferred(unsigned int bank)
> -{
> - if (_log_error_deferred(bank, 0))
> - return;
> -
> - /*
> - * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
> - * for a valid error.
> - */
> - _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
> - MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
> -}
> -
> /* APIC interrupt handler for deferred errors */
> static void amd_deferred_error_interrupt(void)
> {
> - unsigned int bank;
> -
> - for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
> - log_error_deferred(bank);
> + machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
> }
>
> static void reset_block(struct threshold_block *block)
> @@ -952,6 +859,14 @@ void amd_clear_bank(struct mce *m)
> {
> amd_reset_thr_limit(m->bank);
>
> + /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */
> + if (m->status & MCI_STATUS_DEFERRED)
> + mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
> +
> + /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */
> + if (m->kflags & MCE_CHECK_DFR_REGS)
> + return;
> +
> mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
> }
>
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 460e90a1a0b1..7be062429ce3 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -687,7 +687,10 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
> m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC));
>
> if (m->status & MCI_STATUS_ADDRV) {
> - m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
> + if (m->kflags & MCE_CHECK_DFR_REGS)
> + m->addr = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DEADDR(i));
> + else
> + m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
>
> /*
> * Mask the reported address by the reported granularity.
> @@ -714,6 +717,42 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
>
> DEFINE_PER_CPU(unsigned, mce_poll_count);
>
> +/*
> + * We have three scenarios for checking for Deferred errors:
> + *
> + * 1) Non-SMCA systems check MCA_STATUS and log error if found.
> + * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
> + * clear MCA_DESTAT.
> + * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
> + * log it.
> + */
> +static bool smca_should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
> +{
> + struct mce *m = &err->m;
> +
> + /*
> + * If the MCA_STATUS register has a deferred error, then continue using it as
> + * the status register.
> + *
> + * MCA_DESTAT will be cleared at the end of the handler.
> + */
> + if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED))
> + return true;
> +
> + /*
> + * If the MCA_DESTAT register has a deferred error, then use it instead.
> + *
> + * MCA_STATUS will not be cleared at the end of the handler.
> + */
> + m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
> + if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) {
> + m->kflags |= MCE_CHECK_DFR_REGS;
> + return true;
> + }
> +
> + return false;
> +}
> +
No, this still isn't right. Sorry, I had a brain freeze before.
This function only returns true for valid deferred errors. Other errors
return false.
> /*
> * Newer Intel systems that support software error
> * recovery need to make additional checks. Other
> @@ -740,6 +779,9 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
> {
> struct mce *m = &err->m;
>
> + if (mce_flags.smca)
> + return smca_should_log_poll_error(flags, err);
> +
This will never find corrected errors or uncorrected (non-deferred)
errors. That's one of the reasons to add the MCP_DFR flag.
Otherwise, we'd need to include some of the same checks from below.
> /* If this entry is not valid, ignore it. */
> if (!(m->status & MCI_STATUS_VAL))
> return false;
> --
Thanks,
Yazen
Powered by blists - more mailing lists