lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZBxurh8ZC39pzZ0a@yaz-khff.amd.com>
Date:   Thu, 23 Mar 2023 11:22:22 -0400
From:   Yazen Ghannam <yazen.ghannam@....com>
To:     Tony Luck <tony.luck@...el.com>
Cc:     Borislav Petkov <bp@...en8.de>,
        Smita.KoralahalliChannabasappa@....com,
        dave.hansen@...ux.intel.com, hpa@...or.com,
        linux-edac@...r.kernel.org, linux-kernel@...r.kernel.org,
        x86@...nel.org, patches@...ts.linux.dev
Subject: Re: [PATCH v3 3/5] x86/mce: Introduce mce_handle_storm() to deal
 with begin/end of storms

On Fri, Mar 17, 2023 at 10:20:40AM -0700, Tony Luck wrote:
> From: Smita Koralahalli <Smita.KoralahalliChannabasappa@....com>
> 
> Intel and AMD need to take different actions when a storm begins or
> ends. Prepare for the storm code moving from intel.c into core.c by
> adding a function that checks CPU vendor to pick the right action.
> 
> No functional changes.
> 
> [Tony: Changed from function pointer to regular function]
> 
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@....com>
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> ---
>  arch/x86/kernel/cpu/mce/internal.h |  3 +++
>  arch/x86/kernel/cpu/mce/core.c     |  9 +++++++++
>  arch/x86/kernel/cpu/mce/intel.c    | 12 ++++++++++--
>  3 files changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
> index 72fbec8f6c3c..f37816b4d4cf 100644
> --- a/arch/x86/kernel/cpu/mce/internal.h
> +++ b/arch/x86/kernel/cpu/mce/internal.h
> @@ -43,12 +43,14 @@ extern mce_banks_t mce_banks_ce_disabled;
>  void track_cmci_storm(int bank, u64 status);
>  
>  #ifdef CONFIG_X86_MCE_INTEL
> +void mce_intel_handle_storm(int bank, bool on);
>  void cmci_disable_bank(int bank);
>  void intel_init_cmci(void);
>  void intel_init_lmce(void);
>  void intel_clear_lmce(void);
>  bool intel_filter_mce(struct mce *m);
>  #else
> +static inline void mce_intel_handle_storm(int bank, bool on) { }
>  static inline void cmci_disable_bank(int bank) { }
>  static inline void intel_init_cmci(void) { }
>  static inline void intel_init_lmce(void) { }
> @@ -57,6 +59,7 @@ static inline bool intel_filter_mce(struct mce *m) { return false; }
>  #endif
>  
>  void mce_timer_kick(bool storm);
> +void mce_handle_storm(int bank, bool on);
>  
>  #ifdef CONFIG_ACPI_APEI
>  int apei_write_mce(struct mce *m);
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 776d4724b1e0..f4d2a7ba29f7 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -1985,6 +1985,15 @@ static void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c)
>  	intel_clear_lmce();
>  }
>  
> +void mce_handle_storm(int bank, bool on)
> +{
> +	switch (boot_cpu_data.x86_vendor) {
> +	case X86_VENDOR_INTEL:
> +		mce_intel_handle_storm(bank, on);
> +		break;
> +	}
> +}
> +
>  static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
>  {
>  	switch (c->x86_vendor) {
> diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
> index 4106877de028..4238b73c2143 100644
> --- a/arch/x86/kernel/cpu/mce/intel.c
> +++ b/arch/x86/kernel/cpu/mce/intel.c
> @@ -152,6 +152,14 @@ static void cmci_set_threshold(int bank, int thresh)
>  	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
>  }
>  
> +void mce_intel_handle_storm(int bank, bool on)
> +{
> +	if (on)
> +		cmci_set_threshold(bank, cmci_threshold[bank]);
> +	else
> +		cmci_set_threshold(bank, CMCI_STORM_THRESHOLD);

I think these conditions are reversed. When storm handling is 'on' we should
use CMCI_STORM_THRESHOLD, and when off use the saved bank threshold.

> +}
> +
>  static void cmci_storm_begin(int bank)
>  {
>  	__set_bit(bank, this_cpu_ptr(mce_poll_banks));
> @@ -211,13 +219,13 @@ void track_cmci_storm(int bank, u64 status)
>  		if (history & GENMASK_ULL(STORM_END_POLL_THRESHOLD - 1, 0))
>  			return;
>  		pr_notice("CPU%d BANK%d CMCI storm subsided\n", smp_processor_id(), bank);
> -		cmci_set_threshold(bank, cmci_threshold[bank]);
> +		mce_handle_storm(bank, true);

Should be 'false' when the storm subsides.

>  		cmci_storm_end(bank);
>  	} else {
>  		if (hweight64(history) < STORM_BEGIN_THRESHOLD)
>  			return;
>  		pr_notice("CPU%d BANK%d CMCI storm detected\n", smp_processor_id(), bank);
> -		cmci_set_threshold(bank, CMCI_STORM_THRESHOLD);
> +		mce_handle_storm(bank, false);

Should be 'true' when the storm starts.

>  		cmci_storm_begin(bank);
>  	}
>  }
> --

Thanks,
Yazen

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ