[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ccf57fd2-45b8-1f1f-f46a-55d7f4c56161@cybernetics.com>
Date: Wed, 26 Apr 2023 13:37:52 -0400
From: Tony Battersby <tonyb@...ernetics.com>
To: Thomas Gleixner <tglx@...utronix.de>,
Dave Hansen <dave.hansen@...el.com>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org
Cc: "H. Peter Anvin" <hpa@...or.com>,
Mario Limonciello <mario.limonciello@....com>,
Tom Lendacky <thomas.lendacky@....com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Andi Kleen <ak@...ux.intel.com>
Subject: Re: [PATCH RFC] x86/cpu: fix intermittent lockup on poweroff
On 4/26/23 12:37, Thomas Gleixner wrote:
> The problem really seems to be that the control CPU goes off before the
> other CPUs have finished and depending on timing that causes the
> wreckage. Otherwise the mdelay(100) would not have helped at all.
>
> But looking at it, that num_online_cpus() == 1 check in
> stop_other_cpus() is fragile as hell, independent of that wbinvd() issue.
>
> Something like the completely untested patch below should cure that.
>
> Thanks,
>
> tglx
> ---
> arch/x86/include/asm/cpu.h | 2 ++
> arch/x86/kernel/process.c | 10 ++++++++++
> arch/x86/kernel/smp.c | 15 ++++++++++++---
> 3 files changed, 24 insertions(+), 3 deletions(-)
>
> --- a/arch/x86/include/asm/cpu.h
> +++ b/arch/x86/include/asm/cpu.h
> @@ -98,4 +98,6 @@ extern u64 x86_read_arch_cap_msr(void);
> int intel_find_matching_signature(void *mc, unsigned int csig, int cpf);
> int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type);
>
> +extern atomic_t stop_cpus_count;
> +
> #endif /* _ASM_X86_CPU_H */
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -752,6 +752,8 @@ bool xen_set_default_idle(void)
> }
> #endif
>
> +atomic_t stop_cpus_count;
> +
> void __noreturn stop_this_cpu(void *dummy)
> {
> local_irq_disable();
> @@ -776,6 +778,14 @@ void __noreturn stop_this_cpu(void *dumm
> */
> if (cpuid_eax(0x8000001f) & BIT(0))
> native_wbinvd();
> +
> + /*
> + * native_stop_other_cpus() will write to @stop_cpus_count after
> + * observing that it went down to zero, which will invalidate the
> + * cacheline on this CPU.
> + */
> + atomic_dec(&stop_cpus_count);
> +
> for (;;) {
> /*
> * Use native_halt() so that memory contents don't change
> --- a/arch/x86/kernel/smp.c
> +++ b/arch/x86/kernel/smp.c
> @@ -27,6 +27,7 @@
> #include <asm/mmu_context.h>
> #include <asm/proto.h>
> #include <asm/apic.h>
> +#include <asm/cpu.h>
> #include <asm/idtentry.h>
> #include <asm/nmi.h>
> #include <asm/mce.h>
> @@ -171,6 +172,8 @@ static void native_stop_other_cpus(int w
> if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
> return;
>
> + atomic_set(&stop_cpus_count, num_online_cpus() - 1);
> +
> /* sync above data before sending IRQ */
> wmb();
>
> @@ -183,12 +186,12 @@ static void native_stop_other_cpus(int w
> * CPUs reach shutdown state.
> */
> timeout = USEC_PER_SEC;
> - while (num_online_cpus() > 1 && timeout--)
> + while (atomic_read(&stop_cpus_count) > 0 && timeout--)
> udelay(1);
> }
>
> /* if the REBOOT_VECTOR didn't work, try with the NMI */
> - if (num_online_cpus() > 1) {
> + if (atomic_read(&stop_cpus_count) > 0) {
> /*
> * If NMI IPI is enabled, try to register the stop handler
> * and send the IPI. In any case try to wait for the other
> @@ -208,7 +211,7 @@ static void native_stop_other_cpus(int w
> * one or more CPUs do not reach shutdown state.
> */
> timeout = USEC_PER_MSEC * 10;
> - while (num_online_cpus() > 1 && (wait || timeout--))
> + while (atomic_read(&stop_cpus_count) > 0 && (wait || timeout--))
> udelay(1);
> }
>
> @@ -216,6 +219,12 @@ static void native_stop_other_cpus(int w
> disable_local_APIC();
> mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
> local_irq_restore(flags);
> +
> + /*
> + * Ensure that the cache line is invalidated on the other CPUs. See
> + * comment vs. SME in stop_this_cpu().
> + */
> + atomic_set(&stop_cpus_count, INT_MAX);
> }
>
> /*
>
Tested-by: Tony Battersby <tonyb@...ernetics.com>
I got 10 successful poweroffs in a row with wbinvd() enabled. As I
mentioned before, though, I don't have an AMD CPU, so I could not test
the SME cache invalidation logic.
I will reply with my patch with an updated title and description.
Tony
Powered by blists - more mailing lists