lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8734mg92pt.ffs@tglx>
Date: Tue, 03 Sep 2024 17:27:58 +0200
From: Thomas Gleixner <tglx@...utronix.de>
To: Jan Kiszka <jan.kiszka@...mens.com>, Henning Schild
 <henning.schild@...mens.com>
Cc: Peter Zijlstra <peterz@...radead.org>, x86@...nel.org,
 linux-kernel@...r.kernel.org, Ingo Molnar <mingo@...hat.com>, Borislav
 Petkov <bp@...en8.de>, Guenter Roeck <linux@...ck-us.net>,
 xenomai@...omai.org, guocai.he.cn@...driver.com
Subject: Re: sched: Unexpected reschedule of offline CPU#2!

On Tue, Jul 27 2021 at 10:46, Jan Kiszka wrote:

Picking up this dead thread again.

> What is supposed to prevent the following in mainline:
>
> CPU 0                   CPU 1                      CPU 2
>
> native_stop_other_cpus                             <INTERRUPT>
>   send_IPI_allbutself                              ...
>                         <INTERRUPT>
>                         sysvec_reboot
>                           stop_this_cpu
>                             set_cpu_online(false)
>                                                    native_smp_send_reschedule(1)
>                                                      if (cpu_is_offline(1)) ...

Nothing. And that's what probably happens if I read the stack trace
correctly.

But we can be slightly smarter about this for the reboot IPI (the NMI
case does not have that issue).

CPU 0                   CPU 1                      CPU 2

native_stop_other_cpus                             <INTERRUPT>
   send_IPI_allbutself                              ...
                         <IPI>
                         sysvec_reboot
                           wait_for_others();
                                                    </INTERRUPT>
                                                    <IPI>
                                                    sysvec_reboot
                                                    wait_for_others();
                           stop_this_cpu();           stop_this_cpu();
                             set_cpu_online(false);     set_cpu_online(false);

Something like the uncompiled below.

Thanks,

        tglx
---
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -68,5 +68,6 @@ bool intel_find_matching_signature(void
 int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type);
 
 extern struct cpumask cpus_stop_mask;
+atomic_t cpus_stop_in_ipi;
 
 #endif /* _ASM_X86_CPU_H */
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -721,7 +721,7 @@ bool xen_set_default_idle(void);
 #define xen_set_default_idle 0
 #endif
 
-void __noreturn stop_this_cpu(void *dummy);
+void __noreturn stop_this_cpu(bool sync);
 void microcode_check(struct cpuinfo_x86 *prev_info);
 void store_cpu_caps(struct cpuinfo_x86 *info);
 
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -791,9 +791,10 @@ bool xen_set_default_idle(void)
 }
 #endif
 
+atomic_t cpus_stop_in_ipi;
 struct cpumask cpus_stop_mask;
 
-void __noreturn stop_this_cpu(void *dummy)
+void __noreturn stop_this_cpu(bool sync)
 {
 	struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info);
 	unsigned int cpu = smp_processor_id();
@@ -801,6 +802,16 @@ void __noreturn stop_this_cpu(void *dumm
 	local_irq_disable();
 
 	/*
+	 * Account this CPU and loop until the other CPUs reached this
+	 * point. If they don't react, the control CPU will raise an NMI.
+	 */
+	if (sync) {
+		atomic_dec(&cpus_stop_in_ipi);
+		while (atomic_read(&cpus_stop_in_ipi))
+			cpu_relax();
+	}
+
+	/*
 	 * Remove this CPU from the online mask and disable it
 	 * unconditionally. This might be redundant in case that the reboot
 	 * vector was handled late and stop_other_cpus() sent an NMI.
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -788,7 +788,7 @@ static void native_machine_halt(void)
 
 	tboot_shutdown(TB_SHUTDOWN_HALT);
 
-	stop_this_cpu(NULL);
+	stop_this_cpu(false);
 }
 
 static void native_machine_power_off(void)
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -125,7 +125,7 @@ static int smp_stop_nmi_callback(unsigne
 		return NMI_HANDLED;
 
 	cpu_emergency_disable_virtualization();
-	stop_this_cpu(NULL);
+	stop_this_cpu(false);
 
 	return NMI_HANDLED;
 }
@@ -137,7 +137,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
 {
 	apic_eoi();
 	cpu_emergency_disable_virtualization();
-	stop_this_cpu(NULL);
+	stop_this_cpu(true);
 }
 
 static int register_stop_handler(void)
@@ -189,6 +189,7 @@ static void native_stop_other_cpus(int w
 	 */
 	cpumask_copy(&cpus_stop_mask, cpu_online_mask);
 	cpumask_clear_cpu(this_cpu, &cpus_stop_mask);
+	atomic_set(&cpus_stop_in_ipi, num_online_cpus() - 1);
 
 	if (!cpumask_empty(&cpus_stop_mask)) {
 		apic_send_IPI_allbutself(REBOOT_VECTOR);
@@ -235,10 +236,12 @@ static void native_stop_other_cpus(int w
 	local_irq_restore(flags);
 
 	/*
-	 * Ensure that the cpus_stop_mask cache lines are invalidated on
-	 * the other CPUs. See comment vs. SME in stop_this_cpu().
+	 * Ensure that the cpus_stop_mask and cpus_stop_in_ipi cache lines
+	 * are invalidated on the other CPUs. See comment vs. SME in
+	 * stop_this_cpu().
 	 */
 	cpumask_clear(&cpus_stop_mask);
+	atomic_set(&cpus_stop_in_ipi, 0);
 }
 
 /*


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ