lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAAhV-H6iOwoYCCob6TmFf1boKQHb0=Mim2bWFvZCMfi9Rw5FPQ@mail.gmail.com>
Date: Wed, 30 Apr 2025 11:20:57 +0800
From: Huacai Chen <chenhuacai@...nel.org>
To: Gregory CLEMENT <gregory.clement@...tlin.com>
Cc: Thomas Bogendoerfer <tsbogend@...ha.franken.de>, Jiaxun Yang <jiaxun.yang@...goat.com>, 
	Vladimir Kondratiev <vladimir.kondratiev@...ileye.com>, Théo Lebrun <theo.lebrun@...tlin.com>, 
	Tawfik Bayouk <tawfik.bayouk@...ileye.com>, 
	Thomas Petazzoni <thomas.petazzoni@...tlin.com>, linux-mips@...r.kernel.org, 
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ

Hi, Gregory,

On Sun, Apr 27, 2025 at 6:13 PM Huacai Chen <chenhuacai@...nel.org> wrote:
>
> Hi, Gregory and Thomas,
>
> I'm sorry I'm late, but I have some questions about this patch.
>
> On Mon, Apr 14, 2025 at 3:12 AM Gregory CLEMENT
> <gregory.clement@...tlin.com> wrote:
> >
> > Added support for starting CPUs in parallel on EyeQ to speed up boot time.
> >
> > On EyeQ5, booting 8 CPUs is now ~90ms faster.
> > On EyeQ6, booting 32 CPUs is now ~650ms faster.
> >
> > Signed-off-by: Gregory CLEMENT <gregory.clement@...tlin.com>
> > ---
> > Hello,
> >
> > This patch allows CPUs to start in parallel. It has been tested on
> > EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
> > systems use CPS to support SMP.
> >
> > As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
> > faster.
> >
> > Currently, this support is only for EyeQ SoC. However, it should also
> > work for other CPUs using CPS. I am less sure about MT ASE support,
> > but this patch can be a good starting point. If anyone wants to add
> > support for other systems, I can share some ideas, especially for the
> > MIPS_GENERIC setup that needs to handle both types of SMP setups.
> >
> > Gregory
> > ---
> >  arch/mips/Kconfig                |  2 ++
> >  arch/mips/include/asm/topology.h |  3 +++
> >  arch/mips/kernel/smp-cps.c       |  2 ++
> >  arch/mips/kernel/smp.c           | 18 ++++++++++++++++++
> >  4 files changed, 25 insertions(+)
> >
> > diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> > index fc0772c1bad4ab736d440a18b972faf66a610783..e0e6ce2592b4168facf337b60fd889d76e81a407 100644
> > --- a/arch/mips/Kconfig
> > +++ b/arch/mips/Kconfig
> > @@ -617,6 +617,7 @@ config EYEQ
> >         select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
> >         select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
> >         select USE_OF
> > +       select HOTPLUG_PARALLEL if SMP
> >         help
> >           Select this to build a kernel supporting EyeQ SoC from Mobileye.
> >
> > @@ -2287,6 +2288,7 @@ config MIPS_CPS
> >         select MIPS_CM
> >         select MIPS_CPS_PM if HOTPLUG_CPU
> >         select SMP
> > +       select HOTPLUG_SMT if HOTPLUG_PARALLEL
> >         select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
> >         select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
> >         select SYS_SUPPORTS_HOTPLUG_CPU
> > diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h
> > index 0673d2d0f2e6dd02ed14d650e5af7b8a3c162b6f..5158c802eb6574d292f6ad2512cc7772fece4aae 100644
> > --- a/arch/mips/include/asm/topology.h
> > +++ b/arch/mips/include/asm/topology.h
> > @@ -16,6 +16,9 @@
> >  #define topology_core_id(cpu)                  (cpu_core(&cpu_data[cpu]))
> >  #define topology_core_cpumask(cpu)             (&cpu_core_map[cpu])
> >  #define topology_sibling_cpumask(cpu)          (&cpu_sibling_map[cpu])
> > +
> > +extern struct cpumask __cpu_primary_thread_mask;
> > +#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
> >  #endif
> >
> >  #endif /* __ASM_TOPOLOGY_H */
> > diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
> > index e85bd087467e8caf0640ad247ee5f8eb65107591..02bbd7ecd1b9557003186b9d3d98ae17eac5eb9f 100644
> > --- a/arch/mips/kernel/smp-cps.c
> > +++ b/arch/mips/kernel/smp-cps.c
> > @@ -236,6 +236,7 @@ static void __init cps_smp_setup(void)
> >                         /* Use the number of VPEs in cluster 0 core 0 for smp_num_siblings */
> >                         if (!cl && !c)
> >                                 smp_num_siblings = core_vpes;
> > +                       cpumask_set_cpu(nvpes, &__cpu_primary_thread_mask);
> >
> >                         for (v = 0; v < min_t(int, core_vpes, NR_CPUS - nvpes); v++) {
> >                                 cpu_set_cluster(&cpu_data[nvpes + v], cl);
> > @@ -364,6 +365,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
> >         cl = cpu_cluster(&current_cpu_data);
> >         c = cpu_core(&current_cpu_data);
> >         cluster_bootcfg = &mips_cps_cluster_bootcfg[cl];
> > +       cpu_smt_set_num_threads(core_vpes, core_vpes);
> >         core_bootcfg = &cluster_bootcfg->core_config[c];
> >         bitmap_set(cluster_bootcfg->core_power, cpu_core(&current_cpu_data), 1);
> >         atomic_set(&core_bootcfg->vpe_mask, 1 << cpu_vpe_id(&current_cpu_data));
> > diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
> > index 39e193cad2b9e4f877e920b71bbbb210e52607d0..1726744f2b2ec10a44420a7b9b9cd04f06c4d2f6 100644
> > --- a/arch/mips/kernel/smp.c
> > +++ b/arch/mips/kernel/smp.c
> > @@ -56,8 +56,10 @@ EXPORT_SYMBOL(cpu_sibling_map);
> >  cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
> >  EXPORT_SYMBOL(cpu_core_map);
> >
> > +#ifndef CONFIG_HOTPLUG_PARALLEL
> >  static DECLARE_COMPLETION(cpu_starting);
> >  static DECLARE_COMPLETION(cpu_running);
> > +#endif
> >
> >  /*
> >   * A logical cpu mask containing only one VPE per core to
> > @@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
> >
> >  cpumask_t cpu_coherent_mask;
> >
> > +struct cpumask __cpu_primary_thread_mask __read_mostly;
> > +
> >  unsigned int smp_max_threads __initdata = UINT_MAX;
> >
> >  static int __init early_nosmt(char *s)
> > @@ -374,10 +378,15 @@ asmlinkage void start_secondary(void)
> >         set_cpu_core_map(cpu);
> >
> >         cpumask_set_cpu(cpu, &cpu_coherent_mask);
> > +#ifdef CONFIG_HOTPLUG_PARALLEL
> > +       cpuhp_ap_sync_alive();
> According to the description of commit
> 9244724fbf8ab394a7210e8e93bf037abc, this is a "synchronization point":
> things run in parallel before this point and are serialized after it.
>
> Unfortunately, set_cpu_sibling_map() and set_cpu_core_map() cannot
> be executed in parallel. You may not have observed any problems, but
> in theory this is not correct.
I don't know whether you have run reboot tests (~1000 iterations).
Jiaxun Yang submitted similar patches for LoongArch [1], but during
reboot tests we encountered the problems that I described in my
previous reply.

[1] https://lore.kernel.org/loongarch/20240716-loongarch-hotplug-v3-0-af59b3bb35c8@flygoat.com/

Huacai

>
> Huacai
>
> > +#endif
> >         notify_cpu_starting(cpu);
> >
> > +#ifndef CONFIG_HOTPLUG_PARALLEL
> >         /* Notify boot CPU that we're starting & ready to sync counters */
> >         complete(&cpu_starting);
> > +#endif
> >
> >         synchronise_count_slave(cpu);
> >
> > @@ -386,11 +395,13 @@ asmlinkage void start_secondary(void)
> >
> >         calculate_cpu_foreign_map();
> >
> > +#ifndef CONFIG_HOTPLUG_PARALLEL
> >         /*
> >          * Notify boot CPU that we're up & online and it can safely return
> >          * from __cpu_up
> >          */
> >         complete(&cpu_running);
> > +#endif
> >
> >         /*
> >          * irq will be enabled in ->smp_finish(), enabling it too early
> > @@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void)
> >         set_cpu_online(0, true);
> >  }
> >
> > +#ifdef CONFIG_HOTPLUG_PARALLEL
> > +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
> > +{
> > +       return mp_ops->boot_secondary(cpu, tidle);
> > +}
> > +#else
> >  int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> >  {
> >         int err;
> > @@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> >         wait_for_completion(&cpu_running);
> >         return 0;
> >  }
> > +#endif
> >
> >  #ifdef CONFIG_PROFILING
> >  /* Not really SMP stuff ... */
> >
> > ---
> > base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
> > change-id: 20250411-parallel-cpu-bringup-78999a9235ea
> >
> > Best regards,
> > --
> > Grégory CLEMENT, Bootlin
> > Embedded Linux and Kernel engineering
> > https://bootlin.com
> >
> >

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ