[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAAhV-H6iOwoYCCob6TmFf1boKQHb0=Mim2bWFvZCMfi9Rw5FPQ@mail.gmail.com>
Date: Wed, 30 Apr 2025 11:20:57 +0800
From: Huacai Chen <chenhuacai@...nel.org>
To: Gregory CLEMENT <gregory.clement@...tlin.com>
Cc: Thomas Bogendoerfer <tsbogend@...ha.franken.de>, Jiaxun Yang <jiaxun.yang@...goat.com>,
Vladimir Kondratiev <vladimir.kondratiev@...ileye.com>, Théo Lebrun <theo.lebrun@...tlin.com>,
Tawfik Bayouk <tawfik.bayouk@...ileye.com>,
Thomas Petazzoni <thomas.petazzoni@...tlin.com>, linux-mips@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ
Hi, Gregory,
On Sun, Apr 27, 2025 at 6:13 PM Huacai Chen <chenhuacai@...nel.org> wrote:
>
> Hi, Gregory and Thomas,
>
> I'm sorry I'm late, but I have some questions about this patch.
>
> On Mon, Apr 14, 2025 at 3:12 AM Gregory CLEMENT
> <gregory.clement@...tlin.com> wrote:
> >
> > Added support for starting CPUs in parallel on EyeQ to speed up boot time.
> >
> > On EyeQ5, booting 8 CPUs is now ~90ms faster.
> > On EyeQ6, booting 32 CPUs is now ~650ms faster.
> >
> > Signed-off-by: Gregory CLEMENT <gregory.clement@...tlin.com>
> > ---
> > Hello,
> >
> > This patch allows CPUs to start in parallel. It has been tested on
> > EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
> > systems use CPS to support SMP.
> >
> > As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
> > faster.
> >
> > Currently, this support is only for EyeQ SoC. However, it should also
> > work for other CPUs using CPS. I am less sure about MT ASE support,
> > but this patch can be a good starting point. If anyone wants to add
> > support for other systems, I can share some ideas, especially for the
> > MIPS_GENERIC setup that needs to handle both types of SMP setups.
> >
> > Gregory
> > ---
> > arch/mips/Kconfig | 2 ++
> > arch/mips/include/asm/topology.h | 3 +++
> > arch/mips/kernel/smp-cps.c | 2 ++
> > arch/mips/kernel/smp.c | 18 ++++++++++++++++++
> > 4 files changed, 25 insertions(+)
> >
> > diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> > index fc0772c1bad4ab736d440a18b972faf66a610783..e0e6ce2592b4168facf337b60fd889d76e81a407 100644
> > --- a/arch/mips/Kconfig
> > +++ b/arch/mips/Kconfig
> > @@ -617,6 +617,7 @@ config EYEQ
> > select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
> > select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
> > select USE_OF
> > + select HOTPLUG_PARALLEL if SMP
> > help
> > Select this to build a kernel supporting EyeQ SoC from Mobileye.
> >
> > @@ -2287,6 +2288,7 @@ config MIPS_CPS
> > select MIPS_CM
> > select MIPS_CPS_PM if HOTPLUG_CPU
> > select SMP
> > + select HOTPLUG_SMT if HOTPLUG_PARALLEL
> > select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
> > select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
> > select SYS_SUPPORTS_HOTPLUG_CPU
> > diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h
> > index 0673d2d0f2e6dd02ed14d650e5af7b8a3c162b6f..5158c802eb6574d292f6ad2512cc7772fece4aae 100644
> > --- a/arch/mips/include/asm/topology.h
> > +++ b/arch/mips/include/asm/topology.h
> > @@ -16,6 +16,9 @@
> > #define topology_core_id(cpu) (cpu_core(&cpu_data[cpu]))
> > #define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
> > #define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu])
> > +
> > +extern struct cpumask __cpu_primary_thread_mask;
> > +#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
> > #endif
> >
> > #endif /* __ASM_TOPOLOGY_H */
> > diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
> > index e85bd087467e8caf0640ad247ee5f8eb65107591..02bbd7ecd1b9557003186b9d3d98ae17eac5eb9f 100644
> > --- a/arch/mips/kernel/smp-cps.c
> > +++ b/arch/mips/kernel/smp-cps.c
> > @@ -236,6 +236,7 @@ static void __init cps_smp_setup(void)
> > /* Use the number of VPEs in cluster 0 core 0 for smp_num_siblings */
> > if (!cl && !c)
> > smp_num_siblings = core_vpes;
> > + cpumask_set_cpu(nvpes, &__cpu_primary_thread_mask);
> >
> > for (v = 0; v < min_t(int, core_vpes, NR_CPUS - nvpes); v++) {
> > cpu_set_cluster(&cpu_data[nvpes + v], cl);
> > @@ -364,6 +365,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
> > cl = cpu_cluster(¤t_cpu_data);
> > c = cpu_core(¤t_cpu_data);
> > cluster_bootcfg = &mips_cps_cluster_bootcfg[cl];
> > + cpu_smt_set_num_threads(core_vpes, core_vpes);
> > core_bootcfg = &cluster_bootcfg->core_config[c];
> > bitmap_set(cluster_bootcfg->core_power, cpu_core(¤t_cpu_data), 1);
> > atomic_set(&core_bootcfg->vpe_mask, 1 << cpu_vpe_id(¤t_cpu_data));
> > diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
> > index 39e193cad2b9e4f877e920b71bbbb210e52607d0..1726744f2b2ec10a44420a7b9b9cd04f06c4d2f6 100644
> > --- a/arch/mips/kernel/smp.c
> > +++ b/arch/mips/kernel/smp.c
> > @@ -56,8 +56,10 @@ EXPORT_SYMBOL(cpu_sibling_map);
> > cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
> > EXPORT_SYMBOL(cpu_core_map);
> >
> > +#ifndef CONFIG_HOTPLUG_PARALLEL
> > static DECLARE_COMPLETION(cpu_starting);
> > static DECLARE_COMPLETION(cpu_running);
> > +#endif
> >
> > /*
> > * A logical cpu mask containing only one VPE per core to
> > @@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
> >
> > cpumask_t cpu_coherent_mask;
> >
> > +struct cpumask __cpu_primary_thread_mask __read_mostly;
> > +
> > unsigned int smp_max_threads __initdata = UINT_MAX;
> >
> > static int __init early_nosmt(char *s)
> > @@ -374,10 +378,15 @@ asmlinkage void start_secondary(void)
> > set_cpu_core_map(cpu);
> >
> > cpumask_set_cpu(cpu, &cpu_coherent_mask);
> > +#ifdef CONFIG_HOTPLUG_PARALLEL
> > + cpuhp_ap_sync_alive();
> This is a "synchronization point" due to the description from commit
> 9244724fbf8ab394a7210e8e93bf037abc, which means things are parallel
> before this point and serialized after this point.
>
> But unfortunately, set_cpu_sibling_map() and set_cpu_core_map() cannot
> be executed in parallel. Maybe you haven't observed problems, but in
> theory it's not correct.
I don't know whether you have done reboot tests (for ~1000 times),
Jiaxun Yang submitted similar patches for LoongArch [1], but during
reboot tests we encountered problems that I have described in my
previous reply.
[1] https://lore.kernel.org/loongarch/20240716-loongarch-hotplug-v3-0-af59b3bb35c8@flygoat.com/
Huacai
>
> Huacai
>
> > +#endif
> > notify_cpu_starting(cpu);
> >
> > +#ifndef CONFIG_HOTPLUG_PARALLEL
> > /* Notify boot CPU that we're starting & ready to sync counters */
> > complete(&cpu_starting);
> > +#endif
> >
> > synchronise_count_slave(cpu);
> >
> > @@ -386,11 +395,13 @@ asmlinkage void start_secondary(void)
> >
> > calculate_cpu_foreign_map();
> >
> > +#ifndef CONFIG_HOTPLUG_PARALLEL
> > /*
> > * Notify boot CPU that we're up & online and it can safely return
> > * from __cpu_up
> > */
> > complete(&cpu_running);
> > +#endif
> >
> > /*
> > * irq will be enabled in ->smp_finish(), enabling it too early
> > @@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void)
> > set_cpu_online(0, true);
> > }
> >
> > +#ifdef CONFIG_HOTPLUG_PARALLEL
> > +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
> > +{
> > + return mp_ops->boot_secondary(cpu, tidle);
> > +}
> > +#else
> > int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> > {
> > int err;
> > @@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> > wait_for_completion(&cpu_running);
> > return 0;
> > }
> > +#endif
> >
> > #ifdef CONFIG_PROFILING
> > /* Not really SMP stuff ... */
> >
> > ---
> > base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
> > change-id: 20250411-parallel-cpu-bringup-78999a9235ea
> >
> > Best regards,
> > --
> > Grégory CLEMENT, Bootlin
> > Embedded Linux and Kernel engineering
> > https://bootlin.com
> >
> >
Powered by blists - more mailing lists