[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAAhV-H6JSKwWvLwPSK7Bu6jZixRn4U+xtpxGL4KBtsmjhc3PVA@mail.gmail.com>
Date: Sun, 27 Apr 2025 18:13:10 +0800
From: Huacai Chen <chenhuacai@...nel.org>
To: Gregory CLEMENT <gregory.clement@...tlin.com>
Cc: Thomas Bogendoerfer <tsbogend@...ha.franken.de>, Jiaxun Yang <jiaxun.yang@...goat.com>,
Vladimir Kondratiev <vladimir.kondratiev@...ileye.com>, Théo Lebrun <theo.lebrun@...tlin.com>,
Tawfik Bayouk <tawfik.bayouk@...ileye.com>,
Thomas Petazzoni <thomas.petazzoni@...tlin.com>, linux-mips@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ
Hi, Gregory and Thomas,
I'm sorry I'm late, but I have some questions about this patch.
On Mon, Apr 14, 2025 at 3:12 AM Gregory CLEMENT
<gregory.clement@...tlin.com> wrote:
>
> Added support for starting CPUs in parallel on EyeQ to speed up boot time.
>
> On EyeQ5, booting 8 CPUs is now ~90ms faster.
> On EyeQ6, booting 32 CPUs is now ~650ms faster.
>
> Signed-off-by: Gregory CLEMENT <gregory.clement@...tlin.com>
> ---
> Hello,
>
> This patch allows CPUs to start in parallel. It has been tested on
> EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
> systems use CPS to support SMP.
>
> As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
> faster.
>
> Currently, this support is only for EyeQ SoC. However, it should also
> work for other CPUs using CPS. I am less sure about MT ASE support,
> but this patch can be a good starting point. If anyone wants to add
> support for other systems, I can share some ideas, especially for the
> MIPS_GENERIC setup that needs to handle both types of SMP setups.
>
> Gregory
> ---
> arch/mips/Kconfig | 2 ++
> arch/mips/include/asm/topology.h | 3 +++
> arch/mips/kernel/smp-cps.c | 2 ++
> arch/mips/kernel/smp.c | 18 ++++++++++++++++++
> 4 files changed, 25 insertions(+)
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index fc0772c1bad4ab736d440a18b972faf66a610783..e0e6ce2592b4168facf337b60fd889d76e81a407 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -617,6 +617,7 @@ config EYEQ
> select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
> select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
> select USE_OF
> + select HOTPLUG_PARALLEL if SMP
> help
> Select this to build a kernel supporting EyeQ SoC from Mobileye.
>
> @@ -2287,6 +2288,7 @@ config MIPS_CPS
> select MIPS_CM
> select MIPS_CPS_PM if HOTPLUG_CPU
> select SMP
> + select HOTPLUG_SMT if HOTPLUG_PARALLEL
> select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
> select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
> select SYS_SUPPORTS_HOTPLUG_CPU
> diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h
> index 0673d2d0f2e6dd02ed14d650e5af7b8a3c162b6f..5158c802eb6574d292f6ad2512cc7772fece4aae 100644
> --- a/arch/mips/include/asm/topology.h
> +++ b/arch/mips/include/asm/topology.h
> @@ -16,6 +16,9 @@
> #define topology_core_id(cpu) (cpu_core(&cpu_data[cpu]))
> #define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
> #define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu])
> +
> +extern struct cpumask __cpu_primary_thread_mask;
> +#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
> #endif
>
> #endif /* __ASM_TOPOLOGY_H */
> diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
> index e85bd087467e8caf0640ad247ee5f8eb65107591..02bbd7ecd1b9557003186b9d3d98ae17eac5eb9f 100644
> --- a/arch/mips/kernel/smp-cps.c
> +++ b/arch/mips/kernel/smp-cps.c
> @@ -236,6 +236,7 @@ static void __init cps_smp_setup(void)
> /* Use the number of VPEs in cluster 0 core 0 for smp_num_siblings */
> if (!cl && !c)
> smp_num_siblings = core_vpes;
> + cpumask_set_cpu(nvpes, &__cpu_primary_thread_mask);
>
> for (v = 0; v < min_t(int, core_vpes, NR_CPUS - nvpes); v++) {
> cpu_set_cluster(&cpu_data[nvpes + v], cl);
> @@ -364,6 +365,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
> cl = cpu_cluster(&current_cpu_data);
> c = cpu_core(&current_cpu_data);
> cluster_bootcfg = &mips_cps_cluster_bootcfg[cl];
> + cpu_smt_set_num_threads(core_vpes, core_vpes);
> core_bootcfg = &cluster_bootcfg->core_config[c];
> bitmap_set(cluster_bootcfg->core_power, cpu_core(&current_cpu_data), 1);
> atomic_set(&core_bootcfg->vpe_mask, 1 << cpu_vpe_id(&current_cpu_data));
> diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
> index 39e193cad2b9e4f877e920b71bbbb210e52607d0..1726744f2b2ec10a44420a7b9b9cd04f06c4d2f6 100644
> --- a/arch/mips/kernel/smp.c
> +++ b/arch/mips/kernel/smp.c
> @@ -56,8 +56,10 @@ EXPORT_SYMBOL(cpu_sibling_map);
> cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
> EXPORT_SYMBOL(cpu_core_map);
>
> +#ifndef CONFIG_HOTPLUG_PARALLEL
> static DECLARE_COMPLETION(cpu_starting);
> static DECLARE_COMPLETION(cpu_running);
> +#endif
>
> /*
> * A logical cpu mask containing only one VPE per core to
> @@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
>
> cpumask_t cpu_coherent_mask;
>
> +struct cpumask __cpu_primary_thread_mask __read_mostly;
> +
> unsigned int smp_max_threads __initdata = UINT_MAX;
>
> static int __init early_nosmt(char *s)
> @@ -374,10 +378,15 @@ asmlinkage void start_secondary(void)
> set_cpu_core_map(cpu);
>
> cpumask_set_cpu(cpu, &cpu_coherent_mask);
> +#ifdef CONFIG_HOTPLUG_PARALLEL
> + cpuhp_ap_sync_alive();
This is a "synchronization point" according to the description of commit
9244724fbf8ab394a7210e8e93bf037abc, which means that things run in parallel
before this point and are serialized after this point.
Unfortunately, set_cpu_sibling_map() and set_cpu_core_map() cannot
be executed in parallel. You may not have observed any problems, but in
theory this is not correct.
Huacai
> +#endif
> notify_cpu_starting(cpu);
>
> +#ifndef CONFIG_HOTPLUG_PARALLEL
> /* Notify boot CPU that we're starting & ready to sync counters */
> complete(&cpu_starting);
> +#endif
>
> synchronise_count_slave(cpu);
>
> @@ -386,11 +395,13 @@ asmlinkage void start_secondary(void)
>
> calculate_cpu_foreign_map();
>
> +#ifndef CONFIG_HOTPLUG_PARALLEL
> /*
> * Notify boot CPU that we're up & online and it can safely return
> * from __cpu_up
> */
> complete(&cpu_running);
> +#endif
>
> /*
> * irq will be enabled in ->smp_finish(), enabling it too early
> @@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void)
> set_cpu_online(0, true);
> }
>
> +#ifdef CONFIG_HOTPLUG_PARALLEL
> +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
> +{
> + return mp_ops->boot_secondary(cpu, tidle);
> +}
> +#else
> int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> {
> int err;
> @@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> wait_for_completion(&cpu_running);
> return 0;
> }
> +#endif
>
> #ifdef CONFIG_PROFILING
> /* Not really SMP stuff ... */
>
> ---
> base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
> change-id: 20250411-parallel-cpu-bringup-78999a9235ea
>
> Best regards,
> --
> Grégory CLEMENT, Bootlin
> Embedded Linux and Kernel engineering
> https://bootlin.com
>
>
Powered by blists - more mailing lists