[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAAhV-H4XSnHBNCitcxPxF7LitCdVHhnVR9DDwaUgEuwbGkqBGA@mail.gmail.com>
Date: Mon, 6 May 2024 15:04:03 +0800
From: Huacai Chen <chenhuacai@...nel.org>
To: maobibo <maobibo@...ngson.cn>
Cc: Tianrui Zhao <zhaotianrui@...ngson.cn>, Juergen Gross <jgross@...e.com>,
Paolo Bonzini <pbonzini@...hat.com>, Jonathan Corbet <corbet@....net>, loongarch@...ts.linux.dev,
linux-kernel@...r.kernel.org, virtualization@...ts.linux.dev,
kvm@...r.kernel.org
Subject: Re: [PATCH v8 6/6] LoongArch: Add pv ipi support on guest kernel side
On Mon, May 6, 2024 at 3:00 PM maobibo <maobibo@...ngson.cn> wrote:
>
>
>
> On 2024/5/6 上午9:53, Huacai Chen wrote:
> > Hi, Bibo,
> >
> > On Sun, Apr 28, 2024 at 6:05 PM Bibo Mao <maobibo@...ngson.cn> wrote:
> >>
> >> PARAVIRT option and pv ipi is added on guest kernel side, function
> >> pv_ipi_init() is to add ipi sending and ipi receiving hooks. This function
> >> firstly checks whether system runs on VM mode. If kernel runs on VM mode,
> >> it will call function kvm_para_available() to detect current hypervirsor
> >> type. Now only KVM type detection is supported, the paravirt function can
> >> work only if current hypervisor type is KVM, since there is only KVM
> >> supported on LoongArch now.
> >>
> >> PV IPI uses virtual IPI sender and virtual IPI receiver function. With
> >> virutal IPI sender, ipi message is stored in DDR memory rather than
> >> emulated HW. IPI multicast is supported, and 128 vcpus can received IPIs
> >> at the same time like X86 KVM method. Hypercall method is used for IPI
> >> sending.
> >>
> >> With virtual IPI receiver, HW SW0 is used rather than real IPI HW. Since
> >> VCPU has separate HW SW0 like HW timer, there is no trap in IPI interrupt
> >> acknowledge. And IPI message is stored in DDR, no trap in get IPI message.
> >>
> >> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
> >> ---
> >> arch/loongarch/Kconfig | 9 ++
> >> arch/loongarch/include/asm/hardirq.h | 1 +
> >> arch/loongarch/include/asm/paravirt.h | 27 ++++
> >> .../include/asm/paravirt_api_clock.h | 1 +
> >> arch/loongarch/kernel/Makefile | 1 +
> >> arch/loongarch/kernel/irq.c | 2 +-
> >> arch/loongarch/kernel/paravirt.c | 151 ++++++++++++++++++
> >> arch/loongarch/kernel/smp.c | 4 +-
> >> 8 files changed, 194 insertions(+), 2 deletions(-)
> >> create mode 100644 arch/loongarch/include/asm/paravirt.h
> >> create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
> >> create mode 100644 arch/loongarch/kernel/paravirt.c
> >>
> >> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >> index 54ad04dacdee..0a1540a8853e 100644
> >> --- a/arch/loongarch/Kconfig
> >> +++ b/arch/loongarch/Kconfig
> >> @@ -583,6 +583,15 @@ config CPU_HAS_PREFETCH
> >> bool
> >> default y
> >>
> >> +config PARAVIRT
> >> + bool "Enable paravirtualization code"
> >> + depends on AS_HAS_LVZ_EXTENSION
> >> + help
> >> + This changes the kernel so it can modify itself when it is run
> >> + under a hypervisor, potentially improving performance significantly
> >> + over full virtualization. However, when run without a hypervisor
> >> + the kernel is theoretically slower and slightly larger.
> >> +
> >> config ARCH_SUPPORTS_KEXEC
> >> def_bool y
> >>
> >> diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
> >> index 9f0038e19c7f..b26d596a73aa 100644
> >> --- a/arch/loongarch/include/asm/hardirq.h
> >> +++ b/arch/loongarch/include/asm/hardirq.h
> >> @@ -21,6 +21,7 @@ enum ipi_msg_type {
> >> typedef struct {
> >> unsigned int ipi_irqs[NR_IPI];
> >> unsigned int __softirq_pending;
> >> + atomic_t message ____cacheline_aligned_in_smp;
> >> } ____cacheline_aligned irq_cpustat_t;
> >>
> >> DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
> >> diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
> >> new file mode 100644
> >> index 000000000000..58f7b7b89f2c
> >> --- /dev/null
> >> +++ b/arch/loongarch/include/asm/paravirt.h
> >> @@ -0,0 +1,27 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +#ifndef _ASM_LOONGARCH_PARAVIRT_H
> >> +#define _ASM_LOONGARCH_PARAVIRT_H
> >> +
> >> +#ifdef CONFIG_PARAVIRT
> >> +#include <linux/static_call_types.h>
> >> +struct static_key;
> >> +extern struct static_key paravirt_steal_enabled;
> >> +extern struct static_key paravirt_steal_rq_enabled;
> >> +
> >> +u64 dummy_steal_clock(int cpu);
> >> +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
> >> +
> >> +static inline u64 paravirt_steal_clock(int cpu)
> >> +{
> >> + return static_call(pv_steal_clock)(cpu);
> >> +}
> >> +
> >> +int pv_ipi_init(void);
> >> +#else
> >> +static inline int pv_ipi_init(void)
> >> +{
> >> + return 0;
> >> +}
> >> +
> >> +#endif // CONFIG_PARAVIRT
> >> +#endif
> >> diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h b/arch/loongarch/include/asm/paravirt_api_clock.h
> >> new file mode 100644
> >> index 000000000000..65ac7cee0dad
> >> --- /dev/null
> >> +++ b/arch/loongarch/include/asm/paravirt_api_clock.h
> >> @@ -0,0 +1 @@
> >> +#include <asm/paravirt.h>
> >> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >> index 3a7620b66bc6..c9bfeda89e40 100644
> >> --- a/arch/loongarch/kernel/Makefile
> >> +++ b/arch/loongarch/kernel/Makefile
> >> @@ -51,6 +51,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
> >> obj-$(CONFIG_STACKTRACE) += stacktrace.o
> >>
> >> obj-$(CONFIG_PROC_FS) += proc.o
> >> +obj-$(CONFIG_PARAVIRT) += paravirt.o
> >>
> >> obj-$(CONFIG_SMP) += smp.o
> >>
> >> diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
> >> index ce36897d1e5a..4863e6c1b739 100644
> >> --- a/arch/loongarch/kernel/irq.c
> >> +++ b/arch/loongarch/kernel/irq.c
> >> @@ -113,5 +113,5 @@ void __init init_IRQ(void)
> >> per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
> >> }
> >>
> >> - set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
> >> + set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
> >> }
> >> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
> >> new file mode 100644
> >> index 000000000000..9044ed62045c
> >> --- /dev/null
> >> +++ b/arch/loongarch/kernel/paravirt.c
> >> @@ -0,0 +1,151 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +#include <linux/export.h>
> >> +#include <linux/types.h>
> >> +#include <linux/interrupt.h>
> >> +#include <linux/jump_label.h>
> >> +#include <linux/kvm_para.h>
> >> +#include <asm/paravirt.h>
> >> +#include <linux/static_call.h>
> >> +
> >> +struct static_key paravirt_steal_enabled;
> >> +struct static_key paravirt_steal_rq_enabled;
> >> +
> >> +static u64 native_steal_clock(int cpu)
> >> +{
> >> + return 0;
> >> +}
> >> +
> >> +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
> >> +
> >> +#ifdef CONFIG_SMP
> >> +static void pv_send_ipi_single(int cpu, unsigned int action)
> >> +{
> >> + unsigned int min, old;
> >> + irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
> >> +
> >> + old = atomic_fetch_or(BIT(action), &info->message);
> >> + if (old)
> >> + return;
> >> +
> >> + min = cpu_logical_map(cpu);
> >> + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, 1, 0, min);
> >> +}
> >> +
> >> +#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
> >> +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
> >> +{
> >> + unsigned int cpu, i, min = 0, max = 0, old;
> >> + __uint128_t bitmap = 0;
> >> + irq_cpustat_t *info;
> >> +
> >> + if (cpumask_empty(mask))
> >> + return;
> >> +
> >> + action = BIT(action);
> >> + for_each_cpu(i, mask) {
> >> + info = &per_cpu(irq_stat, i);
> >> + old = atomic_fetch_or(action, &info->message);
> >> + if (old)
> >> + continue;
> >> +
> >> + cpu = cpu_logical_map(i);
> >> + if (!bitmap) {
> >> + min = max = cpu;
> >> + } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) {
> >> + max = cpu > max ? cpu : max;
> >> + } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) {
> >> + bitmap <<= min - cpu;
> >> + min = cpu;
> >> + } else {
> >> + /*
> >> + * Physical cpuid is sorted in ascending order ascend
> >> + * for the next mask calculation, send IPI here
> >> + * directly and skip the remainding cpus
> >> + */
> >> + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI,
> >> + (unsigned long)bitmap,
> >> + (unsigned long)(bitmap >> BITS_PER_LONG), min);
> >> + min = max = cpu;
> >> + bitmap = 0;
> >> + }
> > I have changed the logic and comments when I apply, you can double
> > check whether it is correct.
> There is modification like this:
> if (!bitmap) {
> min = max = cpu;
> } else if (cpu < min && cpu > (max -
> KVM_IPI_CLUSTER_SIZE)) {
> ...
>
> By test there will be problem if value of max is smaller than
> KVM_IPI_CLUSTER_SIZE, since type of cpu/max is "unsigned int".
>
> How about define the variable as int? the patch is like this:
> --- a/arch/loongarch/kernel/paravirt.c
> +++ b/arch/loongarch/kernel/paravirt.c
> @@ -35,7 +35,7 @@ static void pv_send_ipi_single(int cpu, unsigned int
> action)
>
> static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int
> action)
> {
> - unsigned int cpu, i, min = 0, max = 0, old;
> + int cpu, i, min = 0, max = 0, old;
> __uint128_t bitmap = 0;
> irq_cpustat_t *info;
Make sense, I will update this line.
Huacai
>
>
> Regards
> Bibo Mao
> >
> > Huacai
> >
> >> + __set_bit(cpu - min, (unsigned long *)&bitmap);
> >> + }
> >> +
> >> + if (bitmap)
> >> + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, (unsigned long)bitmap,
> >> + (unsigned long)(bitmap >> BITS_PER_LONG), min);
> >> +}
> >> +
> >> +static irqreturn_t loongson_do_swi(int irq, void *dev)
> >> +{
> >> + irq_cpustat_t *info;
> >> + long action;
> >> +
> >> + /* Clear swi interrupt */
> >> + clear_csr_estat(1 << INT_SWI0);
> >> + info = this_cpu_ptr(&irq_stat);
> >> + action = atomic_xchg(&info->message, 0);
> >> + if (action & SMP_CALL_FUNCTION) {
> >> + generic_smp_call_function_interrupt();
> >> + info->ipi_irqs[IPI_CALL_FUNCTION]++;
> >> + }
> >> +
> >> + if (action & SMP_RESCHEDULE) {
> >> + scheduler_ipi();
> >> + info->ipi_irqs[IPI_RESCHEDULE]++;
> >> + }
> >> +
> >> + return IRQ_HANDLED;
> >> +}
> >> +
> >> +static void pv_init_ipi(void)
> >> +{
> >> + int r, swi0;
> >> +
> >> + swi0 = get_percpu_irq(INT_SWI0);
> >> + if (swi0 < 0)
> >> + panic("SWI0 IRQ mapping failed\n");
> >> + irq_set_percpu_devid(swi0);
> >> + r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
> >> + if (r < 0)
> >> + panic("SWI0 IRQ request failed\n");
> >> +}
> >> +#endif
> >> +
> >> +static bool kvm_para_available(void)
> >> +{
> >> + static int hypervisor_type;
> >> + int config;
> >> +
> >> + if (!hypervisor_type) {
> >> + config = read_cpucfg(CPUCFG_KVM_SIG);
> >> + if (!memcmp(&config, KVM_SIGNATURE, 4))
> >> + hypervisor_type = HYPERVISOR_KVM;
> >> + }
> >> +
> >> + return hypervisor_type == HYPERVISOR_KVM;
> >> +}
> >> +
> >> +int __init pv_ipi_init(void)
> >> +{
> >> + int feature;
> >> +
> >> + if (!cpu_has_hypervisor)
> >> + return 0;
> >> + if (!kvm_para_available())
> >> + return 0;
> >> +
> >> + /*
> >> + * check whether KVM hypervisor supports pv_ipi or not
> >> + */
> >> + feature = read_cpucfg(CPUCFG_KVM_FEATURE);
> >> +#ifdef CONFIG_SMP
> >> + if (feature & KVM_FEATURE_PV_IPI) {
> >> + smp_ops.init_ipi = pv_init_ipi;
> >> + smp_ops.send_ipi_single = pv_send_ipi_single;
> >> + smp_ops.send_ipi_mask = pv_send_ipi_mask;
> >> + }
> >> +#endif
> >> +
> >> + return 1;
> >> +}
> >> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
> >> index 1fce775be4f6..9eff7aa4c552 100644
> >> --- a/arch/loongarch/kernel/smp.c
> >> +++ b/arch/loongarch/kernel/smp.c
> >> @@ -29,6 +29,7 @@
> >> #include <asm/loongson.h>
> >> #include <asm/mmu_context.h>
> >> #include <asm/numa.h>
> >> +#include <asm/paravirt.h>
> >> #include <asm/processor.h>
> >> #include <asm/setup.h>
> >> #include <asm/time.h>
> >> @@ -309,6 +310,7 @@ void __init loongson_smp_setup(void)
> >> cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package;
> >> cpu_data[0].package = cpu_logical_map(0) / loongson_sysconfcores_per_package;
> >>
> >> + pv_ipi_init();
> >> iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
> >> pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus);
> >> }
> >> @@ -352,7 +354,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
> >> void loongson_init_secondary(void)
> >> {
> >> unsigned int cpu = smp_processor_id();
> >> - unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
> >> + unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
> >> ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
> >>
> >> change_csr_ecfg(ECFG0_IM, imask);
> >> --
> >> 2.39.3
> >>
>
>
Powered by blists - more mailing lists