linux-kernel - Re: [PATCH v8 6/6] LoongArch: Add pv ipi support on guest kernel side

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <fa76a04d-e321-ff34-3d94-f528d28c5793@loongson.cn>
Date: Mon, 6 May 2024 15:00:11 +0800
From: maobibo <maobibo@...ngson.cn>
To: Huacai Chen <chenhuacai@...nel.org>
Cc: Tianrui Zhao <zhaotianrui@...ngson.cn>, Juergen Gross <jgross@...e.com>,
 Paolo Bonzini <pbonzini@...hat.com>, Jonathan Corbet <corbet@....net>,
 loongarch@...ts.linux.dev, linux-kernel@...r.kernel.org,
 virtualization@...ts.linux.dev, kvm@...r.kernel.org
Subject: Re: [PATCH v8 6/6] LoongArch: Add pv ipi support on guest kernel side



On 2024/5/6 上午9:53, Huacai Chen wrote:
> Hi, Bibo,
> 
> On Sun, Apr 28, 2024 at 6:05 PM Bibo Mao <maobibo@...ngson.cn> wrote:
>>
>> PARAVIRT option and pv ipi is added on guest kernel side, function
>> pv_ipi_init() is to add ipi sending and ipi receiving hooks. This function
>> firstly checks whether system runs on VM mode. If kernel runs on VM mode,
>> it will call function kvm_para_available() to detect current hypervirsor
>> type. Now only KVM type detection is supported, the paravirt function can
>> work only if current hypervisor type is KVM, since there is only KVM
>> supported on LoongArch now.
>>
>> PV IPI uses virtual IPI sender and virtual IPI receiver function. With
>> virutal IPI sender, ipi message is stored in DDR memory rather than
>> emulated HW. IPI multicast is supported, and 128 vcpus can received IPIs
>> at the same time like X86 KVM method. Hypercall method is used for IPI
>> sending.
>>
>> With virtual IPI receiver, HW SW0 is used rather than real IPI HW. Since
>> VCPU has separate HW SW0 like HW timer, there is no trap in IPI interrupt
>> acknowledge. And IPI message is stored in DDR, no trap in get IPI message.
>>
>> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
>> ---
>>   arch/loongarch/Kconfig                        |   9 ++
>>   arch/loongarch/include/asm/hardirq.h          |   1 +
>>   arch/loongarch/include/asm/paravirt.h         |  27 ++++
>>   .../include/asm/paravirt_api_clock.h          |   1 +
>>   arch/loongarch/kernel/Makefile                |   1 +
>>   arch/loongarch/kernel/irq.c                   |   2 +-
>>   arch/loongarch/kernel/paravirt.c              | 151 ++++++++++++++++++
>>   arch/loongarch/kernel/smp.c                   |   4 +-
>>   8 files changed, 194 insertions(+), 2 deletions(-)
>>   create mode 100644 arch/loongarch/include/asm/paravirt.h
>>   create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
>>   create mode 100644 arch/loongarch/kernel/paravirt.c
>>
>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>> index 54ad04dacdee..0a1540a8853e 100644
>> --- a/arch/loongarch/Kconfig
>> +++ b/arch/loongarch/Kconfig
>> @@ -583,6 +583,15 @@ config CPU_HAS_PREFETCH
>>          bool
>>          default y
>>
>> +config PARAVIRT
>> +       bool "Enable paravirtualization code"
>> +       depends on AS_HAS_LVZ_EXTENSION
>> +       help
>> +          This changes the kernel so it can modify itself when it is run
>> +         under a hypervisor, potentially improving performance significantly
>> +         over full virtualization.  However, when run without a hypervisor
>> +         the kernel is theoretically slower and slightly larger.
>> +
>>   config ARCH_SUPPORTS_KEXEC
>>          def_bool y
>>
>> diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
>> index 9f0038e19c7f..b26d596a73aa 100644
>> --- a/arch/loongarch/include/asm/hardirq.h
>> +++ b/arch/loongarch/include/asm/hardirq.h
>> @@ -21,6 +21,7 @@ enum ipi_msg_type {
>>   typedef struct {
>>          unsigned int ipi_irqs[NR_IPI];
>>          unsigned int __softirq_pending;
>> +       atomic_t message ____cacheline_aligned_in_smp;
>>   } ____cacheline_aligned irq_cpustat_t;
>>
>>   DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
>> diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
>> new file mode 100644
>> index 000000000000..58f7b7b89f2c
>> --- /dev/null
>> +++ b/arch/loongarch/include/asm/paravirt.h
>> @@ -0,0 +1,27 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef _ASM_LOONGARCH_PARAVIRT_H
>> +#define _ASM_LOONGARCH_PARAVIRT_H
>> +
>> +#ifdef CONFIG_PARAVIRT
>> +#include <linux/static_call_types.h>
>> +struct static_key;
>> +extern struct static_key paravirt_steal_enabled;
>> +extern struct static_key paravirt_steal_rq_enabled;
>> +
>> +u64 dummy_steal_clock(int cpu);
>> +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
>> +
>> +static inline u64 paravirt_steal_clock(int cpu)
>> +{
>> +       return static_call(pv_steal_clock)(cpu);
>> +}
>> +
>> +int pv_ipi_init(void);
>> +#else
>> +static inline int pv_ipi_init(void)
>> +{
>> +       return 0;
>> +}
>> +
>> +#endif // CONFIG_PARAVIRT
>> +#endif
>> diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h b/arch/loongarch/include/asm/paravirt_api_clock.h
>> new file mode 100644
>> index 000000000000..65ac7cee0dad
>> --- /dev/null
>> +++ b/arch/loongarch/include/asm/paravirt_api_clock.h
>> @@ -0,0 +1 @@
>> +#include <asm/paravirt.h>
>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>> index 3a7620b66bc6..c9bfeda89e40 100644
>> --- a/arch/loongarch/kernel/Makefile
>> +++ b/arch/loongarch/kernel/Makefile
>> @@ -51,6 +51,7 @@ obj-$(CONFIG_MODULES)         += module.o module-sections.o
>>   obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>>
>>   obj-$(CONFIG_PROC_FS)          += proc.o
>> +obj-$(CONFIG_PARAVIRT)         += paravirt.o
>>
>>   obj-$(CONFIG_SMP)              += smp.o
>>
>> diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
>> index ce36897d1e5a..4863e6c1b739 100644
>> --- a/arch/loongarch/kernel/irq.c
>> +++ b/arch/loongarch/kernel/irq.c
>> @@ -113,5 +113,5 @@ void __init init_IRQ(void)
>>                          per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
>>          }
>>
>> -       set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
>> +       set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
>>   }
>> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
>> new file mode 100644
>> index 000000000000..9044ed62045c
>> --- /dev/null
>> +++ b/arch/loongarch/kernel/paravirt.c
>> @@ -0,0 +1,151 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#include <linux/export.h>
>> +#include <linux/types.h>
>> +#include <linux/interrupt.h>
>> +#include <linux/jump_label.h>
>> +#include <linux/kvm_para.h>
>> +#include <asm/paravirt.h>
>> +#include <linux/static_call.h>
>> +
>> +struct static_key paravirt_steal_enabled;
>> +struct static_key paravirt_steal_rq_enabled;
>> +
>> +static u64 native_steal_clock(int cpu)
>> +{
>> +       return 0;
>> +}
>> +
>> +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
>> +
>> +#ifdef CONFIG_SMP
>> +static void pv_send_ipi_single(int cpu, unsigned int action)
>> +{
>> +       unsigned int min, old;
>> +       irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
>> +
>> +       old = atomic_fetch_or(BIT(action), &info->message);
>> +       if (old)
>> +               return;
>> +
>> +       min = cpu_logical_map(cpu);
>> +       kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, 1, 0, min);
>> +}
>> +
>> +#define KVM_IPI_CLUSTER_SIZE           (2 * BITS_PER_LONG)
>> +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
>> +{
>> +       unsigned int cpu, i, min = 0, max = 0, old;
>> +       __uint128_t bitmap = 0;
>> +       irq_cpustat_t *info;
>> +
>> +       if (cpumask_empty(mask))
>> +               return;
>> +
>> +       action = BIT(action);
>> +       for_each_cpu(i, mask) {
>> +               info = &per_cpu(irq_stat, i);
>> +               old = atomic_fetch_or(action, &info->message);
>> +               if (old)
>> +                       continue;
>> +
>> +               cpu = cpu_logical_map(i);
>> +               if (!bitmap) {
>> +                       min = max = cpu;
>> +               } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) {
>> +                       max = cpu > max ? cpu : max;
>> +               } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) {
>> +                       bitmap <<= min - cpu;
>> +                       min = cpu;
>> +               } else {
>> +                       /*
>> +                        * Physical cpuid is sorted in ascending order ascend
>> +                        * for the next mask calculation, send IPI here
>> +                        * directly and skip the remainding cpus
>> +                        */
>> +                       kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI,
>> +                               (unsigned long)bitmap,
>> +                               (unsigned long)(bitmap >> BITS_PER_LONG), min);
>> +                       min = max = cpu;
>> +                       bitmap = 0;
>> +               }
> I have changed the logic and comments when I apply, you can double
> check whether it is correct.
There is modification like this:
                 if (!bitmap) {
                         min = max = cpu;
                 } else if (cpu < min && cpu > (max - 
KVM_IPI_CLUSTER_SIZE)) {
                 	...

By test there will be problem if value of max is smaller than 
KVM_IPI_CLUSTER_SIZE, since type of cpu/max is "unsigned int".

How about define the variable as int? the patch is like this:
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -35,7 +35,7 @@ static void pv_send_ipi_single(int cpu, unsigned int 
action)

  static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int 
action)
  {
-       unsigned int cpu, i, min = 0, max = 0, old;
+       int cpu, i, min = 0, max = 0, old;
         __uint128_t bitmap = 0;
         irq_cpustat_t *info;


Regards
Bibo Mao
> 
> Huacai
> 
>> +               __set_bit(cpu - min, (unsigned long *)&bitmap);
>> +       }
>> +
>> +       if (bitmap)
>> +               kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, (unsigned long)bitmap,
>> +                               (unsigned long)(bitmap >> BITS_PER_LONG), min);
>> +}
>> +
>> +static irqreturn_t loongson_do_swi(int irq, void *dev)
>> +{
>> +       irq_cpustat_t *info;
>> +       long action;
>> +
>> +       /* Clear swi interrupt */
>> +       clear_csr_estat(1 << INT_SWI0);
>> +       info = this_cpu_ptr(&irq_stat);
>> +       action = atomic_xchg(&info->message, 0);
>> +       if (action & SMP_CALL_FUNCTION) {
>> +               generic_smp_call_function_interrupt();
>> +               info->ipi_irqs[IPI_CALL_FUNCTION]++;
>> +       }
>> +
>> +       if (action & SMP_RESCHEDULE) {
>> +               scheduler_ipi();
>> +               info->ipi_irqs[IPI_RESCHEDULE]++;
>> +       }
>> +
>> +       return IRQ_HANDLED;
>> +}
>> +
>> +static void pv_init_ipi(void)
>> +{
>> +       int r, swi0;
>> +
>> +       swi0 = get_percpu_irq(INT_SWI0);
>> +       if (swi0 < 0)
>> +               panic("SWI0 IRQ mapping failed\n");
>> +       irq_set_percpu_devid(swi0);
>> +       r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
>> +       if (r < 0)
>> +               panic("SWI0 IRQ request failed\n");
>> +}
>> +#endif
>> +
>> +static bool kvm_para_available(void)
>> +{
>> +       static int hypervisor_type;
>> +       int config;
>> +
>> +       if (!hypervisor_type) {
>> +               config = read_cpucfg(CPUCFG_KVM_SIG);
>> +               if (!memcmp(&config, KVM_SIGNATURE, 4))
>> +                       hypervisor_type = HYPERVISOR_KVM;
>> +       }
>> +
>> +       return hypervisor_type == HYPERVISOR_KVM;
>> +}
>> +
>> +int __init pv_ipi_init(void)
>> +{
>> +       int feature;
>> +
>> +       if (!cpu_has_hypervisor)
>> +               return 0;
>> +       if (!kvm_para_available())
>> +               return 0;
>> +
>> +       /*
>> +        * check whether KVM hypervisor supports pv_ipi or not
>> +        */
>> +       feature = read_cpucfg(CPUCFG_KVM_FEATURE);
>> +#ifdef CONFIG_SMP
>> +       if (feature & KVM_FEATURE_PV_IPI) {
>> +               smp_ops.init_ipi                = pv_init_ipi;
>> +               smp_ops.send_ipi_single         = pv_send_ipi_single;
>> +               smp_ops.send_ipi_mask           = pv_send_ipi_mask;
>> +       }
>> +#endif
>> +
>> +       return 1;
>> +}
>> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
>> index 1fce775be4f6..9eff7aa4c552 100644
>> --- a/arch/loongarch/kernel/smp.c
>> +++ b/arch/loongarch/kernel/smp.c
>> @@ -29,6 +29,7 @@
>>   #include <asm/loongson.h>
>>   #include <asm/mmu_context.h>
>>   #include <asm/numa.h>
>> +#include <asm/paravirt.h>
>>   #include <asm/processor.h>
>>   #include <asm/setup.h>
>>   #include <asm/time.h>
>> @@ -309,6 +310,7 @@ void __init loongson_smp_setup(void)
>>          cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package;
>>          cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package;
>>
>> +       pv_ipi_init();
>>          iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
>>          pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus);
>>   }
>> @@ -352,7 +354,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
>>   void loongson_init_secondary(void)
>>   {
>>          unsigned int cpu = smp_processor_id();
>> -       unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
>> +       unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
>>                               ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
>>
>>          change_csr_ecfg(ECFG0_IM, imask);
>> --
>> 2.39.3
>>