linux-kernel - Re: [PATCH v5 6/6] LoongArch: Add pv ipi support on LoongArch system

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <27f44c9a-ce2d-3666-6d8e-110370b3fbe0@loongson.cn>
Date: Mon, 26 Feb 2024 10:30:47 +0800
From: maobibo <maobibo@...ngson.cn>
To: Huacai Chen <chenhuacai@...nel.org>
Cc: Tianrui Zhao <zhaotianrui@...ngson.cn>, Juergen Gross <jgross@...e.com>,
 Paolo Bonzini <pbonzini@...hat.com>, loongarch@...ts.linux.dev,
 linux-kernel@...r.kernel.org, virtualization@...ts.linux.dev,
 kvm@...r.kernel.org
Subject: Re: [PATCH v5 6/6] LoongArch: Add pv ipi support on LoongArch system



On 2024/2/24 下午5:19, Huacai Chen wrote:
> Hi, Bibo,
> 
> On Thu, Feb 22, 2024 at 11:28 AM Bibo Mao <maobibo@...ngson.cn> wrote:
>>
>> On LoongArch system, ipi hw uses iocsr registers, there is one iocsr
>> register access on ipi sending, and two iocsr access on ipi receiving
>> which is ipi interrupt handler. On VM mode all iocsr accessing will
>> cause VM to trap into hypervisor. So with one ipi hw notification
>> there will be three times of trap.
>>
>> PV ipi is added for VM, hypercall instruction is used for ipi sender,
>> and hypervisor will inject SWI to destination vcpu. During SWI interrupt
>> handler, only estat CSR register is written to clear irq. Estat CSR
>> register access will not trap into hypervisor. So with pv ipi supported,
>> there is one trap with pv ipi sender, and no trap with ipi receiver,
>> there is only one trap with ipi notification.
>>
>> Also this patch adds ipi multicast support, the method is similar with
>> x86. With ipi multicast support, ipi notification can be sent to at most
>> 128 vcpus at one time. It reduces trap times into hypervisor greatly.
>>
>> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
>> ---
>>   arch/loongarch/include/asm/hardirq.h   |   1 +
>>   arch/loongarch/include/asm/kvm_host.h  |   1 +
>>   arch/loongarch/include/asm/kvm_para.h  | 123 +++++++++++++++++++++++++
>>   arch/loongarch/include/asm/loongarch.h |   1 +
>>   arch/loongarch/kernel/irq.c            |   2 +-
>>   arch/loongarch/kernel/paravirt.c       | 112 ++++++++++++++++++++++
>>   arch/loongarch/kernel/setup.c          |   1 +
>>   arch/loongarch/kernel/smp.c            |   2 +-
>>   arch/loongarch/kvm/exit.c              |  73 ++++++++++++++-
>>   arch/loongarch/kvm/vcpu.c              |   1 +
>>   10 files changed, 313 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
>> index 9f0038e19c7f..b26d596a73aa 100644
>> --- a/arch/loongarch/include/asm/hardirq.h
>> +++ b/arch/loongarch/include/asm/hardirq.h
>> @@ -21,6 +21,7 @@ enum ipi_msg_type {
>>   typedef struct {
>>          unsigned int ipi_irqs[NR_IPI];
>>          unsigned int __softirq_pending;
>> +       atomic_t message ____cacheline_aligned_in_smp;
>>   } ____cacheline_aligned irq_cpustat_t;
>>
>>   DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>> index 3ba16ef1fe69..0b96c6303cf7 100644
>> --- a/arch/loongarch/include/asm/kvm_host.h
>> +++ b/arch/loongarch/include/asm/kvm_host.h
>> @@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
>>          u64 idle_exits;
>>          u64 cpucfg_exits;
>>          u64 signal_exits;
>> +       u64 hypercall_exits;
>>   };
>>
>>   #define KVM_MEM_HUGEPAGE_CAPABLE       (1UL << 0)
>> diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
>> index af5d677a9052..a82bffbbf8a1 100644
>> --- a/arch/loongarch/include/asm/kvm_para.h
>> +++ b/arch/loongarch/include/asm/kvm_para.h
>> @@ -8,6 +8,9 @@
>>   #define HYPERVISOR_KVM                 1
>>   #define HYPERVISOR_VENDOR_SHIFT                8
>>   #define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
>> +#define KVM_HCALL_CODE_PV_SERVICE      0
>> +#define KVM_HCALL_PV_SERVICE           HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE)
>> +#define  KVM_HCALL_FUNC_PV_IPI         1
>>
>>   /*
>>    * LoongArch hypercall return code
>> @@ -16,6 +19,126 @@
>>   #define KVM_HCALL_INVALID_CODE         -1UL
>>   #define KVM_HCALL_INVALID_PARAMETER    -2UL
>>
>> +/*
>> + * Hypercall interface for KVM hypervisor
>> + *
>> + * a0: function identifier
>> + * a1-a6: args
>> + * Return value will be placed in v0.
>> + * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
>> + */
>> +static __always_inline long kvm_hypercall(u64 fid)
>> +{
>> +       register long ret asm("v0");
>> +       register unsigned long fun asm("a0") = fid;
>> +
>> +       __asm__ __volatile__(
>> +               "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
>> +               : "=r" (ret)
>> +               : "r" (fun)
>> +               : "memory"
>> +               );
>> +
>> +       return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
>> +{
>> +       register long ret asm("v0");
>> +       register unsigned long fun asm("a0") = fid;
>> +       register unsigned long a1  asm("a1") = arg0;
>> +
>> +       __asm__ __volatile__(
>> +               "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
>> +               : "=r" (ret)
>> +               : "r" (fun), "r" (a1)
>> +               : "memory"
>> +               );
>> +
>> +       return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall2(u64 fid,
>> +               unsigned long arg0, unsigned long arg1)
>> +{
>> +       register long ret asm("v0");
>> +       register unsigned long fun asm("a0") = fid;
>> +       register unsigned long a1  asm("a1") = arg0;
>> +       register unsigned long a2  asm("a2") = arg1;
>> +
>> +       __asm__ __volatile__(
>> +                       "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
>> +                       : "=r" (ret)
>> +                       : "r" (fun), "r" (a1), "r" (a2)
>> +                       : "memory"
>> +                       );
>> +
>> +       return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall3(u64 fid,
>> +       unsigned long arg0, unsigned long arg1, unsigned long arg2)
>> +{
>> +       register long ret asm("v0");
>> +       register unsigned long fun asm("a0") = fid;
>> +       register unsigned long a1  asm("a1") = arg0;
>> +       register unsigned long a2  asm("a2") = arg1;
>> +       register unsigned long a3  asm("a3") = arg2;
>> +
>> +       __asm__ __volatile__(
>> +               "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
>> +               : "=r" (ret)
>> +               : "r" (fun), "r" (a1), "r" (a2), "r" (a3)
>> +               : "memory"
>> +               );
>> +
>> +       return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall4(u64 fid,
>> +               unsigned long arg0, unsigned long arg1, unsigned long arg2,
>> +               unsigned long arg3)
>> +{
>> +       register long ret asm("v0");
>> +       register unsigned long fun asm("a0") = fid;
>> +       register unsigned long a1  asm("a1") = arg0;
>> +       register unsigned long a2  asm("a2") = arg1;
>> +       register unsigned long a3  asm("a3") = arg2;
>> +       register unsigned long a4  asm("a4") = arg3;
>> +
>> +       __asm__ __volatile__(
>> +               "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
>> +               : "=r" (ret)
>> +               : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
>> +               : "memory"
>> +               );
>> +
>> +       return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall5(u64 fid,
>> +               unsigned long arg0, unsigned long arg1, unsigned long arg2,
>> +               unsigned long arg3, unsigned long arg4)
>> +{
>> +       register long ret asm("v0");
>> +       register unsigned long fun asm("a0") = fid;
>> +       register unsigned long a1  asm("a1") = arg0;
>> +       register unsigned long a2  asm("a2") = arg1;
>> +       register unsigned long a3  asm("a3") = arg2;
>> +       register unsigned long a4  asm("a4") = arg3;
>> +       register unsigned long a5  asm("a5") = arg4;
>> +
>> +       __asm__ __volatile__(
>> +               "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
>> +               : "=r" (ret)
>> +               : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
>> +               : "memory"
>> +               );
>> +
>> +       return ret;
>> +}
>> +
>> +
>>   static inline unsigned int kvm_arch_para_features(void)
>>   {
>>          return 0;
>> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
>> index a1d22e8b6f94..0ad36704cb4b 100644
>> --- a/arch/loongarch/include/asm/loongarch.h
>> +++ b/arch/loongarch/include/asm/loongarch.h
>> @@ -167,6 +167,7 @@
>>   #define CPUCFG_KVM_SIG                 CPUCFG_KVM_BASE
>>   #define  KVM_SIGNATURE                 "KVM\0"
>>   #define CPUCFG_KVM_FEATURE             (CPUCFG_KVM_BASE + 4)
>> +#define  KVM_FEATURE_PV_IPI            BIT(1)
>>
>>   #ifndef __ASSEMBLY__
>>
>> diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
>> index ce36897d1e5a..4863e6c1b739 100644
>> --- a/arch/loongarch/kernel/irq.c
>> +++ b/arch/loongarch/kernel/irq.c
>> @@ -113,5 +113,5 @@ void __init init_IRQ(void)
>>                          per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
>>          }
>>
>> -       set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
>> +       set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
>>   }
>> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
>> index 5cf794e8490f..4c30e1c73c72 100644
>> --- a/arch/loongarch/kernel/paravirt.c
>> +++ b/arch/loongarch/kernel/paravirt.c
>> @@ -1,6 +1,7 @@
>>   // SPDX-License-Identifier: GPL-2.0
>>   #include <linux/export.h>
>>   #include <linux/types.h>
>> +#include <linux/interrupt.h>
>>   #include <linux/jump_label.h>
>>   #include <linux/kvm_para.h>
>>   #include <asm/paravirt.h>
>> @@ -16,6 +17,103 @@ static u64 native_steal_clock(int cpu)
>>
>>   DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
>>
>> +#ifdef CONFIG_SMP
>> +static void pv_send_ipi_single(int cpu, unsigned int action)
>> +{
>> +       unsigned int min, old;
>> +       unsigned long bitmap = 0;
>> +       irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
>> +
>> +       action = BIT(action);
>> +       old = atomic_fetch_or(action, &info->message);
>> +       if (old == 0) {
>> +               min = cpu_logical_map(cpu);
>> +               bitmap = 1;
>> +               kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, bitmap, 0, min);
>> +       }
> Early return style can make it a little easy, which means:
> 
> if (old)
>     return;
> 
> min = ......
> 
will do in next patch.

>> +}
>> +
>> +#define KVM_IPI_CLUSTER_SIZE           (2 * BITS_PER_LONG)
>> +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
>> +{
>> +       unsigned int cpu, i, min = 0, max = 0, old;
>> +       __uint128_t bitmap = 0;
>> +       irq_cpustat_t *info;
>> +
>> +       if (cpumask_empty(mask))
>> +               return;
>> +
>> +       action = BIT(action);
>> +       for_each_cpu(i, mask) {
>> +               info = &per_cpu(irq_stat, i);
>> +               old = atomic_fetch_or(action, &info->message);
>> +               if (old)
>> +                       continue;
>> +
>> +               cpu = cpu_logical_map(i);
>> +               if (!bitmap) {
>> +                       min = max = cpu;
>> +               } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) {
>> +                       max = cpu > max ? cpu : max;
>> +               } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) {
>> +                       bitmap <<= min - cpu;
>> +                       min = cpu;
>> +               } else {
>> +                       /*
>> +                        * Physical cpuid is sorted in ascending order ascend
>> +                        * for the next mask calculation, send IPI here
>> +                        * directly and skip the remainding cpus
>> +                        */
>> +                       kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI,
>> +                               (unsigned long)bitmap,
>> +                               (unsigned long)(bitmap >> BITS_PER_LONG), min);
>> +                       min = max = cpu;
>> +                       bitmap = 0;
>> +               }
>> +               __set_bit(cpu - min, (unsigned long *)&bitmap);
>> +       }
>> +
>> +       if (bitmap)
>> +               kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, (unsigned long)bitmap,
>> +                               (unsigned long)(bitmap >> BITS_PER_LONG), min);
>> +}
>> +
>> +static irqreturn_t loongson_do_swi(int irq, void *dev)
>> +{
>> +       irq_cpustat_t *info;
>> +       long action;
>> +
>> +       /* Clear swi interrupt */
>> +       clear_csr_estat(1 << INT_SWI0);
>> +       info = this_cpu_ptr(&irq_stat);
>> +       action = atomic_xchg(&info->message, 0);
>> +       if (action & SMP_CALL_FUNCTION) {
>> +               generic_smp_call_function_interrupt();
>> +               info->ipi_irqs[IPI_CALL_FUNCTION]++;
>> +       }
>> +
>> +       if (action & SMP_RESCHEDULE) {
>> +               scheduler_ipi();
>> +               info->ipi_irqs[IPI_RESCHEDULE]++;
>> +       }
>> +
>> +       return IRQ_HANDLED;
>> +}
>> +
>> +static void pv_init_ipi(void)
>> +{
>> +       int r, swi0;
>> +
>> +       swi0 = get_percpu_irq(INT_SWI0);
>> +       if (swi0 < 0)
>> +               panic("SWI0 IRQ mapping failed\n");
>> +       irq_set_percpu_devid(swi0);
>> +       r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
>> +       if (r < 0)
>> +               panic("SWI0 IRQ request failed\n");
>> +}
>> +#endif
>> +
>>   static bool kvm_para_available(void)
>>   {
>>          static int hypervisor_type;
>> @@ -32,10 +130,24 @@ static bool kvm_para_available(void)
>>
>>   int __init pv_ipi_init(void)
>>   {
>> +       int feature;
>> +
>>          if (!cpu_has_hypervisor)
>>                  return 0;
>>          if (!kvm_para_available())
>>                  return 0;
>>
>> +       /*
>> +        * check whether KVM hypervisor supports pv_ipi or not
>> +        */
>> +       feature = read_cpucfg(CPUCFG_KVM_FEATURE);
>> +#ifdef CONFIG_SMP
>> +       if (feature & KVM_FEATURE_PV_IPI) {
>> +               smp_ops.init_ipi                = pv_init_ipi;
>> +               smp_ops.send_ipi_single         = pv_send_ipi_single;
>> +               smp_ops.send_ipi_mask           = pv_send_ipi_mask;
>> +       }
>> +#endif
>> +
>>          return 1;
>>   }
>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>> index b79a1244b56f..c95ed3224b7d 100644
>> --- a/arch/loongarch/kernel/setup.c
>> +++ b/arch/loongarch/kernel/setup.c
>> @@ -368,6 +368,7 @@ void __init platform_init(void)
>>          pr_info("The BIOS Version: %s\n", b_info.bios_version);
>>
>>          efi_runtime_init();
>> +       pv_ipi_init();
> Move the callsite to loongson_smp_setup() is better.
Will do in next patch.

Regards
Bibo Mao
> 
> Huacai
> 
>>   }
>>
>>   static void __init check_kernel_sections_mem(void)
>> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
>> index 2182e7cc2ed6..9e9fda1fe18a 100644
>> --- a/arch/loongarch/kernel/smp.c
>> +++ b/arch/loongarch/kernel/smp.c
>> @@ -285,7 +285,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
>>   void loongson_init_secondary(void)
>>   {
>>          unsigned int cpu = smp_processor_id();
>> -       unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
>> +       unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
>>                               ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
>>
>>          change_csr_ecfg(ECFG0_IM, imask);
>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
>> index 6a38fd59d86d..46940e97975b 100644
>> --- a/arch/loongarch/kvm/exit.c
>> +++ b/arch/loongarch/kvm/exit.c
>> @@ -227,6 +227,9 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
>>          case CPUCFG_KVM_SIG:
>>                  vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
>>                  break;
>> +       case CPUCFG_KVM_FEATURE:
>> +               vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
>> +               break;
>>          default:
>>                  vcpu->arch.gprs[rd] = 0;
>>                  break;
>> @@ -699,12 +702,78 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
>>          return RESUME_GUEST;
>>   }
>>
>> +static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
>> +{
>> +       unsigned long ipi_bitmap;
>> +       unsigned int min, cpu, i;
>> +       struct kvm_vcpu *dest;
>> +
>> +       min = vcpu->arch.gprs[LOONGARCH_GPR_A3];
>> +       for (i = 0; i < 2; i++) {
>> +               ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i];
>> +               if (!ipi_bitmap)
>> +                       continue;
>> +
>> +               cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
>> +               while (cpu < BITS_PER_LONG) {
>> +                       dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
>> +                       cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG,
>> +                                       cpu + 1);
>> +                       if (!dest)
>> +                               continue;
>> +
>> +                       /*
>> +                        * Send SWI0 to dest vcpu to emulate IPI interrupt
>> +                        */
>> +                       kvm_queue_irq(dest, INT_SWI0);
>> +                       kvm_vcpu_kick(dest);
>> +               }
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +/*
>> + * hypercall emulation always return to guest, Caller should check retval.
>> + */
>> +static void kvm_handle_pv_service(struct kvm_vcpu *vcpu)
>> +{
>> +       unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
>> +       long ret;
>> +
>> +       switch (func) {
>> +       case KVM_HCALL_FUNC_PV_IPI:
>> +               kvm_pv_send_ipi(vcpu);
>> +               ret = KVM_HCALL_STATUS_SUCCESS;
>> +               break;
>> +       default:
>> +               ret = KVM_HCALL_INVALID_CODE;
>> +               break;
>> +       };
>> +
>> +       vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
>> +}
>> +
>>   static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
>>   {
>> +       larch_inst inst;
>> +       unsigned int code;
>> +
>> +       inst.word = vcpu->arch.badi;
>> +       code = inst.reg0i15_format.immediate;
>>          update_pc(&vcpu->arch);
>>
>> -       /* Treat it as noop intruction, only set return value */
>> -       vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
>> +       switch (code) {
>> +       case KVM_HCALL_PV_SERVICE:
>> +               vcpu->stat.hypercall_exits++;
>> +               kvm_handle_pv_service(vcpu);
>> +               break;
>> +       default:
>> +               /* Treat it as noop intruction, only set return value */
>> +               vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
>> +               break;
>> +       }
>> +
>>          return RESUME_GUEST;
>>   }
>>
>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>> index 40296d8ef297..24fd5e4647f3 100644
>> --- a/arch/loongarch/kvm/vcpu.c
>> +++ b/arch/loongarch/kvm/vcpu.c
>> @@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
>>          STATS_DESC_COUNTER(VCPU, idle_exits),
>>          STATS_DESC_COUNTER(VCPU, cpucfg_exits),
>>          STATS_DESC_COUNTER(VCPU, signal_exits),
>> +       STATS_DESC_COUNTER(VCPU, hypercall_exits)
>>   };
>>
>>   const struct kvm_stats_header kvm_vcpu_stats_header = {
>> --
>> 2.39.3
>>
>>