Message-ID: <a52a9cfe-0c8a-dd10-2d5c-eb2817deb3da@loongson.cn>
Date: Mon, 19 Feb 2024 12:18:17 +0800
From: maobibo <maobibo@...ngson.cn>
To: Huacai Chen <chenhuacai@...nel.org>
Cc: Tianrui Zhao <zhaotianrui@...ngson.cn>, Juergen Gross <jgross@...e.com>,
Paolo Bonzini <pbonzini@...hat.com>, loongarch@...ts.linux.dev,
linux-kernel@...r.kernel.org, virtualization@...ts.linux.dev,
kvm@...r.kernel.org
Subject: Re: [PATCH v4 6/6] LoongArch: Add pv ipi support on LoongArch system
On 2024/2/19 10:45 AM, Huacai Chen wrote:
> Hi, Bibo,
>
> On Thu, Feb 1, 2024 at 11:20 AM Bibo Mao <maobibo@...ngson.cn> wrote:
>>
>> On LoongArch systems, IPI hardware uses IOCSR registers. There is one
>> IOCSR register access when sending an IPI, and two IOCSR accesses in
>> the IPI interrupt handler when receiving one. In VM mode, every IOCSR
>> register access causes the VM to trap into the hypervisor, so one
>> hardware IPI notification costs three traps in total.
>>
>> This patch adds PV IPI support for VMs: the sender uses the hypercall
>> instruction, and the hypervisor injects an SWI into the VM. In the SWI
>> interrupt handler only the ESTAT CSR register is written to clear the
>> irq, and ESTAT CSR accesses do not trap into the hypervisor. So with
>> PV IPI support the sender traps into the hypervisor once and the
>> receiver does not trap at all: there is only one trap per IPI.
>>
>> This patch also adds IPI multicast support, using a method similar to
>> x86: one IPI notification can be sent to at most 128 vcpus at a time,
>> which greatly reduces the number of traps into the hypervisor.
>>
>> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
>> ---
>> arch/loongarch/include/asm/hardirq.h | 1 +
>> arch/loongarch/include/asm/kvm_host.h | 1 +
>> arch/loongarch/include/asm/kvm_para.h | 124 +++++++++++++++++++++++++
>> arch/loongarch/include/asm/loongarch.h | 1 +
>> arch/loongarch/kernel/irq.c | 2 +-
>> arch/loongarch/kernel/paravirt.c | 113 ++++++++++++++++++++++
>> arch/loongarch/kernel/smp.c | 2 +-
>> arch/loongarch/kvm/exit.c | 73 ++++++++++++++-
>> arch/loongarch/kvm/vcpu.c | 1 +
>> 9 files changed, 314 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
>> index 9f0038e19c7f..8a611843c1f0 100644
>> --- a/arch/loongarch/include/asm/hardirq.h
>> +++ b/arch/loongarch/include/asm/hardirq.h
>> @@ -21,6 +21,7 @@ enum ipi_msg_type {
>> typedef struct {
>> unsigned int ipi_irqs[NR_IPI];
>> unsigned int __softirq_pending;
>> + atomic_t messages ____cacheline_aligned_in_smp;
> Do we really need atomic_t? A plain "unsigned int" can reduce cost
> significantly.
For IPI, there are multiple senders and one receiver: a sender uses
atomic_fetch_or(action, &info->messages) to post a message and the
receiver uses atomic_xchg(&info->messages, 0) to fetch and clear all
pending messages. A synchronization mechanism between the senders and
the receiver is needed, and atomics are the simplest one.
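A minimal userspace sketch of the pattern, using C11 atomics in place of
the kernel's atomic_t (the names and the printf bodies are illustrative
only):

#include <stdatomic.h>
#include <stdio.h>

#define SMP_RESCHEDULE		(1U << 0)
#define SMP_CALL_FUNCTION	(1U << 1)

static atomic_uint messages;

/* Sender: merge the message bit; kick only on the 0 -> non-zero edge. */
static void send_ipi(unsigned int action)
{
	unsigned int old = atomic_fetch_or(&messages, action);

	if (old == 0) {
		/* first pending message: notify the receiver (hypercall) */
	}
}

/* Receiver: drain all pending messages in one atomic exchange. */
static void handle_ipi(void)
{
	unsigned int action;

	while ((action = atomic_exchange(&messages, 0)) != 0) {
		if (action & SMP_RESCHEDULE)
			printf("reschedule\n");
		if (action & SMP_CALL_FUNCTION)
			printf("call function\n");
	}
}

The fetch_or return value is what keeps it to one notification per burst:
only the sender that observes 0 issues the hypercall, later senders just
merge their bits.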
>
>> } ____cacheline_aligned irq_cpustat_t;
>>
>> DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>> index 57399d7cf8b7..1bf927e2bfac 100644
>> --- a/arch/loongarch/include/asm/kvm_host.h
>> +++ b/arch/loongarch/include/asm/kvm_host.h
>> @@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
>> u64 idle_exits;
>> u64 cpucfg_exits;
>> u64 signal_exits;
>> + u64 hvcl_exits;
> hypercall_exits is better.
Yeah, hypercall_exits is better; will fix it in the next version.
>
>> };
>>
>> #define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
>> diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
>> index 41200e922a82..a25a84e372b9 100644
>> --- a/arch/loongarch/include/asm/kvm_para.h
>> +++ b/arch/loongarch/include/asm/kvm_para.h
>> @@ -9,6 +9,10 @@
>> #define HYPERVISOR_VENDOR_SHIFT 8
>> #define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
>>
>> +#define KVM_HC_CODE_SERVICE 0
>> +#define KVM_HC_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HC_CODE_SERVICE)
>> +#define KVM_HC_FUNC_IPI 1
> Changing HC to HCALL is better.
Will modify it in the next version.
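Concretely, something like this (a sketch; the final names for the next
version are not fixed yet):

#define KVM_HCALL_CODE_SERVICE	0
#define KVM_HCALL_SERVICE	HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE)
#define KVM_HCALL_FUNC_IPI	1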
>
>> +
>> /*
>> * LoongArch hypcall return code
>> */
>> @@ -16,6 +20,126 @@
>> #define KVM_HC_INVALID_CODE -1UL
>> #define KVM_HC_INVALID_PARAMETER -2UL
>>
>> +/*
>> + * Hypercalls interface for KVM hypervisor
>> + *
>> + * a0: function identifier
>> + * a1-a6: args
>> + * Return value will be placed in v0.
>> + * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
>> + */
>> +static __always_inline long kvm_hypercall(u64 fid)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r" (fun)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r" (fun), "r" (a1)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall2(u64 fid,
>> + unsigned long arg0, unsigned long arg1)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> + register unsigned long a2 asm("a2") = arg1;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r" (fun), "r" (a1), "r" (a2)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall3(u64 fid,
>> + unsigned long arg0, unsigned long arg1, unsigned long arg2)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> + register unsigned long a2 asm("a2") = arg1;
>> + register unsigned long a3 asm("a3") = arg2;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r" (fun), "r" (a1), "r" (a2), "r" (a3)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall4(u64 fid,
>> + unsigned long arg0, unsigned long arg1, unsigned long arg2,
>> + unsigned long arg3)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> + register unsigned long a2 asm("a2") = arg1;
>> + register unsigned long a3 asm("a3") = arg2;
>> + register unsigned long a4 asm("a4") = arg3;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall5(u64 fid,
>> + unsigned long arg0, unsigned long arg1, unsigned long arg2,
>> + unsigned long arg3, unsigned long arg4)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> + register unsigned long a2 asm("a2") = arg1;
>> + register unsigned long a3 asm("a3") = arg2;
>> + register unsigned long a4 asm("a4") = arg3;
>> + register unsigned long a5 asm("a5") = arg4;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +
>> static inline unsigned int kvm_arch_para_features(void)
>> {
>> return 0;
>> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
>> index a1d22e8b6f94..0ad36704cb4b 100644
>> --- a/arch/loongarch/include/asm/loongarch.h
>> +++ b/arch/loongarch/include/asm/loongarch.h
>> @@ -167,6 +167,7 @@
>> #define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
>> #define KVM_SIGNATURE "KVM\0"
>> #define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
>> +#define KVM_FEATURE_PV_IPI BIT(1)
>>
>> #ifndef __ASSEMBLY__
>>
>> diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
>> index ce36897d1e5a..4863e6c1b739 100644
>> --- a/arch/loongarch/kernel/irq.c
>> +++ b/arch/loongarch/kernel/irq.c
>> @@ -113,5 +113,5 @@ void __init init_IRQ(void)
>> per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
>> }
>>
>> - set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
>> + set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
>> }
>> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
>> index 21d01d05791a..7a8319df401c 100644
>> --- a/arch/loongarch/kernel/paravirt.c
>> +++ b/arch/loongarch/kernel/paravirt.c
>> @@ -1,6 +1,7 @@
>> // SPDX-License-Identifier: GPL-2.0
>> #include <linux/export.h>
>> #include <linux/types.h>
>> +#include <linux/interrupt.h>
>> #include <linux/jump_label.h>
>> #include <linux/kvm_para.h>
>> #include <asm/paravirt.h>
>> @@ -16,6 +17,104 @@ static u64 native_steal_clock(int cpu)
>>
>> DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
>>
>> +#ifdef CONFIG_SMP
>> +static void pv_send_ipi_single(int cpu, unsigned int action)
>> +{
>> + unsigned int min, old;
>> + unsigned long bitmap = 0;
>> + irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
>> +
>> + action = BIT(action);
>> + old = atomic_fetch_or(action, &info->messages);
>> + if (old == 0) {
>> + min = cpu_logical_map(cpu);
>> + bitmap = 1;
>> + kvm_hypercall3(KVM_HC_FUNC_IPI, bitmap, 0, min);
>> + }
>> +}
>> +
>> +#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
>> +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
>> +{
>> + unsigned int cpu, i, min = 0, max = 0, old;
>> + __uint128_t bitmap = 0;
>> + irq_cpustat_t *info;
>> +
>> + if (cpumask_empty(mask))
>> + return;
>> +
>> + action = BIT(action);
>> + for_each_cpu(i, mask) {
>> + info = &per_cpu(irq_stat, i);
>> + old = atomic_fetch_or(action, &info->messages);
>> + if (old)
>> + continue;
>> +
>> + cpu = cpu_logical_map(i);
>> + if (!bitmap) {
>> + min = max = cpu;
>> + } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) {
>> + max = cpu > max ? cpu : max;
>> + } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) {
>> + bitmap <<= min - cpu;
>> + min = cpu;
>> + } else {
>> + /*
>> + * Physical cpuids are sorted in ascending order, so
>> + * this cpu cannot fit in the current cluster: send
>> + * the IPI here directly and start a new cluster for
>> + * the remaining cpus
>> + */
>> + kvm_hypercall3(KVM_HC_FUNC_IPI, (unsigned long)bitmap,
>> + (unsigned long)(bitmap >> BITS_PER_LONG), min);
>> + min = max = cpu;
>> + bitmap = 0;
>> + }
>> + __set_bit(cpu - min, (unsigned long *)&bitmap);
>> + }
>> +
>> + if (bitmap)
>> + kvm_hypercall3(KVM_HC_FUNC_IPI, (unsigned long)bitmap,
>> + (unsigned long)(bitmap >> BITS_PER_LONG), min);
>> +}
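For reference, a worked trace of the clustering above with illustrative
physical cpuids {3, 10, 200} in the mask (KVM_IPI_CLUSTER_SIZE is 128):

	cpu = 3:   bitmap == 0      -> min = max = 3, set bit 0
	cpu = 10:  10 < 3 + 128     -> max = 10, set bit 7
	cpu = 200: 200 >= 3 + 128   -> hypercall for {3, 10} with min = 3;
	                               then min = max = 200, bitmap = 0, set bit 0
	after the loop: bitmap != 0 -> hypercall for {200} with min = 200

Two hypercalls cover all three cpus instead of three single-target IPIs.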
>> +
>> +static irqreturn_t loongson_do_swi(int irq, void *dev)
>> +{
>> + irq_cpustat_t *info;
>> + long action;
>> +
>> + clear_csr_estat(1 << INT_SWI0);
>> +
>> + info = this_cpu_ptr(&irq_stat);
>> + do {
>> + action = atomic_xchg(&info->messages, 0);
>> + if (action & SMP_CALL_FUNCTION) {
>> + generic_smp_call_function_interrupt();
>> + info->ipi_irqs[IPI_CALL_FUNCTION]++;
>> + }
>> +
>> + if (action & SMP_RESCHEDULE) {
>> + scheduler_ipi();
>> + info->ipi_irqs[IPI_RESCHEDULE]++;
>> + }
>> + } while (action);
>> +
>> + return IRQ_HANDLED;
>> +}
>> +
>> +static void pv_init_ipi(void)
>> +{
>> + int r, swi0;
>> +
>> + swi0 = get_percpu_irq(INT_SWI0);
>> + if (swi0 < 0)
>> + panic("SWI0 IRQ mapping failed\n");
>> + irq_set_percpu_devid(swi0);
>> + r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
>> + if (r < 0)
>> + panic("SWI0 IRQ request failed\n");
>> +}
>> +#endif
>> +
>> static bool kvm_para_available(void)
>> {
>> static int hypervisor_type;
>> @@ -32,10 +131,24 @@ static bool kvm_para_available(void)
>>
>> int __init pv_guest_init(void)
>> {
>> + int feature;
>> +
>> if (!cpu_has_hypervisor)
>> return 0;
>> if (!kvm_para_available())
>> return 0;
>>
>> + /*
>> + * check whether KVM hypervisor supports pv_ipi or not
>> + */
>> +#ifdef CONFIG_SMP
>> + feature = read_cpucfg(CPUCFG_KVM_FEATURE);
> This line can be moved out of CONFIG_SMP, especially since features
> will increase in the future.
Good suggestion; will modify it in the next version.
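Something like this for the next version (a sketch, untested):

int __init pv_guest_init(void)
{
	int feature;

	if (!cpu_has_hypervisor)
		return 0;
	if (!kvm_para_available())
		return 0;

	/* Check which features the KVM hypervisor supports */
	feature = read_cpucfg(CPUCFG_KVM_FEATURE);
#ifdef CONFIG_SMP
	if (feature & KVM_FEATURE_PV_IPI) {
		smp_ops.init_ipi	= pv_init_ipi;
		smp_ops.send_ipi_single	= pv_send_ipi_single;
		smp_ops.send_ipi_mask	= pv_send_ipi_mask;
	}
#endif

	return 1;
}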
Regards
Bibo Mao
>
> Huacai
>
>> + if (feature & KVM_FEATURE_PV_IPI) {
>> + smp_ops.init_ipi = pv_init_ipi;
>> + smp_ops.send_ipi_single = pv_send_ipi_single;
>> + smp_ops.send_ipi_mask = pv_send_ipi_mask;
>> + }
>> +#endif
>> +
>> return 1;
>> }
>> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
>> index 3d3ec07d1ec4..d50443879353 100644
>> --- a/arch/loongarch/kernel/smp.c
>> +++ b/arch/loongarch/kernel/smp.c
>> @@ -285,7 +285,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
>> void loongson_init_secondary(void)
>> {
>> unsigned int cpu = smp_processor_id();
>> - unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
>> + unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
>> ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
>>
>> change_csr_ecfg(ECFG0_IM, imask);
>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
>> index f4e4df05f578..189b70bad825 100644
>> --- a/arch/loongarch/kvm/exit.c
>> +++ b/arch/loongarch/kvm/exit.c
>> @@ -227,6 +227,9 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
>> case CPUCFG_KVM_SIG:
>> vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
>> break;
>> + case CPUCFG_KVM_FEATURE:
>> + vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
>> + break;
>> default:
>> vcpu->arch.gprs[rd] = 0;
>> break;
>> @@ -699,12 +702,78 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
>> return RESUME_GUEST;
>> }
>>
>> +static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
>> +{
>> + unsigned long ipi_bitmap;
>> + unsigned int min, cpu, i;
>> + struct kvm_vcpu *dest;
>> +
>> + min = vcpu->arch.gprs[LOONGARCH_GPR_A3];
>> + for (i = 0; i < 2; i++) {
>> + ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i];
>> + if (!ipi_bitmap)
>> + continue;
>> +
>> + cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
>> + while (cpu < BITS_PER_LONG) {
>> + dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
>> + cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG,
>> + cpu + 1);
>> + if (!dest)
>> + continue;
>> +
>> + /*
>> + * Send SWI0 to dest vcpu to emulate IPI interrupt
>> + */
>> + kvm_queue_irq(dest, INT_SWI0);
>> + kvm_vcpu_kick(dest);
>> + }
>> + }
>> +
>> + return 0;
>> +}
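Continuing the guest-side trace above (illustrative values): the first
hypercall arrives here with a1 = 0x81 (bits 0 and 7), a2 = 0 and
a3 = min = 3, so:

	i = 0: bit 0 -> vcpu with physical cpuid 3,  queue SWI0 and kick
	       bit 7 -> vcpu with physical cpuid 10, queue SWI0 and kick
	i = 1: ipi_bitmap == 0 -> skipped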
>> +
>> +/*
>> + * Hypercall emulation always returns to the guest; the caller should check retval.
>> + */
>> +static void kvm_handle_pv_hcall(struct kvm_vcpu *vcpu)
> Renaming it to kvm_handle_hypercall_service() is better.
>
>
> Huacai
>> +{
>> + unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
>> + long ret;
>> +
>> + switch (func) {
>> + case KVM_HC_FUNC_IPI:
>> + kvm_pv_send_ipi(vcpu);
>> + ret = KVM_HC_STATUS_SUCCESS;
>> + break;
>> + default:
>> + ret = KVM_HC_INVALID_CODE;
>> + break;
>> + }
>> +
>> + vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
>> +}
>> +
>> static int kvm_handle_hypcall(struct kvm_vcpu *vcpu)
>> {
>> + larch_inst inst;
>> + unsigned int code;
>> +
>> + inst.word = vcpu->arch.badi;
>> + code = inst.reg0i15_format.immediate;
>> update_pc(&vcpu->arch);
>>
>> - /* Treat it as noop intruction, only set return value */
>> - vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
>> + switch (code) {
>> + case KVM_HC_SERVICE:
>> + vcpu->stat.hvcl_exits++;
>> + kvm_handle_pv_hcall(vcpu);
>> + break;
>> + default:
>> + /* Treat it as a nop instruction, only set the return value */
>> + vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
>> + break;
>> + }
>> +
>> return RESUME_GUEST;
>> }
>>
>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>> index 97ca9c7160e6..80e05ba9b48d 100644
>> --- a/arch/loongarch/kvm/vcpu.c
>> +++ b/arch/loongarch/kvm/vcpu.c
>> @@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
>> STATS_DESC_COUNTER(VCPU, idle_exits),
>> STATS_DESC_COUNTER(VCPU, cpucfg_exits),
>> STATS_DESC_COUNTER(VCPU, signal_exits),
>> + STATS_DESC_COUNTER(VCPU, hvcl_exits)
>> };
>>
>> const struct kvm_stats_header kvm_vcpu_stats_header = {
>> --
>> 2.39.3
>>