[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b10b46ce-8219-8863-470f-9bfa173b22b0@loongson.cn>
Date: Mon, 6 May 2024 16:18:16 +0800
From: maobibo <maobibo@...ngson.cn>
To: Huacai Chen <chenhuacai@...nel.org>
Cc: Tianrui Zhao <zhaotianrui@...ngson.cn>, Juergen Gross <jgross@...e.com>,
Paolo Bonzini <pbonzini@...hat.com>, Jonathan Corbet <corbet@....net>,
loongarch@...ts.linux.dev, linux-kernel@...r.kernel.org,
virtualization@...ts.linux.dev, kvm@...r.kernel.org
Subject: Re: [PATCH v8 4/6] LoongArch: KVM: Add vcpu search support from
physical cpuid
On 2024/5/6 下午3:06, Huacai Chen wrote:
> Hi, Bibo,
>
> On Mon, May 6, 2024 at 2:36 PM maobibo <maobibo@...ngson.cn> wrote:
>>
>>
>>
>> On 2024/5/6 上午9:49, Huacai Chen wrote:
>>> Hi, Bibo,
>>>
>>> On Sun, Apr 28, 2024 at 6:05 PM Bibo Mao <maobibo@...ngson.cn> wrote:
>>>>
>>>> Physical cpuid is used for interrupt routing for irqchips such as
>>>> ipi/msi/extioi interrupt controller. And physical cpuid is stored
>>>> at CSR register LOONGARCH_CSR_CPUID, it can not be changed once vcpu
>>>> is created and physical cpuid of two vcpus cannot be the same.
>>>>
>>>> Different irqchips have different size declaration about physical cpuid,
>>>> max cpuid value for CSR LOONGARCH_CSR_CPUID on 3A5000 is 512, max cpuid
>>>> supported by IPI hardware is 1024, 256 for extioi irqchip, and 65536
>>>> for MSI irqchip.
>>>>
>>>> The smallest value from all interrupt controllers is selected now,
>>>> and the max cpuid size is defined as 256 by KVM which comes from
>>>> extioi irqchip.
>>>>
>>>> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
>>>> ---
>>>> arch/loongarch/include/asm/kvm_host.h | 26 ++++++++
>>>> arch/loongarch/include/asm/kvm_vcpu.h | 1 +
>>>> arch/loongarch/kvm/vcpu.c | 93 ++++++++++++++++++++++++++-
>>>> arch/loongarch/kvm/vm.c | 11 ++++
>>>> 4 files changed, 130 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>>>> index 2d62f7b0d377..3ba16ef1fe69 100644
>>>> --- a/arch/loongarch/include/asm/kvm_host.h
>>>> +++ b/arch/loongarch/include/asm/kvm_host.h
>>>> @@ -64,6 +64,30 @@ struct kvm_world_switch {
>>>>
>>>> #define MAX_PGTABLE_LEVELS 4
>>>>
>>>> +/*
>>>> + * Physical cpu id is used for interrupt routing, there are different
>>>> + * definitions about physical cpuid on different hardwares.
>>>> + * For LOONGARCH_CSR_CPUID register, max cpuid size is 512
>>>> + * For IPI HW, max dest CPUID size 1024
>>>> + * For extioi interrupt controller, max dest CPUID size is 256
>>>> + * For MSI interrupt controller, max supported CPUID size is 65536
>>>> + *
>>>> + * Currently max CPUID is defined as 256 for KVM hypervisor, in future
>>>> + * it will be expanded to 4096, including 16 packages at most. And every
>>>> + * package supports at most 256 vcpus
>>>> + */
>>>> +#define KVM_MAX_PHYID 256
>>>> +
>>>> +struct kvm_phyid_info {
>>>> + struct kvm_vcpu *vcpu;
>>>> + bool enabled;
>>>> +};
>>>> +
>>>> +struct kvm_phyid_map {
>>>> + int max_phyid;
>>>> + struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
>>>> +};
>>>> +
>>>> struct kvm_arch {
>>>> /* Guest physical mm */
>>>> kvm_pte_t *pgd;
>>>> @@ -71,6 +95,8 @@ struct kvm_arch {
>>>> unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
>>>> unsigned int pte_shifts[MAX_PGTABLE_LEVELS];
>>>> unsigned int root_level;
>>>> + spinlock_t phyid_map_lock;
>>>> + struct kvm_phyid_map *phyid_map;
>>>>
>>>> s64 time_offset;
>>>> struct kvm_context __percpu *vmcs;
>>>> diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
>>>> index 0cb4fdb8a9b5..9f53950959da 100644
>>>> --- a/arch/loongarch/include/asm/kvm_vcpu.h
>>>> +++ b/arch/loongarch/include/asm/kvm_vcpu.h
>>>> @@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
>>>> void kvm_restore_timer(struct kvm_vcpu *vcpu);
>>>>
>>>> int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
>>>> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
>>>>
>>>> /*
>>>> * Loongarch KVM guest interrupt handling
>>>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>>>> index 3a8779065f73..b633fd28b8db 100644
>>>> --- a/arch/loongarch/kvm/vcpu.c
>>>> +++ b/arch/loongarch/kvm/vcpu.c
>>>> @@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
>>>> return 0;
>>>> }
>>>>
>>>> +static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
>>>> +{
>>>> + int cpuid;
>>>> + struct loongarch_csrs *csr = vcpu->arch.csr;
>>>> + struct kvm_phyid_map *map;
>>>> +
>>>> + if (val >= KVM_MAX_PHYID)
>>>> + return -EINVAL;
>>>> +
>>>> + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
>>>> + map = vcpu->kvm->arch.phyid_map;
>>>> + spin_lock(&vcpu->kvm->arch.phyid_map_lock);
>>>> + if (map->phys_map[cpuid].enabled) {
>>>> + /*
>>>> + * Cpuid is already set before
>>>> + * Forbid changing different cpuid at runtime
>>>> + */
>>>> + if (cpuid != val) {
>>>> + /*
>>>> + * Cpuid 0 is initial value for vcpu, maybe invalid
>>>> + * unset value for vcpu
>>>> + */
>>>> + if (cpuid) {
>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>> + return -EINVAL;
>>>> + }
>>>> + } else {
>>>> + /* Discard duplicated cpuid set */
>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>> + return 0;
>>>> + }
>>>> + }
>>> I have changed the logic and comments when I apply, you can double
>>> check whether it is correct.
>> I checked out the latest version; the modification in function
>> kvm_set_cpuid() looks good to me.
> Now the modified version is like this:
>
> + if (map->phys_map[cpuid].enabled) {
> + /* Discard duplicated CPUID set operation */
> + if (cpuid == val) {
> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> + return 0;
> + }
> +
> + /*
> + * CPUID is already set before
> + * Forbid changing different CPUID at runtime
> + * But CPUID 0 is the initial value for vcpu, so allow
> + * changing from 0 to others
> + */
> + if (cpuid) {
> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> + return -EINVAL;
> + }
> + }
> But I still doubt whether we should allow changing from 0 to others
> while map->phys_map[cpuid].enabled is 1.
It is necessary since the default sw cpuid is zero :-( We can
optimize it later, for example by setting an INVALID cpuid in function
kvm_arch_vcpu_create(), so that the logic in kvm_set_cpuid() becomes simpler.
Regards
Bibo Mao
>
> Huacai
>
>>>
>>>> +
>>>> + if (map->phys_map[val].enabled) {
>>>> + /*
>>>> + * New cpuid is already set with other vcpu
>>>> + * Forbid sharing the same cpuid between different vcpus
>>>> + */
>>>> + if (map->phys_map[val].vcpu != vcpu) {
>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>> + return -EINVAL;
>>>> + }
>>>> +
>>>> + /* Discard duplicated cpuid set operation*/
>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>> + return 0;
>>>> + }
>>>> +
>>>> + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
>>>> + map->phys_map[val].enabled = true;
>>>> + map->phys_map[val].vcpu = vcpu;
>>>> + if (map->max_phyid < val)
>>>> + map->max_phyid = val;
>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>> + return 0;
>>>> +}
>>>> +
>>>> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
>>>> +{
>>>> + struct kvm_phyid_map *map;
>>>> +
>>>> + if (cpuid >= KVM_MAX_PHYID)
>>>> + return NULL;
>>>> +
>>>> + map = kvm->arch.phyid_map;
>>>> + if (map->phys_map[cpuid].enabled)
>>>> + return map->phys_map[cpuid].vcpu;
>>>> +
>>>> + return NULL;
>>>> +}
>>>> +
>>>> +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
>>>> +{
>>>> + int cpuid;
>>>> + struct loongarch_csrs *csr = vcpu->arch.csr;
>>>> + struct kvm_phyid_map *map;
>>>> +
>>>> + map = vcpu->kvm->arch.phyid_map;
>>>> + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
>>>> + if (cpuid >= KVM_MAX_PHYID)
>>>> + return;
>>>> +
>>>> + if (map->phys_map[cpuid].enabled) {
>>>> + map->phys_map[cpuid].vcpu = NULL;
>>>> + map->phys_map[cpuid].enabled = false;
>>>> + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0);
>>>> + }
>>>> +}
>>> While kvm_set_cpuid() is protected by a spinlock, do kvm_drop_cpuid()
>>> and kvm_get_vcpu_by_cpuid() also need it?
>>>
>> It looks good to me that a spinlock is added in function kvm_drop_cpuid().
>> And thanks for the effort.
>>
>> Regards
>> Bibo Mao
>>>> +
>>>> static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
>>>> {
>>>> int ret = 0, gintc;
>>>> @@ -291,7 +380,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
>>>> kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
>>>>
>>>> return ret;
>>>> - }
>>>> + } else if (id == LOONGARCH_CSR_CPUID)
>>>> + return kvm_set_cpuid(vcpu, val);
>>>>
>>>> kvm_write_sw_gcsr(csr, id, val);
>>>>
>>>> @@ -943,6 +1033,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>>>> hrtimer_cancel(&vcpu->arch.swtimer);
>>>> kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
>>>> kfree(vcpu->arch.csr);
>>>> + kvm_drop_cpuid(vcpu);
>>> I think this line should be before the above kfree(), otherwise you
>>> get a "use after free".
>>>
>>> Huacai
>>>
>>>>
>>>> /*
>>>> * If the vCPU is freed and reused as another vCPU, we don't want the
>>>> diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
>>>> index 0a37f6fa8f2d..6006a28653ad 100644
>>>> --- a/arch/loongarch/kvm/vm.c
>>>> +++ b/arch/loongarch/kvm/vm.c
>>>> @@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>>> if (!kvm->arch.pgd)
>>>> return -ENOMEM;
>>>>
>>>> + kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map),
>>>> + GFP_KERNEL_ACCOUNT);
>>>> + if (!kvm->arch.phyid_map) {
>>>> + free_page((unsigned long)kvm->arch.pgd);
>>>> + kvm->arch.pgd = NULL;
>>>> + return -ENOMEM;
>>>> + }
>>>> +
>>>> kvm_init_vmcs(kvm);
>>>> kvm->arch.gpa_size = BIT(cpu_vabits - 1);
>>>> kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
>>>> @@ -44,6 +52,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>>> for (i = 0; i <= kvm->arch.root_level; i++)
>>>> kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
>>>>
>>>> + spin_lock_init(&kvm->arch.phyid_map_lock);
>>>> return 0;
>>>> }
>>>>
>>>> @@ -51,7 +60,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>>>> {
>>>> kvm_destroy_vcpus(kvm);
>>>> free_page((unsigned long)kvm->arch.pgd);
>>>> + kvfree(kvm->arch.phyid_map);
>>>> kvm->arch.pgd = NULL;
>>>> + kvm->arch.phyid_map = NULL;
>>>> }
>>>>
>>>> int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>>>> --
>>>> 2.39.3
>>>>
>>
Powered by blists - more mailing lists