[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a6d49710-1580-809d-5dcf-ea4207257ae7@loongson.cn>
Date: Tue, 7 May 2024 09:40:22 +0800
From: maobibo <maobibo@...ngson.cn>
To: Huacai Chen <chenhuacai@...nel.org>
Cc: Tianrui Zhao <zhaotianrui@...ngson.cn>, Juergen Gross <jgross@...e.com>,
Paolo Bonzini <pbonzini@...hat.com>, Jonathan Corbet <corbet@....net>,
loongarch@...ts.linux.dev, linux-kernel@...r.kernel.org,
virtualization@...ts.linux.dev, kvm@...r.kernel.org
Subject: Re: [PATCH v8 4/6] LoongArch: KVM: Add vcpu search support from
physical cpuid
On 2024/5/6 下午10:17, Huacai Chen wrote:
> On Mon, May 6, 2024 at 6:05 PM maobibo <maobibo@...ngson.cn> wrote:
>>
>>
>>
>> On 2024/5/6 下午5:40, Huacai Chen wrote:
>>> On Mon, May 6, 2024 at 5:35 PM maobibo <maobibo@...ngson.cn> wrote:
>>>>
>>>>
>>>>
>>>> On 2024/5/6 下午4:59, Huacai Chen wrote:
>>>>> On Mon, May 6, 2024 at 4:18 PM maobibo <maobibo@...ngson.cn> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 2024/5/6 下午3:06, Huacai Chen wrote:
>>>>>>> Hi, Bibo,
>>>>>>>
>>>>>>> On Mon, May 6, 2024 at 2:36 PM maobibo <maobibo@...ngson.cn> wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> On 2024/5/6 上午9:49, Huacai Chen wrote:
>>>>>>>>> Hi, Bibo,
>>>>>>>>>
>>>>>>>>> On Sun, Apr 28, 2024 at 6:05 PM Bibo Mao <maobibo@...ngson.cn> wrote:
>>>>>>>>>>
>>>>>>>>>> Physical cpuid is used for interrupt routing for irqchips such as
>>>>>>>>>> ipi/msi/extioi interrupt controller. And physical cpuid is stored
>>>>>>>>>> at CSR register LOONGARCH_CSR_CPUID, it can not be changed once vcpu
>>>>>>>>>> is created and physical cpuid of two vcpus cannot be the same.
>>>>>>>>>>
>>>>>>>>>> Different irqchips have different size declaration about physical cpuid,
>>>>>>>>>> max cpuid value for CSR LOONGARCH_CSR_CPUID on 3A5000 is 512, max cpuid
>>>>>>>>>> supported by IPI hardware is 1024, 256 for extioi irqchip, and 65536
>>>>>>>>>> for MSI irqchip.
>>>>>>>>>>
>>>>>>>>>> The smallest value from all interrupt controllers is selected now,
>>>>>>>>>> and the max cpuid size is defines as 256 by KVM which comes from
>>>>>>>>>> extioi irqchip.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
>>>>>>>>>> ---
>>>>>>>>>> arch/loongarch/include/asm/kvm_host.h | 26 ++++++++
>>>>>>>>>> arch/loongarch/include/asm/kvm_vcpu.h | 1 +
>>>>>>>>>> arch/loongarch/kvm/vcpu.c | 93 ++++++++++++++++++++++++++-
>>>>>>>>>> arch/loongarch/kvm/vm.c | 11 ++++
>>>>>>>>>> 4 files changed, 130 insertions(+), 1 deletion(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>>>>>>>>>> index 2d62f7b0d377..3ba16ef1fe69 100644
>>>>>>>>>> --- a/arch/loongarch/include/asm/kvm_host.h
>>>>>>>>>> +++ b/arch/loongarch/include/asm/kvm_host.h
>>>>>>>>>> @@ -64,6 +64,30 @@ struct kvm_world_switch {
>>>>>>>>>>
>>>>>>>>>> #define MAX_PGTABLE_LEVELS 4
>>>>>>>>>>
>>>>>>>>>> +/*
>>>>>>>>>> + * Physical cpu id is used for interrupt routing, there are different
>>>>>>>>>> + * definitions about physical cpuid on different hardwares.
>>>>>>>>>> + * For LOONGARCH_CSR_CPUID register, max cpuid size if 512
>>>>>>>>>> + * For IPI HW, max dest CPUID size 1024
>>>>>>>>>> + * For extioi interrupt controller, max dest CPUID size is 256
>>>>>>>>>> + * For MSI interrupt controller, max supported CPUID size is 65536
>>>>>>>>>> + *
>>>>>>>>>> + * Currently max CPUID is defined as 256 for KVM hypervisor, in future
>>>>>>>>>> + * it will be expanded to 4096, including 16 packages at most. And every
>>>>>>>>>> + * package supports at most 256 vcpus
>>>>>>>>>> + */
>>>>>>>>>> +#define KVM_MAX_PHYID 256
>>>>>>>>>> +
>>>>>>>>>> +struct kvm_phyid_info {
>>>>>>>>>> + struct kvm_vcpu *vcpu;
>>>>>>>>>> + bool enabled;
>>>>>>>>>> +};
>>>>>>>>>> +
>>>>>>>>>> +struct kvm_phyid_map {
>>>>>>>>>> + int max_phyid;
>>>>>>>>>> + struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
>>>>>>>>>> +};
>>>>>>>>>> +
>>>>>>>>>> struct kvm_arch {
>>>>>>>>>> /* Guest physical mm */
>>>>>>>>>> kvm_pte_t *pgd;
>>>>>>>>>> @@ -71,6 +95,8 @@ struct kvm_arch {
>>>>>>>>>> unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
>>>>>>>>>> unsigned int pte_shifts[MAX_PGTABLE_LEVELS];
>>>>>>>>>> unsigned int root_level;
>>>>>>>>>> + spinlock_t phyid_map_lock;
>>>>>>>>>> + struct kvm_phyid_map *phyid_map;
>>>>>>>>>>
>>>>>>>>>> s64 time_offset;
>>>>>>>>>> struct kvm_context __percpu *vmcs;
>>>>>>>>>> diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
>>>>>>>>>> index 0cb4fdb8a9b5..9f53950959da 100644
>>>>>>>>>> --- a/arch/loongarch/include/asm/kvm_vcpu.h
>>>>>>>>>> +++ b/arch/loongarch/include/asm/kvm_vcpu.h
>>>>>>>>>> @@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
>>>>>>>>>> void kvm_restore_timer(struct kvm_vcpu *vcpu);
>>>>>>>>>>
>>>>>>>>>> int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
>>>>>>>>>> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
>>>>>>>>>>
>>>>>>>>>> /*
>>>>>>>>>> * Loongarch KVM guest interrupt handling
>>>>>>>>>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>>>>>>>>>> index 3a8779065f73..b633fd28b8db 100644
>>>>>>>>>> --- a/arch/loongarch/kvm/vcpu.c
>>>>>>>>>> +++ b/arch/loongarch/kvm/vcpu.c
>>>>>>>>>> @@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
>>>>>>>>>> return 0;
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> +static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
>>>>>>>>>> +{
>>>>>>>>>> + int cpuid;
>>>>>>>>>> + struct loongarch_csrs *csr = vcpu->arch.csr;
>>>>>>>>>> + struct kvm_phyid_map *map;
>>>>>>>>>> +
>>>>>>>>>> + if (val >= KVM_MAX_PHYID)
>>>>>>>>>> + return -EINVAL;
>>>>>>>>>> +
>>>>>>>>>> + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
>>>>>>>>>> + map = vcpu->kvm->arch.phyid_map;
>>>>>>>>>> + spin_lock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>>>>> + if (map->phys_map[cpuid].enabled) {
>>>>>>>>>> + /*
>>>>>>>>>> + * Cpuid is already set before
>>>>>>>>>> + * Forbid changing different cpuid at runtime
>>>>>>>>>> + */
>>>>>>>>>> + if (cpuid != val) {
>>>>>>>>>> + /*
>>>>>>>>>> + * Cpuid 0 is initial value for vcpu, maybe invalid
>>>>>>>>>> + * unset value for vcpu
>>>>>>>>>> + */
>>>>>>>>>> + if (cpuid) {
>>>>>>>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>>>>> + return -EINVAL;
>>>>>>>>>> + }
>>>>>>>>>> + } else {
>>>>>>>>>> + /* Discard duplicated cpuid set */
>>>>>>>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>>>>> + return 0;
>>>>>>>>>> + }
>>>>>>>>>> + }
>>>>>>>>> I have changed the logic and comments when I apply, you can double
>>>>>>>>> check whether it is correct.
>>>>>>>> I checkout the latest version, the modification in function
>>>>>>>> kvm_set_cpuid() is good for me.
>>>>>>> Now the modified version is like this:
>>>>>>>
>>>>>>> + if (map->phys_map[cpuid].enabled) {
>>>>>>> + /* Discard duplicated CPUID set operation */
>>>>>>> + if (cpuid == val) {
>>>>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>> + return 0;
>>>>>>> + }
>>>>>>> +
>>>>>>> + /*
>>>>>>> + * CPUID is already set before
>>>>>>> + * Forbid changing different CPUID at runtime
>>>>>>> + * But CPUID 0 is the initial value for vcpu, so allow
>>>>>>> + * changing from 0 to others
>>>>>>> + */
>>>>>>> + if (cpuid) {
>>>>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>> + return -EINVAL;
>>>>>>> + }
>>>>>>> + }
>>>>>>> But I still doubt whether we should allow changing from 0 to others
>>>>>>> while map->phys_map[cpuid].enabled is 1.
>>>>>> It is necessary since the default sw cpuid is zero :-( And we can
>>>>>> optimize it in later, such as set INVALID cpuid in function
>>>>>> kvm_arch_vcpu_create() and logic will be simple in function kvm_set_cpuid().
>>>>> In my opinion, if a vcpu with a uninitialized default physid=0, then
>>>>> map->phys_map[cpuid].enabled should be 0, then code won't come here.
>>>>> And if a vcpu with a real physid=0, then map->phys_map[cpuid].enabled
>>>>> is 1, but we shouldn't allow it to change physid in this case.
>>>> yes, that is actually a problem.
>>>>
>>>> vcpu0 firstly set physid=0, and vcpu0 set physid=1 again is not allowed.
>>>> vcpu0 firstly set physid=0, and vcpu1 set physid=1 is allowed.
>>>
>>> So can we simply drop the if (cpuid) checking? That means:
>>> + if (map->phys_map[cpuid].enabled) {
>>> + /* Discard duplicated CPUID set operation */
>>> + if (cpuid == val) {
>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>> + return 0;
>>> + }
>>> +
>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>> + return -EINVAL;
>>> + }
>> yes, the similar modification such as following, since the secondary
>> scenario should be allowed.
>> "vcpu0 firstly set physid=0, and vcpu1 set physid=1 is allowed though
>> default sw cpuid is zero"
>>
>> --- a/arch/loongarch/kvm/vcpu.c
>> +++ b/arch/loongarch/kvm/vcpu.c
>> @@ -272,7 +272,7 @@ static inline int kvm_set_cpuid(struct kvm_vcpu
>> *vcpu, u64 val)
>> cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
>>
>> spin_lock(&vcpu->kvm->arch.phyid_map_lock);
>> - if (map->phys_map[cpuid].enabled) {
>> + if ((cpuid != KVM_MAX_PHYID) && map->phys_map[cpuid].enabled) {
>> /* Discard duplicated CPUID set operation */
>> if (cpuid == val) {
>> spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> @@ -282,13 +282,9 @@ static inline int kvm_set_cpuid(struct kvm_vcpu
>> *vcpu, u64 val)
>> /*
>> * CPUID is already set before
>> * Forbid changing different CPUID at runtime
>> - * But CPUID 0 is the initial value for vcpu, so allow
>> - * changing from 0 to others
>> */
>> - if (cpuid) {
>> - spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> - return -EINVAL;
>> - }
>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> + return -EINVAL;
>> }
>>
>> if (map->phys_map[val].enabled) {
>> @@ -1029,6 +1025,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
>>
>> /* Set cpuid */
>> kvm_write_sw_gcsr(csr, LOONGARCH_CSR_TMID, vcpu->vcpu_id);
>> + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID);
>>
>> /* Start with no pending virtual guest interrupts */
>> csr->csrs[LOONGARCH_CSR_GINTC] = 0;
> Very nice, but I think kvm_drop_cpuid() should also set to KVM_MAX_PHYID.
> Now I update my loongarch-kvm branch, you can test it again, and hope
> it is in the perfect status.
I sync and test the latest code from loongarch-kvm, pv ipi works well
with 256 vcpus. And the code looks good to me, thanks for your review in
short time.
Regards
Bibo Mao
>
> Huacai
>>
>>
>>>
>>> Huacai
>>>
>>>>
>>>>
>>>>>
>>>>> Huacai
>>>>>
>>>>>>
>>>>>> Regards
>>>>>> Bibo Mao
>>>>>>
>>>>>>>
>>>>>>> Huacai
>>>>>>>
>>>>>>>>>
>>>>>>>>>> +
>>>>>>>>>> + if (map->phys_map[val].enabled) {
>>>>>>>>>> + /*
>>>>>>>>>> + * New cpuid is already set with other vcpu
>>>>>>>>>> + * Forbid sharing the same cpuid between different vcpus
>>>>>>>>>> + */
>>>>>>>>>> + if (map->phys_map[val].vcpu != vcpu) {
>>>>>>>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>>>>> + return -EINVAL;
>>>>>>>>>> + }
>>>>>>>>>> +
>>>>>>>>>> + /* Discard duplicated cpuid set operation*/
>>>>>>>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>>>>> + return 0;
>>>>>>>>>> + }
>>>>>>>>>> +
>>>>>>>>>> + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
>>>>>>>>>> + map->phys_map[val].enabled = true;
>>>>>>>>>> + map->phys_map[val].vcpu = vcpu;
>>>>>>>>>> + if (map->max_phyid < val)
>>>>>>>>>> + map->max_phyid = val;
>>>>>>>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>>>>> + return 0;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
>>>>>>>>>> +{
>>>>>>>>>> + struct kvm_phyid_map *map;
>>>>>>>>>> +
>>>>>>>>>> + if (cpuid >= KVM_MAX_PHYID)
>>>>>>>>>> + return NULL;
>>>>>>>>>> +
>>>>>>>>>> + map = kvm->arch.phyid_map;
>>>>>>>>>> + if (map->phys_map[cpuid].enabled)
>>>>>>>>>> + return map->phys_map[cpuid].vcpu;
>>>>>>>>>> +
>>>>>>>>>> + return NULL;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
>>>>>>>>>> +{
>>>>>>>>>> + int cpuid;
>>>>>>>>>> + struct loongarch_csrs *csr = vcpu->arch.csr;
>>>>>>>>>> + struct kvm_phyid_map *map;
>>>>>>>>>> +
>>>>>>>>>> + map = vcpu->kvm->arch.phyid_map;
>>>>>>>>>> + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
>>>>>>>>>> + if (cpuid >= KVM_MAX_PHYID)
>>>>>>>>>> + return;
>>>>>>>>>> +
>>>>>>>>>> + if (map->phys_map[cpuid].enabled) {
>>>>>>>>>> + map->phys_map[cpuid].vcpu = NULL;
>>>>>>>>>> + map->phys_map[cpuid].enabled = false;
>>>>>>>>>> + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0);
>>>>>>>>>> + }
>>>>>>>>>> +}
>>>>>>>>> While kvm_set_cpuid() is protected by a spinlock, do kvm_drop_cpuid()
>>>>>>>>> and kvm_get_vcpu_by_cpuid() also need it?
>>>>>>>>>
>>>>>>>> It is good to me that spinlock is added in function kvm_drop_cpuid().
>>>>>>>> And thinks for the efforts.
>>>>>>>>
>>>>>>>> Regards
>>>>>>>> Bibo Mao
>>>>>>>>>> +
>>>>>>>>>> static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
>>>>>>>>>> {
>>>>>>>>>> int ret = 0, gintc;
>>>>>>>>>> @@ -291,7 +380,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
>>>>>>>>>> kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
>>>>>>>>>>
>>>>>>>>>> return ret;
>>>>>>>>>> - }
>>>>>>>>>> + } else if (id == LOONGARCH_CSR_CPUID)
>>>>>>>>>> + return kvm_set_cpuid(vcpu, val);
>>>>>>>>>>
>>>>>>>>>> kvm_write_sw_gcsr(csr, id, val);
>>>>>>>>>>
>>>>>>>>>> @@ -943,6 +1033,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>>>>>>>>>> hrtimer_cancel(&vcpu->arch.swtimer);
>>>>>>>>>> kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
>>>>>>>>>> kfree(vcpu->arch.csr);
>>>>>>>>>> + kvm_drop_cpuid(vcpu);
>>>>>>>>> I think this line should be before the above kfree(), otherwise you
>>>>>>>>> get a "use after free".
>>>>>>>>>
>>>>>>>>> Huacai
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> /*
>>>>>>>>>> * If the vCPU is freed and reused as another vCPU, we don't want the
>>>>>>>>>> diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
>>>>>>>>>> index 0a37f6fa8f2d..6006a28653ad 100644
>>>>>>>>>> --- a/arch/loongarch/kvm/vm.c
>>>>>>>>>> +++ b/arch/loongarch/kvm/vm.c
>>>>>>>>>> @@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>>>>>>>>> if (!kvm->arch.pgd)
>>>>>>>>>> return -ENOMEM;
>>>>>>>>>>
>>>>>>>>>> + kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map),
>>>>>>>>>> + GFP_KERNEL_ACCOUNT);
>>>>>>>>>> + if (!kvm->arch.phyid_map) {
>>>>>>>>>> + free_page((unsigned long)kvm->arch.pgd);
>>>>>>>>>> + kvm->arch.pgd = NULL;
>>>>>>>>>> + return -ENOMEM;
>>>>>>>>>> + }
>>>>>>>>>> +
>>>>>>>>>> kvm_init_vmcs(kvm);
>>>>>>>>>> kvm->arch.gpa_size = BIT(cpu_vabits - 1);
>>>>>>>>>> kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
>>>>>>>>>> @@ -44,6 +52,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>>>>>>>>> for (i = 0; i <= kvm->arch.root_level; i++)
>>>>>>>>>> kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
>>>>>>>>>>
>>>>>>>>>> + spin_lock_init(&kvm->arch.phyid_map_lock);
>>>>>>>>>> return 0;
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> @@ -51,7 +60,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>>>>>>>>>> {
>>>>>>>>>> kvm_destroy_vcpus(kvm);
>>>>>>>>>> free_page((unsigned long)kvm->arch.pgd);
>>>>>>>>>> + kvfree(kvm->arch.phyid_map);
>>>>>>>>>> kvm->arch.pgd = NULL;
>>>>>>>>>> + kvm->arch.phyid_map = NULL;
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>>>>>>>>>> --
>>>>>>>>>> 2.39.3
>>>>>>>>>>
>>>>>>>>
>>>>>>
>>>>
>>
Powered by blists - more mailing lists