lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <61670353-90c6-6d0c-4430-7655b5251e17@loongson.cn>
Date: Mon, 6 May 2024 18:05:45 +0800
From: maobibo <maobibo@...ngson.cn>
To: Huacai Chen <chenhuacai@...nel.org>
Cc: Tianrui Zhao <zhaotianrui@...ngson.cn>, Juergen Gross <jgross@...e.com>,
 Paolo Bonzini <pbonzini@...hat.com>, Jonathan Corbet <corbet@....net>,
 loongarch@...ts.linux.dev, linux-kernel@...r.kernel.org,
 virtualization@...ts.linux.dev, kvm@...r.kernel.org
Subject: Re: [PATCH v8 4/6] LoongArch: KVM: Add vcpu search support from
 physical cpuid



On 2024/5/6 下午5:40, Huacai Chen wrote:
> On Mon, May 6, 2024 at 5:35 PM maobibo <maobibo@...ngson.cn> wrote:
>>
>>
>>
>> On 2024/5/6 下午4:59, Huacai Chen wrote:
>>> On Mon, May 6, 2024 at 4:18 PM maobibo <maobibo@...ngson.cn> wrote:
>>>>
>>>>
>>>>
>>>> On 2024/5/6 下午3:06, Huacai Chen wrote:
>>>>> Hi, Bibo,
>>>>>
>>>>> On Mon, May 6, 2024 at 2:36 PM maobibo <maobibo@...ngson.cn> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 2024/5/6 上午9:49, Huacai Chen wrote:
>>>>>>> Hi, Bibo,
>>>>>>>
>>>>>>> On Sun, Apr 28, 2024 at 6:05 PM Bibo Mao <maobibo@...ngson.cn> wrote:
>>>>>>>>
>>>>>>>> Physical cpuid is used for interrupt routing for irqchips such as
>>>>>>>> ipi/msi/extioi interrupt controller. And physical cpuid is stored
>>>>>>>> at CSR register LOONGARCH_CSR_CPUID, it can not be changed once vcpu
>>>>>>>> is created and physical cpuid of two vcpus cannot be the same.
>>>>>>>>
>>>>>>>> Different irqchips have different size declaration about physical cpuid,
>>>>>>>> max cpuid value for CSR LOONGARCH_CSR_CPUID on 3A5000 is 512, max cpuid
>>>>>>>> supported by IPI hardware is 1024, 256 for extioi irqchip, and 65536
>>>>>>>> for MSI irqchip.
>>>>>>>>
>>>>>>>> The smallest value from all interrupt controllers is selected now,
>>>>>>>> and the max cpuid size is defined as 256 by KVM which comes from
>>>>>>>> extioi irqchip.
>>>>>>>>
>>>>>>>> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
>>>>>>>> ---
>>>>>>>>      arch/loongarch/include/asm/kvm_host.h | 26 ++++++++
>>>>>>>>      arch/loongarch/include/asm/kvm_vcpu.h |  1 +
>>>>>>>>      arch/loongarch/kvm/vcpu.c             | 93 ++++++++++++++++++++++++++-
>>>>>>>>      arch/loongarch/kvm/vm.c               | 11 ++++
>>>>>>>>      4 files changed, 130 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>>>>>>>> index 2d62f7b0d377..3ba16ef1fe69 100644
>>>>>>>> --- a/arch/loongarch/include/asm/kvm_host.h
>>>>>>>> +++ b/arch/loongarch/include/asm/kvm_host.h
>>>>>>>> @@ -64,6 +64,30 @@ struct kvm_world_switch {
>>>>>>>>
>>>>>>>>      #define MAX_PGTABLE_LEVELS     4
>>>>>>>>
>>>>>>>> +/*
>>>>>>>> + * Physical cpu id is used for interrupt routing, there are different
>>>>>>>> + * definitions about physical cpuid on different hardwares.
>>>>>>>> + *  For LOONGARCH_CSR_CPUID register, max cpuid size is 512
>>>>>>>> + *  For IPI HW, max dest CPUID size 1024
>>>>>>>> + *  For extioi interrupt controller, max dest CPUID size is 256
>>>>>>>> + *  For MSI interrupt controller, max supported CPUID size is 65536
>>>>>>>> + *
>>>>>>>> + * Currently max CPUID is defined as 256 for KVM hypervisor, in future
>>>>>>>> + * it will be expanded to 4096, including 16 packages at most. And every
>>>>>>>> + * package supports at most 256 vcpus
>>>>>>>> + */
>>>>>>>> +#define KVM_MAX_PHYID          256
>>>>>>>> +
>>>>>>>> +struct kvm_phyid_info {
>>>>>>>> +       struct kvm_vcpu *vcpu;
>>>>>>>> +       bool            enabled;
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +struct kvm_phyid_map {
>>>>>>>> +       int max_phyid;
>>>>>>>> +       struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
>>>>>>>> +};
>>>>>>>> +
>>>>>>>>      struct kvm_arch {
>>>>>>>>             /* Guest physical mm */
>>>>>>>>             kvm_pte_t *pgd;
>>>>>>>> @@ -71,6 +95,8 @@ struct kvm_arch {
>>>>>>>>             unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
>>>>>>>>             unsigned int  pte_shifts[MAX_PGTABLE_LEVELS];
>>>>>>>>             unsigned int  root_level;
>>>>>>>> +       spinlock_t    phyid_map_lock;
>>>>>>>> +       struct kvm_phyid_map  *phyid_map;
>>>>>>>>
>>>>>>>>             s64 time_offset;
>>>>>>>>             struct kvm_context __percpu *vmcs;
>>>>>>>> diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
>>>>>>>> index 0cb4fdb8a9b5..9f53950959da 100644
>>>>>>>> --- a/arch/loongarch/include/asm/kvm_vcpu.h
>>>>>>>> +++ b/arch/loongarch/include/asm/kvm_vcpu.h
>>>>>>>> @@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
>>>>>>>>      void kvm_restore_timer(struct kvm_vcpu *vcpu);
>>>>>>>>
>>>>>>>>      int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
>>>>>>>> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
>>>>>>>>
>>>>>>>>      /*
>>>>>>>>       * Loongarch KVM guest interrupt handling
>>>>>>>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>>>>>>>> index 3a8779065f73..b633fd28b8db 100644
>>>>>>>> --- a/arch/loongarch/kvm/vcpu.c
>>>>>>>> +++ b/arch/loongarch/kvm/vcpu.c
>>>>>>>> @@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
>>>>>>>>             return 0;
>>>>>>>>      }
>>>>>>>>
>>>>>>>> +static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
>>>>>>>> +{
>>>>>>>> +       int cpuid;
>>>>>>>> +       struct loongarch_csrs *csr = vcpu->arch.csr;
>>>>>>>> +       struct kvm_phyid_map  *map;
>>>>>>>> +
>>>>>>>> +       if (val >= KVM_MAX_PHYID)
>>>>>>>> +               return -EINVAL;
>>>>>>>> +
>>>>>>>> +       cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
>>>>>>>> +       map = vcpu->kvm->arch.phyid_map;
>>>>>>>> +       spin_lock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>>> +       if (map->phys_map[cpuid].enabled) {
>>>>>>>> +               /*
>>>>>>>> +                * Cpuid is already set before
>>>>>>>> +                * Forbid changing different cpuid at runtime
>>>>>>>> +                */
>>>>>>>> +               if (cpuid != val) {
>>>>>>>> +                       /*
>>>>>>>> +                        * Cpuid 0 is initial value for vcpu, maybe invalid
>>>>>>>> +                        * unset value for vcpu
>>>>>>>> +                        */
>>>>>>>> +                       if (cpuid) {
>>>>>>>> +                               spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>>> +                               return -EINVAL;
>>>>>>>> +                       }
>>>>>>>> +               } else {
>>>>>>>> +                        /* Discard duplicated cpuid set */
>>>>>>>> +                       spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>>> +                       return 0;
>>>>>>>> +               }
>>>>>>>> +       }
>>>>>>> I have changed the logic and comments when I apply, you can double
>>>>>>> check whether it is correct.
>>>>>> I checked out the latest version, and the modification in function
>>>>>> kvm_set_cpuid() looks good to me.
>>>>> Now the modified version is like this:
>>>>>
>>>>> + if (map->phys_map[cpuid].enabled) {
>>>>> + /* Discard duplicated CPUID set operation */
>>>>> + if (cpuid == val) {
>>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>> + return 0;
>>>>> + }
>>>>> +
>>>>> + /*
>>>>> + * CPUID is already set before
>>>>> + * Forbid changing different CPUID at runtime
>>>>> + * But CPUID 0 is the initial value for vcpu, so allow
>>>>> + * changing from 0 to others
>>>>> + */
>>>>> + if (cpuid) {
>>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>> + return -EINVAL;
>>>>> + }
>>>>> + }
>>>>> But I still doubt whether we should allow changing from 0 to others
>>>>> while map->phys_map[cpuid].enabled is 1.
>>>> It is necessary since the default sw cpuid is zero :-( And we can
>>>> optimize it later, such as by setting an INVALID cpuid in function
>>>> kvm_arch_vcpu_create(), and then the logic in function kvm_set_cpuid()
>>>> will be simpler.
>>> In my opinion, if a vcpu has an uninitialized default physid=0, then
>>> map->phys_map[cpuid].enabled should be 0, then code won't come here.
>>> And if a vcpu with a real physid=0, then map->phys_map[cpuid].enabled
>>> is 1, but we shouldn't allow it to change physid in this case.
>> yes, that is actually a problem.
>>
>> vcpu0 firstly set physid=0, and vcpu0 set physid=1 again is not allowed.
>> vcpu0 firstly set physid=0, and vcpu1 set physid=1 is allowed.
> 
> So can we simply drop the if (cpuid) checking? That means:
> + if (map->phys_map[cpuid].enabled) {
> + /* Discard duplicated CPUID set operation */
> + if (cpuid == val) {
> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> + return 0;
> + }
> +
> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> + return -EINVAL;
> + }
Yes, a similar modification such as the following, since the second
scenario should be allowed.
  "vcpu0 firstly set physid=0, and vcpu1 set physid=1 is allowed though 
default sw cpuid is zero"

--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -272,7 +272,7 @@ static inline int kvm_set_cpuid(struct kvm_vcpu 
*vcpu, u64 val)
         cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);

         spin_lock(&vcpu->kvm->arch.phyid_map_lock);
-       if (map->phys_map[cpuid].enabled) {
+       if ((cpuid != KVM_MAX_PHYID) && map->phys_map[cpuid].enabled) {
                 /* Discard duplicated CPUID set operation */
                 if (cpuid == val) {
                         spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
@@ -282,13 +282,9 @@ static inline int kvm_set_cpuid(struct kvm_vcpu 
*vcpu, u64 val)
                 /*
                  * CPUID is already set before
                  * Forbid changing different CPUID at runtime
-                * But CPUID 0 is the initial value for vcpu, so allow
-                * changing from 0 to others
                  */
-               if (cpuid) {
-                       spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
-                       return -EINVAL;
-               }
+               spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+               return -EINVAL;
         }

         if (map->phys_map[val].enabled) {
@@ -1029,6 +1025,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)

         /* Set cpuid */
         kvm_write_sw_gcsr(csr, LOONGARCH_CSR_TMID, vcpu->vcpu_id);
+       kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID);

         /* Start with no pending virtual guest interrupts */
         csr->csrs[LOONGARCH_CSR_GINTC] = 0;


> 
> Huacai
> 
>>
>>
>>>
>>> Huacai
>>>
>>>>
>>>> Regards
>>>> Bibo Mao
>>>>
>>>>>
>>>>> Huacai
>>>>>
>>>>>>>
>>>>>>>> +
>>>>>>>> +       if (map->phys_map[val].enabled) {
>>>>>>>> +               /*
>>>>>>>> +                * New cpuid is already set with other vcpu
>>>>>>>> +                * Forbid sharing the same cpuid between different vcpus
>>>>>>>> +                */
>>>>>>>> +               if (map->phys_map[val].vcpu != vcpu) {
>>>>>>>> +                       spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>>> +                       return -EINVAL;
>>>>>>>> +               }
>>>>>>>> +
>>>>>>>> +               /* Discard duplicated cpuid set operation*/
>>>>>>>> +               spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>>> +               return 0;
>>>>>>>> +       }
>>>>>>>> +
>>>>>>>> +       kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
>>>>>>>> +       map->phys_map[val].enabled      = true;
>>>>>>>> +       map->phys_map[val].vcpu         = vcpu;
>>>>>>>> +       if (map->max_phyid < val)
>>>>>>>> +               map->max_phyid = val;
>>>>>>>> +       spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
>>>>>>>> +       return 0;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
>>>>>>>> +{
>>>>>>>> +       struct kvm_phyid_map  *map;
>>>>>>>> +
>>>>>>>> +       if (cpuid >= KVM_MAX_PHYID)
>>>>>>>> +               return NULL;
>>>>>>>> +
>>>>>>>> +       map = kvm->arch.phyid_map;
>>>>>>>> +       if (map->phys_map[cpuid].enabled)
>>>>>>>> +               return map->phys_map[cpuid].vcpu;
>>>>>>>> +
>>>>>>>> +       return NULL;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
>>>>>>>> +{
>>>>>>>> +       int cpuid;
>>>>>>>> +       struct loongarch_csrs *csr = vcpu->arch.csr;
>>>>>>>> +       struct kvm_phyid_map  *map;
>>>>>>>> +
>>>>>>>> +       map = vcpu->kvm->arch.phyid_map;
>>>>>>>> +       cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
>>>>>>>> +       if (cpuid >= KVM_MAX_PHYID)
>>>>>>>> +               return;
>>>>>>>> +
>>>>>>>> +       if (map->phys_map[cpuid].enabled) {
>>>>>>>> +               map->phys_map[cpuid].vcpu = NULL;
>>>>>>>> +               map->phys_map[cpuid].enabled = false;
>>>>>>>> +               kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0);
>>>>>>>> +       }
>>>>>>>> +}
>>>>>>> While kvm_set_cpuid() is protected by a spinlock, do kvm_drop_cpuid()
>>>>>>> and kvm_get_vcpu_by_cpuid() also need it?
>>>>>>>
>>>>>> It is fine with me that a spinlock is added in function kvm_drop_cpuid().
>>>>>> And thanks for the efforts.
>>>>>>
>>>>>> Regards
>>>>>> Bibo Mao
>>>>>>>> +
>>>>>>>>      static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
>>>>>>>>      {
>>>>>>>>             int ret = 0, gintc;
>>>>>>>> @@ -291,7 +380,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
>>>>>>>>                     kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
>>>>>>>>
>>>>>>>>                     return ret;
>>>>>>>> -       }
>>>>>>>> +       } else if (id == LOONGARCH_CSR_CPUID)
>>>>>>>> +               return kvm_set_cpuid(vcpu, val);
>>>>>>>>
>>>>>>>>             kvm_write_sw_gcsr(csr, id, val);
>>>>>>>>
>>>>>>>> @@ -943,6 +1033,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>>>>>>>>             hrtimer_cancel(&vcpu->arch.swtimer);
>>>>>>>>             kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
>>>>>>>>             kfree(vcpu->arch.csr);
>>>>>>>> +       kvm_drop_cpuid(vcpu);
>>>>>>> I think this line should be before the above kfree(), otherwise you
>>>>>>> get a "use after free".
>>>>>>>
>>>>>>> Huacai
>>>>>>>
>>>>>>>>
>>>>>>>>             /*
>>>>>>>>              * If the vCPU is freed and reused as another vCPU, we don't want the
>>>>>>>> diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
>>>>>>>> index 0a37f6fa8f2d..6006a28653ad 100644
>>>>>>>> --- a/arch/loongarch/kvm/vm.c
>>>>>>>> +++ b/arch/loongarch/kvm/vm.c
>>>>>>>> @@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>>>>>>>             if (!kvm->arch.pgd)
>>>>>>>>                     return -ENOMEM;
>>>>>>>>
>>>>>>>> +       kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map),
>>>>>>>> +                               GFP_KERNEL_ACCOUNT);
>>>>>>>> +       if (!kvm->arch.phyid_map) {
>>>>>>>> +               free_page((unsigned long)kvm->arch.pgd);
>>>>>>>> +               kvm->arch.pgd = NULL;
>>>>>>>> +               return -ENOMEM;
>>>>>>>> +       }
>>>>>>>> +
>>>>>>>>             kvm_init_vmcs(kvm);
>>>>>>>>             kvm->arch.gpa_size = BIT(cpu_vabits - 1);
>>>>>>>>             kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
>>>>>>>> @@ -44,6 +52,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>>>>>>>             for (i = 0; i <= kvm->arch.root_level; i++)
>>>>>>>>                     kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
>>>>>>>>
>>>>>>>> +       spin_lock_init(&kvm->arch.phyid_map_lock);
>>>>>>>>             return 0;
>>>>>>>>      }
>>>>>>>>
>>>>>>>> @@ -51,7 +60,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>>>>>>>>      {
>>>>>>>>             kvm_destroy_vcpus(kvm);
>>>>>>>>             free_page((unsigned long)kvm->arch.pgd);
>>>>>>>> +       kvfree(kvm->arch.phyid_map);
>>>>>>>>             kvm->arch.pgd = NULL;
>>>>>>>> +       kvm->arch.phyid_map = NULL;
>>>>>>>>      }
>>>>>>>>
>>>>>>>>      int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>>>>>>>> --
>>>>>>>> 2.39.3
>>>>>>>>
>>>>>>
>>>>
>>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ