linux-kernel - Re: [PATCH v8 4/6] LoongArch: KVM: Add vcpu search support from physical cpuid

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAAhV-H5wNmgxGincGE7cJ8WvrpKFauAJvMHrPttW-LrKB4UeHg@mail.gmail.com>
Date: Mon, 6 May 2024 22:17:51 +0800
From: Huacai Chen <chenhuacai@...nel.org>
To: maobibo <maobibo@...ngson.cn>
Cc: Tianrui Zhao <zhaotianrui@...ngson.cn>, Juergen Gross <jgross@...e.com>, 
	Paolo Bonzini <pbonzini@...hat.com>, Jonathan Corbet <corbet@....net>, loongarch@...ts.linux.dev, 
	linux-kernel@...r.kernel.org, virtualization@...ts.linux.dev, 
	kvm@...r.kernel.org
Subject: Re: [PATCH v8 4/6] LoongArch: KVM: Add vcpu search support from
 physical cpuid

On Mon, May 6, 2024 at 6:05 PM maobibo <maobibo@...ngson.cn> wrote:
>
>
>
> On 2024/5/6 下午5:40, Huacai Chen wrote:
> > On Mon, May 6, 2024 at 5:35 PM maobibo <maobibo@...ngson.cn> wrote:
> >>
> >>
> >>
> >> On 2024/5/6 下午4:59, Huacai Chen wrote:
> >>> On Mon, May 6, 2024 at 4:18 PM maobibo <maobibo@...ngson.cn> wrote:
> >>>>
> >>>>
> >>>>
> >>>> On 2024/5/6 下午3:06, Huacai Chen wrote:
> >>>>> Hi, Bibo,
> >>>>>
> >>>>> On Mon, May 6, 2024 at 2:36 PM maobibo <maobibo@...ngson.cn> wrote:
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>> On 2024/5/6 上午9:49, Huacai Chen wrote:
> >>>>>>> Hi, Bibo,
> >>>>>>>
> >>>>>>> On Sun, Apr 28, 2024 at 6:05 PM Bibo Mao <maobibo@...ngson.cn> wrote:
> >>>>>>>>
> >>>>>>>> Physical cpuid is used for interrupt routing for irqchips such as
> >>>>>>>> ipi/msi/extioi interrupt controller. And physical cpuid is stored
> >>>>>>>> at CSR register LOONGARCH_CSR_CPUID, it can not be changed once vcpu
> >>>>>>>> is created and physical cpuid of two vcpus cannot be the same.
> >>>>>>>>
> >>>>>>>> Different irqchips have different size declaration about physical cpuid,
> >>>>>>>> max cpuid value for CSR LOONGARCH_CSR_CPUID on 3A5000 is 512, max cpuid
> >>>>>>>> supported by IPI hardware is 1024, 256 for extioi irqchip, and 65536
> >>>>>>>> for MSI irqchip.
> >>>>>>>>
> >>>>>>>> The smallest value from all interrupt controllers is selected now,
> >>>>>>>> and the max cpuid size is defined as 256 by KVM which comes from
> >>>>>>>> extioi irqchip.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
> >>>>>>>> ---
> >>>>>>>>      arch/loongarch/include/asm/kvm_host.h | 26 ++++++++
> >>>>>>>>      arch/loongarch/include/asm/kvm_vcpu.h |  1 +
> >>>>>>>>      arch/loongarch/kvm/vcpu.c             | 93 ++++++++++++++++++++++++++-
> >>>>>>>>      arch/loongarch/kvm/vm.c               | 11 ++++
> >>>>>>>>      4 files changed, 130 insertions(+), 1 deletion(-)
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> >>>>>>>> index 2d62f7b0d377..3ba16ef1fe69 100644
> >>>>>>>> --- a/arch/loongarch/include/asm/kvm_host.h
> >>>>>>>> +++ b/arch/loongarch/include/asm/kvm_host.h
> >>>>>>>> @@ -64,6 +64,30 @@ struct kvm_world_switch {
> >>>>>>>>
> >>>>>>>>      #define MAX_PGTABLE_LEVELS     4
> >>>>>>>>
> >>>>>>>> +/*
> >>>>>>>> + * Physical cpu id is used for interrupt routing, there are different
> >>>>>>>> + * definitions about physical cpuid on different hardwares.
> >>>>>>>> + *  For LOONGARCH_CSR_CPUID register, max cpuid size is 512
> >>>>>>>> + *  For IPI HW, max dest CPUID size 1024
> >>>>>>>> + *  For extioi interrupt controller, max dest CPUID size is 256
> >>>>>>>> + *  For MSI interrupt controller, max supported CPUID size is 65536
> >>>>>>>> + *
> >>>>>>>> + * Currently max CPUID is defined as 256 for KVM hypervisor, in future
> >>>>>>>> + * it will be expanded to 4096, including 16 packages at most. And every
> >>>>>>>> + * package supports at most 256 vcpus
> >>>>>>>> + */
> >>>>>>>> +#define KVM_MAX_PHYID          256
> >>>>>>>> +
> >>>>>>>> +struct kvm_phyid_info {
> >>>>>>>> +       struct kvm_vcpu *vcpu;
> >>>>>>>> +       bool            enabled;
> >>>>>>>> +};
> >>>>>>>> +
> >>>>>>>> +struct kvm_phyid_map {
> >>>>>>>> +       int max_phyid;
> >>>>>>>> +       struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
> >>>>>>>> +};
> >>>>>>>> +
> >>>>>>>>      struct kvm_arch {
> >>>>>>>>             /* Guest physical mm */
> >>>>>>>>             kvm_pte_t *pgd;
> >>>>>>>> @@ -71,6 +95,8 @@ struct kvm_arch {
> >>>>>>>>             unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
> >>>>>>>>             unsigned int  pte_shifts[MAX_PGTABLE_LEVELS];
> >>>>>>>>             unsigned int  root_level;
> >>>>>>>> +       spinlock_t    phyid_map_lock;
> >>>>>>>> +       struct kvm_phyid_map  *phyid_map;
> >>>>>>>>
> >>>>>>>>             s64 time_offset;
> >>>>>>>>             struct kvm_context __percpu *vmcs;
> >>>>>>>> diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
> >>>>>>>> index 0cb4fdb8a9b5..9f53950959da 100644
> >>>>>>>> --- a/arch/loongarch/include/asm/kvm_vcpu.h
> >>>>>>>> +++ b/arch/loongarch/include/asm/kvm_vcpu.h
> >>>>>>>> @@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
> >>>>>>>>      void kvm_restore_timer(struct kvm_vcpu *vcpu);
> >>>>>>>>
> >>>>>>>>      int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
> >>>>>>>> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
> >>>>>>>>
> >>>>>>>>      /*
> >>>>>>>>       * Loongarch KVM guest interrupt handling
> >>>>>>>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> >>>>>>>> index 3a8779065f73..b633fd28b8db 100644
> >>>>>>>> --- a/arch/loongarch/kvm/vcpu.c
> >>>>>>>> +++ b/arch/loongarch/kvm/vcpu.c
> >>>>>>>> @@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
> >>>>>>>>             return 0;
> >>>>>>>>      }
> >>>>>>>>
> >>>>>>>> +static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
> >>>>>>>> +{
> >>>>>>>> +       int cpuid;
> >>>>>>>> +       struct loongarch_csrs *csr = vcpu->arch.csr;
> >>>>>>>> +       struct kvm_phyid_map  *map;
> >>>>>>>> +
> >>>>>>>> +       if (val >= KVM_MAX_PHYID)
> >>>>>>>> +               return -EINVAL;
> >>>>>>>> +
> >>>>>>>> +       cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
> >>>>>>>> +       map = vcpu->kvm->arch.phyid_map;
> >>>>>>>> +       spin_lock(&vcpu->kvm->arch.phyid_map_lock);
> >>>>>>>> +       if (map->phys_map[cpuid].enabled) {
> >>>>>>>> +               /*
> >>>>>>>> +                * Cpuid is already set before
> >>>>>>>> +                * Forbid changing different cpuid at runtime
> >>>>>>>> +                */
> >>>>>>>> +               if (cpuid != val) {
> >>>>>>>> +                       /*
> >>>>>>>> +                        * Cpuid 0 is initial value for vcpu, maybe invalid
> >>>>>>>> +                        * unset value for vcpu
> >>>>>>>> +                        */
> >>>>>>>> +                       if (cpuid) {
> >>>>>>>> +                               spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> >>>>>>>> +                               return -EINVAL;
> >>>>>>>> +                       }
> >>>>>>>> +               } else {
> >>>>>>>> +                        /* Discard duplicated cpuid set */
> >>>>>>>> +                       spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> >>>>>>>> +                       return 0;
> >>>>>>>> +               }
> >>>>>>>> +       }
> >>>>>>> I have changed the logic and comments while applying; you can double
> >>>>>>> check whether it is correct.
> >>>>>> I checked out the latest version; the modification in function
> >>>>>> kvm_set_cpuid() looks good to me.
> >>>>> Now the modified version is like this:
> >>>>>
> >>>>> + if (map->phys_map[cpuid].enabled) {
> >>>>> + /* Discard duplicated CPUID set operation */
> >>>>> + if (cpuid == val) {
> >>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> >>>>> + return 0;
> >>>>> + }
> >>>>> +
> >>>>> + /*
> >>>>> + * CPUID is already set before
> >>>>> + * Forbid changing different CPUID at runtime
> >>>>> + * But CPUID 0 is the initial value for vcpu, so allow
> >>>>> + * changing from 0 to others
> >>>>> + */
> >>>>> + if (cpuid) {
> >>>>> + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> >>>>> + return -EINVAL;
> >>>>> + }
> >>>>> + }
> >>>>> But I still doubt whether we should allow changing from 0 to others
> >>>>> while map->phys_map[cpuid].enabled is 1.
> >>>> It is necessary since the default sw cpuid is zero :-( And we can
> >>>> optimize it later, such as by setting an INVALID cpuid in function
> >>>> kvm_arch_vcpu_create(), so that the logic in kvm_set_cpuid() becomes simpler.
> >>> In my opinion, if a vcpu has an uninitialized default physid=0, then
> >>> map->phys_map[cpuid].enabled should be 0, and the code won't come here.
> >>> And if a vcpu has a real physid=0, then map->phys_map[cpuid].enabled
> >>> is 1, but we shouldn't allow it to change physid in this case.
> >> yes, that is actually a problem.
> >>
> >> vcpu0 firstly set physid=0, and vcpu0 set physid=1 again is not allowed.
> >> vcpu0 firstly set physid=0, and vcpu1 set physid=1 is allowed.
> >
> > So can we simply drop the if (cpuid) checking? That means:
> > + if (map->phys_map[cpuid].enabled) {
> > + /* Discard duplicated CPUID set operation */
> > + if (cpuid == val) {
> > + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> > + return 0;
> > + }
> > +
> > + spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> > + return -EINVAL;
> > + }
> Yes, a similar modification such as the following, since the second
> scenario should be allowed.
>   "vcpu0 firstly set physid=0, and vcpu1 set physid=1 is allowed though
> default sw cpuid is zero"
>
> --- a/arch/loongarch/kvm/vcpu.c
> +++ b/arch/loongarch/kvm/vcpu.c
> @@ -272,7 +272,7 @@ static inline int kvm_set_cpuid(struct kvm_vcpu
> *vcpu, u64 val)
>          cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
>
>          spin_lock(&vcpu->kvm->arch.phyid_map_lock);
> -       if (map->phys_map[cpuid].enabled) {
> +       if ((cpuid != KVM_MAX_PHYID) && map->phys_map[cpuid].enabled) {
>                  /* Discard duplicated CPUID set operation */
>                  if (cpuid == val) {
>                          spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> @@ -282,13 +282,9 @@ static inline int kvm_set_cpuid(struct kvm_vcpu
> *vcpu, u64 val)
>                  /*
>                   * CPUID is already set before
>                   * Forbid changing different CPUID at runtime
> -                * But CPUID 0 is the initial value for vcpu, so allow
> -                * changing from 0 to others
>                   */
> -               if (cpuid) {
> -                       spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> -                       return -EINVAL;
> -               }
> +               spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> +               return -EINVAL;
>          }
>
>          if (map->phys_map[val].enabled) {
> @@ -1029,6 +1025,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
>
>          /* Set cpuid */
>          kvm_write_sw_gcsr(csr, LOONGARCH_CSR_TMID, vcpu->vcpu_id);
> +       kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID);
>
>          /* Start with no pending virtual guest interrupts */
>          csr->csrs[LOONGARCH_CSR_GINTC] = 0;
Very nice, but I think kvm_drop_cpuid() should also set the CPUID to
KVM_MAX_PHYID. I have now updated my loongarch-kvm branch; you can test it
again, and I hope it is in perfect shape now.

Huacai
>
>
> >
> > Huacai
> >
> >>
> >>
> >>>
> >>> Huacai
> >>>
> >>>>
> >>>> Regards
> >>>> Bibo Mao
> >>>>
> >>>>>
> >>>>> Huacai
> >>>>>
> >>>>>>>
> >>>>>>>> +
> >>>>>>>> +       if (map->phys_map[val].enabled) {
> >>>>>>>> +               /*
> >>>>>>>> +                * New cpuid is already set with other vcpu
> >>>>>>>> +                * Forbid sharing the same cpuid between different vcpus
> >>>>>>>> +                */
> >>>>>>>> +               if (map->phys_map[val].vcpu != vcpu) {
> >>>>>>>> +                       spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> >>>>>>>> +                       return -EINVAL;
> >>>>>>>> +               }
> >>>>>>>> +
> >>>>>>>> +               /* Discard duplicated cpuid set operation*/
> >>>>>>>> +               spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> >>>>>>>> +               return 0;
> >>>>>>>> +       }
> >>>>>>>> +
> >>>>>>>> +       kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
> >>>>>>>> +       map->phys_map[val].enabled      = true;
> >>>>>>>> +       map->phys_map[val].vcpu         = vcpu;
> >>>>>>>> +       if (map->max_phyid < val)
> >>>>>>>> +               map->max_phyid = val;
> >>>>>>>> +       spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> >>>>>>>> +       return 0;
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
> >>>>>>>> +{
> >>>>>>>> +       struct kvm_phyid_map  *map;
> >>>>>>>> +
> >>>>>>>> +       if (cpuid >= KVM_MAX_PHYID)
> >>>>>>>> +               return NULL;
> >>>>>>>> +
> >>>>>>>> +       map = kvm->arch.phyid_map;
> >>>>>>>> +       if (map->phys_map[cpuid].enabled)
> >>>>>>>> +               return map->phys_map[cpuid].vcpu;
> >>>>>>>> +
> >>>>>>>> +       return NULL;
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
> >>>>>>>> +{
> >>>>>>>> +       int cpuid;
> >>>>>>>> +       struct loongarch_csrs *csr = vcpu->arch.csr;
> >>>>>>>> +       struct kvm_phyid_map  *map;
> >>>>>>>> +
> >>>>>>>> +       map = vcpu->kvm->arch.phyid_map;
> >>>>>>>> +       cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
> >>>>>>>> +       if (cpuid >= KVM_MAX_PHYID)
> >>>>>>>> +               return;
> >>>>>>>> +
> >>>>>>>> +       if (map->phys_map[cpuid].enabled) {
> >>>>>>>> +               map->phys_map[cpuid].vcpu = NULL;
> >>>>>>>> +               map->phys_map[cpuid].enabled = false;
> >>>>>>>> +               kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0);
> >>>>>>>> +       }
> >>>>>>>> +}
> >>>>>>> While kvm_set_cpuid() is protected by a spinlock, do kvm_drop_cpuid()
> >>>>>>> and kvm_get_vcpu_by_cpuid() also need it?
> >>>>>>>
> >>>>>> It looks good to me that a spinlock is added in function kvm_drop_cpuid().
> >>>>>> And thanks for the efforts.
> >>>>>>
> >>>>>> Regards
> >>>>>> Bibo Mao
> >>>>>>>> +
> >>>>>>>>      static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
> >>>>>>>>      {
> >>>>>>>>             int ret = 0, gintc;
> >>>>>>>> @@ -291,7 +380,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
> >>>>>>>>                     kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
> >>>>>>>>
> >>>>>>>>                     return ret;
> >>>>>>>> -       }
> >>>>>>>> +       } else if (id == LOONGARCH_CSR_CPUID)
> >>>>>>>> +               return kvm_set_cpuid(vcpu, val);
> >>>>>>>>
> >>>>>>>>             kvm_write_sw_gcsr(csr, id, val);
> >>>>>>>>
> >>>>>>>> @@ -943,6 +1033,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
> >>>>>>>>             hrtimer_cancel(&vcpu->arch.swtimer);
> >>>>>>>>             kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
> >>>>>>>>             kfree(vcpu->arch.csr);
> >>>>>>>> +       kvm_drop_cpuid(vcpu);
> >>>>>>> I think this line should be before the above kfree(), otherwise you
> >>>>>>> get a "use after free".
> >>>>>>>
> >>>>>>> Huacai
> >>>>>>>
> >>>>>>>>
> >>>>>>>>             /*
> >>>>>>>>              * If the vCPU is freed and reused as another vCPU, we don't want the
> >>>>>>>> diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
> >>>>>>>> index 0a37f6fa8f2d..6006a28653ad 100644
> >>>>>>>> --- a/arch/loongarch/kvm/vm.c
> >>>>>>>> +++ b/arch/loongarch/kvm/vm.c
> >>>>>>>> @@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> >>>>>>>>             if (!kvm->arch.pgd)
> >>>>>>>>                     return -ENOMEM;
> >>>>>>>>
> >>>>>>>> +       kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map),
> >>>>>>>> +                               GFP_KERNEL_ACCOUNT);
> >>>>>>>> +       if (!kvm->arch.phyid_map) {
> >>>>>>>> +               free_page((unsigned long)kvm->arch.pgd);
> >>>>>>>> +               kvm->arch.pgd = NULL;
> >>>>>>>> +               return -ENOMEM;
> >>>>>>>> +       }
> >>>>>>>> +
> >>>>>>>>             kvm_init_vmcs(kvm);
> >>>>>>>>             kvm->arch.gpa_size = BIT(cpu_vabits - 1);
> >>>>>>>>             kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
> >>>>>>>> @@ -44,6 +52,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> >>>>>>>>             for (i = 0; i <= kvm->arch.root_level; i++)
> >>>>>>>>                     kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
> >>>>>>>>
> >>>>>>>> +       spin_lock_init(&kvm->arch.phyid_map_lock);
> >>>>>>>>             return 0;
> >>>>>>>>      }
> >>>>>>>>
> >>>>>>>> @@ -51,7 +60,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
> >>>>>>>>      {
> >>>>>>>>             kvm_destroy_vcpus(kvm);
> >>>>>>>>             free_page((unsigned long)kvm->arch.pgd);
> >>>>>>>> +       kvfree(kvm->arch.phyid_map);
> >>>>>>>>             kvm->arch.pgd = NULL;
> >>>>>>>> +       kvm->arch.phyid_map = NULL;
> >>>>>>>>      }
> >>>>>>>>
> >>>>>>>>      int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> >>>>>>>> --
> >>>>>>>> 2.39.3
> >>>>>>>>
> >>>>>>
> >>>>
> >>
>