linux-kernel - Re: [PATCH v2 3/3] LoongArch: KVM: Add FPU delay load support

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAAhV-H5T6kHyYigy3P92Az1mum_vKf8Kd3e-hNGT5k_VZ+pF_Q@mail.gmail.com>
Date: Wed, 28 Jan 2026 15:38:26 +0800
From: Huacai Chen <chenhuacai@...nel.org>
To: Bibo Mao <maobibo@...ngson.cn>
Cc: WANG Xuerui <kernel@...0n.name>, Tianrui Zhao <zhaotianrui@...ngson.cn>, 
	loongarch@...ts.linux.dev, linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Subject: Re: [PATCH v2 3/3] LoongArch: KVM: Add FPU delay load support

On Wed, Jan 28, 2026 at 2:35 PM Bibo Mao <maobibo@...ngson.cn> wrote:
>
>
>
> On 2026/1/28 下午12:05, Huacai Chen wrote:
> > Hi, Bibo,
> >
> > On Tue, Jan 27, 2026 at 8:51 PM Bibo Mao <maobibo@...ngson.cn> wrote:
> >>
> >> The FPU is lazily enabled by the KVM hypervisor. After the FPU is
> >> enabled and loaded, the vCPU can be preempted and the FPU state will be
> >> lost again. Here the FPU load is delayed until guest entry.
> >>
> >> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
> >> ---
> >>   arch/loongarch/include/asm/kvm_host.h |  2 ++
> >>   arch/loongarch/kvm/exit.c             | 15 ++++++++----
> >>   arch/loongarch/kvm/vcpu.c             | 33 +++++++++++++++++----------
> >>   3 files changed, 33 insertions(+), 17 deletions(-)
> >>
> >> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> >> index e4fe5b8e8149..902ff7bc0e35 100644
> >> --- a/arch/loongarch/include/asm/kvm_host.h
> >> +++ b/arch/loongarch/include/asm/kvm_host.h
> >> @@ -37,6 +37,7 @@
> >>   #define KVM_REQ_TLB_FLUSH_GPA          KVM_ARCH_REQ(0)
> >>   #define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
> >>   #define KVM_REQ_PMU                    KVM_ARCH_REQ(2)
> >> +#define KVM_REQ_FPU_LOAD               KVM_ARCH_REQ(3)
> >>
> >>   #define KVM_GUESTDBG_SW_BP_MASK                \
> >>          (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
> >> @@ -234,6 +235,7 @@ struct kvm_vcpu_arch {
> >>          u64 vpid;
> >>          gpa_t flush_gpa;
> >>
> >> +       int fpu_load_type;
> > I think the logic of V1 is better: it doesn't increase the size of
> > kvm_vcpu_arch, and checking a constant is a little faster than
> > checking a variable.
> The main reason is that the FPU_LOAD request is not so frequent. There is
> an atomic instruction in kvm_check_request(), so the unconditional
> kvm_check_request() calls may be unnecessary; also, there will be an
> LBT LOAD check in a later version.
>
> So I think one unconditional atomic test_and_clear may be better than
> three/four atomic test_and_clear operations.
>      kvm_check_request(KVM_REQ_FPU_LOAD,vcpu)
>      kvm_check_request(KVM_REQ_FPU_LSX, vcpu)
>      kvm_check_request(KVM_REQ_FPU_LASX, vcpu)
>
> Actually, different people have different views about this :)
It depends on how complex kvm_check_request() is. If it is very complex,
checking once and then following a switch-case is better.

Huacai

>
> Regards
> Bibo Mao
> >
> > Huacai
> >
> >>          /* Frequency of stable timer in Hz */
> >>          u64 timer_mhz;
> >>          ktime_t expire;
> >> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
> >> index 74b427287e96..b6f08df8fedb 100644
> >> --- a/arch/loongarch/kvm/exit.c
> >> +++ b/arch/loongarch/kvm/exit.c
> >> @@ -754,7 +754,8 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>                  return RESUME_HOST;
> >>          }
> >>
> >> -       kvm_own_fpu(vcpu);
> >> +       vcpu->arch.fpu_load_type = KVM_LARCH_FPU;
> >> +       kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >>
> >>          return RESUME_GUEST;
> >>   }
> >> @@ -794,8 +795,10 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>   {
> >>          if (!kvm_guest_has_lsx(&vcpu->arch))
> >>                  kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> >> -       else
> >> -               kvm_own_lsx(vcpu);
> >> +       else {
> >> +               vcpu->arch.fpu_load_type = KVM_LARCH_LSX;
> >> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >> +       }
> >>
> >>          return RESUME_GUEST;
> >>   }
> >> @@ -812,8 +815,10 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>   {
> >>          if (!kvm_guest_has_lasx(&vcpu->arch))
> >>                  kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> >> -       else
> >> -               kvm_own_lasx(vcpu);
> >> +       else {
> >> +               vcpu->arch.fpu_load_type = KVM_LARCH_LASX;
> >> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >> +       }
> >>
> >>          return RESUME_GUEST;
> >>   }
> >> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> >> index d91a1160a309..3e749e9738b2 100644
> >> --- a/arch/loongarch/kvm/vcpu.c
> >> +++ b/arch/loongarch/kvm/vcpu.c
> >> @@ -232,6 +232,27 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu)
> >>                          kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa);
> >>                          vcpu->arch.flush_gpa = INVALID_GPA;
> >>                  }
> >> +
> >> +       if (kvm_check_request(KVM_REQ_FPU_LOAD, vcpu)) {
> >> +               switch (vcpu->arch.fpu_load_type) {
> >> +               case KVM_LARCH_FPU:
> >> +                       kvm_own_fpu(vcpu);
> >> +                       break;
> >> +
> >> +               case KVM_LARCH_LSX:
> >> +                       kvm_own_lsx(vcpu);
> >> +                       break;
> >> +
> >> +               case KVM_LARCH_LASX:
> >> +                       kvm_own_lasx(vcpu);
> >> +                       break;
> >> +
> >> +               default:
> >> +                       break;
> >> +               }
> >> +
> >> +               vcpu->arch.fpu_load_type = 0;
> >> +       }
> >>   }
> >>
> >>   /*
> >> @@ -1338,8 +1359,6 @@ static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
> >>   /* Enable FPU and restore context */
> >>   void kvm_own_fpu(struct kvm_vcpu *vcpu)
> >>   {
> >> -       preempt_disable();
> >> -
> >>          /*
> >>           * Enable FPU for guest
> >>           * Set FR and FRE according to guest context
> >> @@ -1350,16 +1369,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
> >>          kvm_restore_fpu(&vcpu->arch.fpu);
> >>          vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
> >>          trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
> >> -
> >> -       preempt_enable();
> >>   }
> >>
> >>   #ifdef CONFIG_CPU_HAS_LSX
> >>   /* Enable LSX and restore context */
> >>   int kvm_own_lsx(struct kvm_vcpu *vcpu)
> >>   {
> >> -       preempt_disable();
> >> -
> >>          /* Enable LSX for guest */
> >>          kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
> >>          set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
> >> @@ -1381,8 +1396,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
> >>
> >>          trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
> >>          vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
> >> -       preempt_enable();
> >> -
> >>          return 0;
> >>   }
> >>   #endif
> >> @@ -1391,8 +1404,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
> >>   /* Enable LASX and restore context */
> >>   int kvm_own_lasx(struct kvm_vcpu *vcpu)
> >>   {
> >> -       preempt_disable();
> >> -
> >>          kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
> >>          set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
> >>          switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
> >> @@ -1414,8 +1425,6 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
> >>
> >>          trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
> >>          vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
> >> -       preempt_enable();
> >> -
> >>          return 0;
> >>   }
> >>   #endif
> >> --
> >> 2.39.3
> >>
> >>
>