[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <147fe837-5f81-4246-7d01-84b75cb94e6f@loongson.cn>
Date: Wed, 19 Nov 2025 14:09:42 +0800
From: Bibo Mao <maobibo@...ngson.cn>
To: Huacai Chen <chenhuacai@...nel.org>
Cc: Paolo Bonzini <pbonzini@...hat.com>, WANG Xuerui <kernel@...0n.name>,
Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Will Deacon <will@...nel.org>, Boqun Feng <boqun.feng@...il.com>,
Waiman Long <longman@...hat.com>, Juergen Gross <jgross@...e.com>,
Ajay Kaher <ajay.kaher@...adcom.com>,
Alexey Makhalov <alexey.makhalov@...adcom.com>,
Broadcom internal kernel review list
<bcm-kernel-feedback-list@...adcom.com>, kvm@...r.kernel.org,
loongarch@...ts.linux.dev, linux-kernel@...r.kernel.org,
virtualization@...ts.linux.dev, x86@...nel.org
Subject: Re: [PATCH 2/3] LoongArch: Add paravirt support with
vcpu_is_preempted()
On 2025/11/19 上午9:59, Bibo Mao wrote:
>
>
> On 2025/11/18 下午8:48, Huacai Chen wrote:
>> Hi, Bibo,
>>
>> On Tue, Nov 18, 2025 at 4:07 PM Bibo Mao <maobibo@...ngson.cn> wrote:
>>>
>>> Function vcpu_is_preempted() is used to check whether vCPU is preempted
>>> or not. Here add implementation with vcpu_is_preempted() when option
>>> CONFIG_PARAVIRT is enabled.
>>>
>>> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
>>> ---
>>> arch/loongarch/include/asm/smp.h | 1 +
>>> arch/loongarch/include/asm/spinlock.h | 5 +++++
>>> arch/loongarch/kernel/paravirt.c | 16 ++++++++++++++++
>>> arch/loongarch/kernel/smp.c | 6 ++++++
>>> 4 files changed, 28 insertions(+)
>>>
>>> diff --git a/arch/loongarch/include/asm/smp.h
>>> b/arch/loongarch/include/asm/smp.h
>>> index 3a47f52959a8..5b37f7bf2060 100644
>>> --- a/arch/loongarch/include/asm/smp.h
>>> +++ b/arch/loongarch/include/asm/smp.h
>>> @@ -18,6 +18,7 @@ struct smp_ops {
>>> void (*init_ipi)(void);
>>> void (*send_ipi_single)(int cpu, unsigned int action);
>>> void (*send_ipi_mask)(const struct cpumask *mask, unsigned
>>> int action);
>>> + bool (*vcpu_is_preempted)(int cpu);
>>> };
>>> extern struct smp_ops mp_ops;
>>>
>>> diff --git a/arch/loongarch/include/asm/spinlock.h
>>> b/arch/loongarch/include/asm/spinlock.h
>>> index 7cb3476999be..c001cef893aa 100644
>>> --- a/arch/loongarch/include/asm/spinlock.h
>>> +++ b/arch/loongarch/include/asm/spinlock.h
>>> @@ -5,6 +5,11 @@
>>> #ifndef _ASM_SPINLOCK_H
>>> #define _ASM_SPINLOCK_H
>>>
>>> +#ifdef CONFIG_PARAVIRT
>>> +#define vcpu_is_preempted vcpu_is_preempted
>>> +bool vcpu_is_preempted(int cpu);
>>> +#endif
>> Maybe paravirt.h is a better place?
>
> It is actually a little strange to add macro CONFIG_PARAVIRT in file
> asm/spinlock.h
>
> vcpu_is_preempted is originally defined in header file
> include/linux/sched.h like this
> #ifndef vcpu_is_preempted
> static inline bool vcpu_is_preempted(int cpu)
> {
> return false;
> }
> #endif
>
> That requires the overriding header file to be included before sched.h.
> File asm/spinlock.h meets this requirement; however, header file
> paravirt.h may not be included before sched.h in general.
>
> Here the vcpu_is_preempted definition is added before the following includes:
> #include <asm/processor.h>
> #include <asm/qspinlock.h>
> #include <asm/qrwlock.h>
> Maybe it would be better to add it after the above #include lines,
> but that needs further investigation.
>>
>>> +
>>> #include <asm/processor.h>
>>> #include <asm/qspinlock.h>
>>> #include <asm/qrwlock.h>
>>> diff --git a/arch/loongarch/kernel/paravirt.c
>>> b/arch/loongarch/kernel/paravirt.c
>>> index b1b51f920b23..b99404b6b13f 100644
>>> --- a/arch/loongarch/kernel/paravirt.c
>>> +++ b/arch/loongarch/kernel/paravirt.c
>>> @@ -52,6 +52,13 @@ static u64 paravt_steal_clock(int cpu)
>>> #ifdef CONFIG_SMP
>>> static struct smp_ops native_ops;
>>>
>>> +static bool pv_vcpu_is_preempted(int cpu)
>>> +{
>>> + struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
>>> +
>>> + return !!(src->preempted & KVM_VCPU_PREEMPTED);
>>> +}
>>> +
>>> static void pv_send_ipi_single(int cpu, unsigned int action)
>>> {
>>> int min, old;
>>> @@ -308,6 +315,9 @@ int __init pv_time_init(void)
>>> pr_err("Failed to install cpu hotplug callbacks\n");
>>> return r;
>>> }
>>> +
>>> + if (kvm_para_has_feature(KVM_FEATURE_PREEMPT_HINT))
>>> + mp_ops.vcpu_is_preempted = pv_vcpu_is_preempted;
>>> #endif
>>>
>>> static_call_update(pv_steal_clock, paravt_steal_clock);
>>> @@ -332,3 +342,9 @@ int __init pv_spinlock_init(void)
>>>
>>> return 0;
>>> }
>>> +
>>> +bool notrace vcpu_is_preempted(int cpu)
>>> +{
>>> + return mp_ops.vcpu_is_preempted(cpu);
>>> +}
>>
>> We can simplify the whole patch like this, then we don't need to touch
>> smp.c, and we can merge Patch-2/3.
>>
>> +bool notrace vcpu_is_preempted(int cpu)
>> +{
>> + if (!kvm_para_has_feature(KVM_FEATURE_PREEMPT_HINT))
>> + return false;
>> + else {
>> + struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
>> + return !!(src->preempted & KVM_VCPU_PREEMPTED);
>> + }
>> +}
> 1. Here is the assembly output for the relevant vcpu_is_preempted variants:
> <loongson_vcpu_is_preempted>:
> move $r4,$r0
> jirl $r0,$r1,0
>
> <pv_vcpu_is_preempted>:
> pcalau12i $r13,8759(0x2237)
> slli.d $r4,$r4,0x3
> addi.d $r13,$r13,-1000(0xc18)
> ldx.d $r13,$r13,$r4
> pcalau12i $r12,5462(0x1556)
> addi.d $r12,$r12,384(0x180)
> add.d $r12,$r13,$r12
> ld.bu $r4,$r12,16(0x10)
> andi $r4,$r4,0x1
> jirl $r0,$r1,0
>
> <vcpu_is_preempted>:
> pcalau12i $r12,8775(0x2247)
> ld.d $r12,$r12,-472(0xe28)
> jirl $r0,$r12,0
> andi $r0,$r0,0x0
>
> <vcpu_is_preempted_new>:
> pcalau12i $r12,8151(0x1fd7)
> ld.d $r12,$r12,-1008(0xc10)
> bstrpick.d $r12,$r12,0x1a,0x1a
> beqz $r12,188(0xbc) # 900000000024ec60
> pcalau12i $r12,11802(0x2e1a)
> addi.d $r12,$r12,-1400(0xa88)
> ldptr.w $r14,$r12,36(0x24)
> beqz $r14,108(0x6c) # 900000000024ec20
> addi.w $r13,$r0,1(0x1)
> bne $r14,$r13,164(0xa4) # 900000000024ec60
> ldptr.w $r13,$r12,40(0x28)
> bnez $r13,24(0x18) # 900000000024ebdc
> lu12i.w $r14,262144(0x40000)
> ori $r14,$r14,0x4
> cpucfg $r14,$r14
> slli.w $r13,$r14,0x0
> st.w $r14,$r12,40(0x28)
> bstrpick.d $r13,$r13,0x3,0x3
> beqz $r13,128(0x80) # 900000000024ec60
> pcalau12i $r13,8759(0x2237)
> slli.d $r4,$r4,0x3
> addi.d $r13,$r13,-1000(0xc18)
> ldx.d $r13,$r13,$r4
> pcalau12i $r12,5462(0x1556)
> addi.d $r12,$r12,384(0x180)
> add.d $r12,$r13,$r12
> ld.bu $r4,$r12,16(0x10)
> andi $r4,$r4,0x1
> jirl $r0,$r1,0
> andi $r0,$r0,0x0
> andi $r0,$r0,0x0
> andi $r0,$r0,0x0
> andi $r0,$r0,0x0
> andi $r0,$r0,0x0
> lu12i.w $r13,262144(0x40000)
> cpucfg $r13,$r13
> lu12i.w $r15,1237(0x4d5)
> ori $r15,$r15,0x64b
> slli.w $r13,$r13,0x0
> bne $r13,$r15,-124(0x3ff84) # 900000000024ebb8
> addi.w $r13,$r0,1(0x1)
> st.w $r13,$r12,36(0x24)
> b -128(0xfffff80) # 900000000024ebc0
> andi $r0,$r0,0x0
> andi $r0,$r0,0x0
> andi $r0,$r0,0x0
> andi $r0,$r0,0x0
> andi $r0,$r0,0x0
> andi $r0,$r0,0x0
> andi $r0,$r0,0x0
> move $r4,$r0
> jirl $r0,$r1,0
>
> With vcpu_is_preempted(), there is one memory load and one jirl jump;
> with vcpu_is_preempted_new(), there are two memory loads and two beq
> compare instructions.
>
> 2. In some scenarios, such as nr_cpus == 1, loongson_vcpu_is_preempted() is
> better than pv_vcpu_is_preempted() even if the preempt feature is enabled.
How about using a static key and keeping file smp.c untouched?
bool notrace vcpu_is_preempted(int cpu)
{
struct kvm_steal_time *src;
if (!static_branch_unlikely(&virt_preempt_key))
return false;
src = &per_cpu(steal_time, cpu);
return !!(src->preempted & KVM_VCPU_PREEMPTED);
}
It saves one memory load; here is the assembly output:
<vcpu_is_preempted>:
andi $r0,$r0,0x0
move $r4,$r0
jirl $r0,$r1,0
andi $r0,$r0,0x0
pcalau12i $r13,8759(0x2237)
slli.d $r4,$r4,0x3
addi.d $r13,$r13,-1000(0xc18)
ldx.d $r13,$r13,$r4
pcalau12i $r12,5462(0x1556)
addi.d $r12,$r12,384(0x180)
add.d $r12,$r13,$r12
ld.bu $r4,$r12,16(0x10)
andi $r4,$r4,0x1
jirl $r0,$r1,0
Regards
Bibo Mao
>
> Regards
> Bibo Mao
>> Huacai
>>
>>> +EXPORT_SYMBOL(vcpu_is_preempted);
>>> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
>>> index 46036d98da75..f04192fedf8d 100644
>>> --- a/arch/loongarch/kernel/smp.c
>>> +++ b/arch/loongarch/kernel/smp.c
>>> @@ -307,10 +307,16 @@ static void loongson_init_ipi(void)
>>> panic("IPI IRQ request failed\n");
>>> }
>>>
>>> +static bool loongson_vcpu_is_preempted(int cpu)
>>> +{
>>> + return false;
>>> +}
>>> +
>>> struct smp_ops mp_ops = {
>>> .init_ipi = loongson_init_ipi,
>>> .send_ipi_single = loongson_send_ipi_single,
>>> .send_ipi_mask = loongson_send_ipi_mask,
>>> + .vcpu_is_preempted = loongson_vcpu_is_preempted,
>>> };
>>>
>>> static void __init fdt_smp_setup(void)
>>> --
>>> 2.39.3
>>>
>>>
>
Powered by blists - more mailing lists