lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Fri, 4 Sep 2020 20:21:46 +0800
From:   Haiwei Li <lihaiwei.kernel@...il.com>
To:     Vitaly Kuznetsov <vkuznets@...hat.com>
Cc:     "pbonzini@...hat.com" <pbonzini@...hat.com>,
        Sean Christopherson <sean.j.christopherson@...el.com>,
        "wanpengli@...cent.com" <wanpengli@...cent.com>,
        "jmattson@...gle.com" <jmattson@...gle.com>,
        "joro@...tes.org" <joro@...tes.org>, tglx@...utronix.de,
        mingo@...hat.com, "bp@...en8.de" <bp@...en8.de>,
        "hpa@...or.com" <hpa@...or.com>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "kvm@...r.kernel.org" <kvm@...r.kernel.org>,
        "x86@...nel.org" <x86@...nel.org>
Subject: Re: [PATCH v2] KVM: Check the allocation of pv cpu mask



On 20/9/4 17:53, Vitaly Kuznetsov wrote:
> Haiwei Li <lihaiwei.kernel@...il.com> writes:
> 
>> On 20/9/3 18:39, Vitaly Kuznetsov wrote:
>>> Haiwei Li <lihaiwei.kernel@...il.com> writes:
>>>
>>>> From: Haiwei Li <lihaiwei@...cent.com>
>>>>
>>>> check the allocation of per-cpu __pv_cpu_mask. Initialize ops only when
>>>> successful.
>>>>
>>>> Signed-off-by: Haiwei Li <lihaiwei@...cent.com>
>>>> ---
>>>>     arch/x86/kernel/kvm.c | 24 ++++++++++++++++++++----
>>>>     1 file changed, 20 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>>>> index 08320b0b2b27..d3c062e551d7 100644
>>>> --- a/arch/x86/kernel/kvm.c
>>>> +++ b/arch/x86/kernel/kvm.c
>>>> @@ -555,7 +555,6 @@ static void kvm_send_ipi_mask_allbutself(const
>>>> struct cpumask *mask, int vector)
>>>>     static void kvm_setup_pv_ipi(void)
>>>>     {
>>>>     	apic->send_IPI_mask = kvm_send_ipi_mask;
>>>> -	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
>>>>     	pr_info("setup PV IPIs\n");
>>>>     }
>>>>
>>>> @@ -654,7 +653,6 @@ static void __init kvm_guest_init(void)
>>>>     	}
>>>>
>>>>     	if (pv_tlb_flush_supported()) {
>>>> -		pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
>>>>     		pv_ops.mmu.tlb_remove_table = tlb_remove_table;
>>>>     		pr_info("KVM setup pv remote TLB flush\n");
>>>>     	}
>>>> @@ -767,6 +765,14 @@ static __init int activate_jump_labels(void)
>>>>     }
>>>>     arch_initcall(activate_jump_labels);
>>>>
>>>> +static void kvm_free_pv_cpu_mask(void)
>>>> +{
>>>> +	unsigned int cpu;
>>>> +
>>>> +	for_each_possible_cpu(cpu)
>>>> +		free_cpumask_var(per_cpu(__pv_cpu_mask, cpu));
>>>> +}
>>>> +
>>>>     static __init int kvm_alloc_cpumask(void)
>>>>     {
>>>>     	int cpu;
>>>> @@ -785,11 +791,21 @@ static __init int kvm_alloc_cpumask(void)
>>>>
>>>>     	if (alloc)
>>>>     		for_each_possible_cpu(cpu) {
>>>> -			zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
>>>> -				GFP_KERNEL, cpu_to_node(cpu));
>>>> +			if (!zalloc_cpumask_var_node(
>>>> +				per_cpu_ptr(&__pv_cpu_mask, cpu),
>>>> +				GFP_KERNEL, cpu_to_node(cpu)))
>>>> +				goto zalloc_cpumask_fail;
>>>>     		}
>>>>
>>>> +#if defined(CONFIG_SMP)
>>>> +	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
>>>> +#endif
>>>> +	pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
>>>
>>> This is too late I'm afraid. If I'm not mistaken PV patching happens
>>> earlier, so .init.guest_late_init (kvm_guest_init()) is good and
>>> arch_initcall() is bad.
>>
>> .init.guest_late_init (kvm_guest_init()) is called before
>> arch_initcall() and kvm_flush_tlb_others && kvm_send_ipi_mask_allbutself
>> rely on __pv_cpu_mask.  So, i can not put this assign in kvm_guest_init().
>>
>>>
>>> Have you checked that with this patch kvm_flush_tlb_others() is still
>>> being called?
>>
>> yes. I add a printk and i get the log.
>>
> 
> This is weird. I do the following on top of your patch:
> 
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index d3c062e551d7..f441209ff0a4 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -620,6 +620,8 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
>          struct kvm_steal_time *src;
>          struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
>   
> +       trace_printk("PV TLB flush %d CPUs\n", cpumask_weight(cpumask));
> +
>          cpumask_copy(flushmask, cpumask);
>          /*
>           * We have to call flush only on online vCPUs. And
> 
> With your patch I don't see any calls:
> 
> # grep -c -v '^#' /sys/kernel/debug/tracing/trace
> 0
> 
> with your patch reverted I see them:
> 
> # grep -c -v '^#' /sys/kernel/debug/tracing/trace
> 4571

I just retested. You are right. I'm sorry.

> 
> 
>>>
>>> Actually, there is no need to assign kvm_flush_tlb_others() so late. We
>>> can always check if __pv_cpu_mask was allocated and revert back to the
>>> architectural path if not.
>> I am sorry i don't really understand. Can you explain in more detail? Thx.
>>
> 
> I mean we can always call e.g. kvm_flush_tlb_others(), even if (very
> unlikely) the mask wasn't allocated. We just need to check for
> that. Something like (completely untested):
> 
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index d3c062e551d7..e3676cdee6a2 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -620,6 +620,11 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
>          struct kvm_steal_time *src;
>          struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
>   
> +       if (unlikely(!flushmask)) {
> +               flushmask = cpumask;
> +               goto do_native;
> +       }
> +

I see. I appreciate your patience and kindness.

I will send a new version.

>          cpumask_copy(flushmask, cpumask);
>          /*
>           * We have to call flush only on online vCPUs. And
> @@ -635,6 +640,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
>                  }
>          }
>   
> +do_native:
>          native_flush_tlb_others(flushmask, info);
>   }
>   
> 
>>>
>>>>     	return 0;
>>>> +
>>>> +zalloc_cpumask_fail:
>>>> +	kvm_free_pv_cpu_mask();
>>>> +	return -ENOMEM;
>>>>     }
>>>>     arch_initcall(kvm_alloc_cpumask);
>>>>
>>>> --
>>>> 2.18.4
>>>>
>>>
>>
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ