Message-ID: <113db01c73b8fe061b8226e75849317bac7873a5.camel@redhat.com>
Date:   Fri, 25 Feb 2022 19:29:39 +0200
From:   Maxim Levitsky <mlevitsk@...hat.com>
To:     Zeng Guang <guang.zeng@...el.com>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Sean Christopherson <seanjc@...gle.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>, kvm@...r.kernel.org,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Tony Luck <tony.luck@...el.com>,
        Kan Liang <kan.liang@...ux.intel.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        "H. Peter Anvin" <hpa@...or.com>,
        Kim Phillips <kim.phillips@....com>,
        Jarkko Sakkinen <jarkko@...nel.org>,
        Jethro Beekman <jethro@...tanix.com>,
        Kai Huang <kai.huang@...el.com>
Cc:     x86@...nel.org, linux-kernel@...r.kernel.org,
        Robert Hu <robert.hu@...el.com>, Gao Chao <chao.gao@...el.com>
Subject: Re: [PATCH v6 9/9] KVM: VMX: Optimize memory allocation for
 PID-pointer table

On Fri, 2022-02-25 at 16:22 +0800, Zeng Guang wrote:
> Currently KVM allocates 8 pages in advance for the Posted Interrupt
> Descriptor pointer (PID-pointer) table, to accommodate vCPUs with
> APIC IDs up to KVM_MAX_VCPU_IDS - 1. This policy wastes memory
> because most VMs have fewer than 512 vCPUs and thus need only one
> page.
> 
> If the user hypervisor specifies the maximum practical vCPU ID prior
> to vCPU creation, IPIv can allocate only the memory the PID-pointer
> table actually needs, reducing the memory footprint of VMs.
> 
> Suggested-by: Sean Christopherson <seanjc@...gle.com>
> Signed-off-by: Zeng Guang <guang.zeng@...el.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 45 ++++++++++++++++++++++++++++--------------
>  1 file changed, 30 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 0cb141c277ef..22bfb4953289 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -230,9 +230,6 @@ static const struct {
>  };
>  
>  #define L1D_CACHE_ORDER 4
> -
> -/* PID(Posted-Interrupt Descriptor)-pointer table entry is 64-bit long */
> -#define MAX_PID_TABLE_ORDER get_order(KVM_MAX_VCPU_IDS * sizeof(u64))
>  #define PID_TABLE_ENTRY_VALID 1
>  
>  static void *vmx_l1d_flush_pages;
> @@ -4434,6 +4431,24 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
>  	return exec_control;
>  }
>  
> +static int vmx_alloc_pid_table(struct kvm_vmx *kvm_vmx)
> +{
> +	struct page *pages;
> +
> +	if(kvm_vmx->pid_table)
> +		return 0;
> +
> +	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO,
> +			get_order(kvm_vmx->kvm.arch.max_vcpu_id * sizeof(u64)));
> +
> +	if (!pages)
> +		return -ENOMEM;
> +
> +	kvm_vmx->pid_table = (void *)page_address(pages);
> +	kvm_vmx->pid_last_index = kvm_vmx->kvm.arch.max_vcpu_id - 1;
> +	return 0;
> +}
> +
>  #define VMX_XSS_EXIT_BITMAP 0
>  
>  static void init_vmcs(struct vcpu_vmx *vmx)
> @@ -7159,6 +7174,16 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
>  			goto free_vmcs;
>  	}
>  
> +	if (enable_ipiv && kvm_vcpu_apicv_active(vcpu)) {
> +		struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
> +
> +		mutex_lock(&vcpu->kvm->lock);
> +		err = vmx_alloc_pid_table(kvm_vmx);
> +		mutex_unlock(&vcpu->kvm->lock);
> +		if (err)
> +			goto free_vmcs;
> +	}

This could be dangerous. If APICv is temporarily inhibited, this code
won't run and we will end up without a PID table.

I think kvm_vcpu_apicv_active() should just be dropped from this
condition, e.g. something like the sketch below.
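
For illustration, a minimal sketch of what that might look like in
vmx_create_vcpu(), keeping the rest of the v6 hunk as posted; the exact
form is of course up to the patch author:

	if (enable_ipiv) {
		struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);

		/*
		 * Allocate the PID table whenever IPI virtualization is
		 * enabled, even if APICv is currently inhibited, so the
		 * table already exists once the inhibit is lifted.
		 */
		mutex_lock(&vcpu->kvm->lock);
		err = vmx_alloc_pid_table(kvm_vmx);
		mutex_unlock(&vcpu->kvm->lock);
		if (err)
			goto free_vmcs;
	}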

Best regards,
	Maxim Levitsky


> +
>  	return 0;
>  
>  free_vmcs:
> @@ -7202,17 +7227,6 @@ static int vmx_vm_init(struct kvm *kvm)
>  		}
>  	}
>  
> -	if (enable_ipiv) {
> -		struct page *pages;
> -
> -		pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PID_TABLE_ORDER);
> -		if (!pages)
> -			return -ENOMEM;
> -
> -		to_kvm_vmx(kvm)->pid_table = (void *)page_address(pages);
> -		to_kvm_vmx(kvm)->pid_last_index = KVM_MAX_VCPU_IDS - 1;
> -	}
> -
>  	return 0;
>  }
>  
> @@ -7809,7 +7823,8 @@ static void vmx_vm_destroy(struct kvm *kvm)
>  	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
>  
>  	if (kvm_vmx->pid_table)
> -		free_pages((unsigned long)kvm_vmx->pid_table, MAX_PID_TABLE_ORDER);
> +		free_pages((unsigned long)kvm_vmx->pid_table,
> +			get_order((kvm_vmx->pid_last_index + 1) * sizeof(u64)));
>  }
>  
>  static struct kvm_x86_ops vmx_x86_ops __initdata = {
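
As a rough back-of-the-envelope check of the savings described in the
commit message (assuming KVM_MAX_VCPU_IDS is 4096 in this tree, which
matches the "8 pages" figure above, and 8-byte PID-pointer entries):

	/*
	 * Before: table always sized for the architectural maximum.
	 *   KVM_MAX_VCPU_IDS * sizeof(u64) = 4096 * 8 = 32 KiB
	 *   get_order(32 KiB) = 3                  -> 8 pages per VM
	 *
	 * After: table sized by the userspace-provided max_vcpu_id,
	 * e.g. max_vcpu_id = 512:
	 *   512 * 8 = 4 KiB, get_order(4 KiB) = 0  -> 1 page per VM
	 */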



