linux-kernel - Re: [PATCH v4 2/4] KVM: x86/mmu: Dynamically allocate shadow MMU's hashed page list

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite for Android: free password hash cracker in your pocket

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <7cc5cd92-1854-4e0e-93b7-e4eee5991334@intel.com>
Date: Wed, 28 May 2025 16:04:51 +0800
From: Xiaoyao Li <xiaoyao.li@...el.com>
To: Sean Christopherson <seanjc@...gle.com>,
 Paolo Bonzini <pbonzini@...hat.com>
Cc: kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
 Vipin Sharma <vipinsh@...gle.com>, James Houghton <jthoughton@...gle.com>
Subject: Re: [PATCH v4 2/4] KVM: x86/mmu: Dynamically allocate shadow MMU's
 hashed page list

On 5/23/2025 8:11 AM, Sean Christopherson wrote:
> Dynamically allocate the (massive) array of hashed lists used to track
> shadow pages, as the array itself is 32KiB, i.e. is an order-3 allocation
> all on its own, and is *exactly* an order-3 allocation.  Dynamically
> allocating the array will allow allocating "struct kvm" using kvmalloc(),
> and will also allow deferring allocation of the array until it's actually
> needed, i.e. until the first shadow root is allocated.
> 
> Opportunistically use kvmalloc() for the hashed lists, as an order-3
> allocation is (stating the obvious) less likely to fail than an order-4
> allocation, and the overhead of vmalloc() is undesirable given that the
> size of the allocation is fixed.
> 
> Cc: Vipin Sharma <vipinsh@...gle.com>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
>   arch/x86/include/asm/kvm_host.h |  4 ++--
>   arch/x86/kvm/mmu/mmu.c          | 23 ++++++++++++++++++++++-
>   arch/x86/kvm/x86.c              |  5 ++++-
>   3 files changed, 28 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 330cdcbed1a6..9667d6b929ee 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1343,7 +1343,7 @@ struct kvm_arch {
>   	bool has_private_mem;
>   	bool has_protected_state;
>   	bool pre_fault_allowed;
> -	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
> +	struct hlist_head *mmu_page_hash;
>   	struct list_head active_mmu_pages;
>   	/*
>   	 * A list of kvm_mmu_page structs that, if zapped, could possibly be
> @@ -2006,7 +2006,7 @@ void kvm_mmu_vendor_module_exit(void);
>   
>   void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
>   int kvm_mmu_create(struct kvm_vcpu *vcpu);
> -void kvm_mmu_init_vm(struct kvm *kvm);
> +int kvm_mmu_init_vm(struct kvm *kvm);
>   void kvm_mmu_uninit_vm(struct kvm *kvm);
>   
>   void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm,
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index cbc84c6abc2e..41da2cb1e3f1 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3882,6 +3882,18 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
>   	return r;
>   }
>   
> +static int kvm_mmu_alloc_page_hash(struct kvm *kvm)
> +{
> +	typeof(kvm->arch.mmu_page_hash) h;

Out of curiosity, it is uncommon in KVM to use typeof() given that we 
know what the type actually is. Is there a specific reason?

Anyway, it works.

Reviewed-by: Xiaoyao Li <xiaoyao.li@...el.com>

> +
> +	h = kvcalloc(KVM_NUM_MMU_PAGES, sizeof(*h), GFP_KERNEL_ACCOUNT);
> +	if (!h)
> +		return -ENOMEM;
> +
> +	kvm->arch.mmu_page_hash = h;
> +	return 0;
> +}
> +
>   static int mmu_first_shadow_root_alloc(struct kvm *kvm)
>   {
>   	struct kvm_memslots *slots;
> @@ -6675,13 +6687,19 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
>   		kvm_tdp_mmu_zap_invalidated_roots(kvm, true);
>   }
>   
> -void kvm_mmu_init_vm(struct kvm *kvm)
> +int kvm_mmu_init_vm(struct kvm *kvm)
>   {
> +	int r;
> +
>   	kvm->arch.shadow_mmio_value = shadow_mmio_value;
>   	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
>   	INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
>   	spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
>   
> +	r = kvm_mmu_alloc_page_hash(kvm);
> +	if (r)
> +		return r;
> +
>   	if (tdp_mmu_enabled)
>   		kvm_mmu_init_tdp_mmu(kvm);
>   
> @@ -6692,6 +6710,7 @@ void kvm_mmu_init_vm(struct kvm *kvm)
>   
>   	kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache;
>   	kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO;
> +	return 0;
>   }
>   
>   static void mmu_free_vm_memory_caches(struct kvm *kvm)
> @@ -6703,6 +6722,8 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm)
>   
>   void kvm_mmu_uninit_vm(struct kvm *kvm)
>   {
> +	kvfree(kvm->arch.mmu_page_hash);
> +
>   	if (tdp_mmu_enabled)
>   		kvm_mmu_uninit_tdp_mmu(kvm);
>   
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index f9f798f286ce..d204ba9368f8 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -12787,7 +12787,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>   	if (ret)
>   		goto out;
>   
> -	kvm_mmu_init_vm(kvm);
> +	ret = kvm_mmu_init_vm(kvm);
> +	if (ret)
> +		goto out_cleanup_page_track;
>   
>   	ret = kvm_x86_call(vm_init)(kvm);
>   	if (ret)
> @@ -12840,6 +12842,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>   
>   out_uninit_mmu:
>   	kvm_mmu_uninit_vm(kvm);
> +out_cleanup_page_track:
>   	kvm_page_track_cleanup(kvm);
>   out:
>   	return ret;