Message-ID: <ZCSLnRj2V5tOx4gU@google.com>
Date: Wed, 29 Mar 2023 12:03:57 -0700
From: David Matlack <dmatlack@...gle.com>
To: Vipin Sharma <vipinsh@...gle.com>
Cc: seanjc@...gle.com, pbonzini@...hat.com, bgardon@...gle.com,
jmattson@...gle.com, mizhang@...gle.com, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [Patch v4 16/18] KVM: x86/mmu: Allocate numa aware page tables
during page fault
On Tue, Mar 28, 2023 at 05:21:29PM -0700, David Matlack wrote:
> On Mon, Mar 06, 2023 at 02:41:25PM -0800, Vipin Sharma wrote:
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 64de083cd6b9..77d3aa368e5e 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -787,7 +787,7 @@ struct kvm_vcpu_arch {
> > struct kvm_mmu *walk_mmu;
> >
> > struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
> > - struct kvm_mmu_memory_cache mmu_shadow_page_cache;
> > + struct kvm_mmu_memory_cache mmu_shadow_page_cache[MAX_NUMNODES];
>
> I think we need an abstraction for a NUMA-aware MMU cache, since there
> is more than one such cache by the end of this series.
>
> e.g. a wrapper struct (struct kvm_mmu_numa_memory_cache), or making
> NUMA-awareness an optional feature within kvm_mmu_memory_cache, plus
> common helper functions for operations like initializing, topping up,
> and freeing.
>
> I have some ideas I want to try but I ran out of time today.
Something like this (compile-tested only; applies on top of this series):
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 041302d6132c..b44f867d0ed2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -787,7 +787,7 @@ struct kvm_vcpu_arch {
struct kvm_mmu *walk_mmu;
struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
- struct kvm_mmu_memory_cache mmu_shadow_page_cache[MAX_NUMNODES];
+ struct kvm_mmu_numa_memory_cache mmu_shadow_page_cache;
struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
@@ -1453,7 +1453,7 @@ struct kvm_arch {
*
* Protected by kvm->slots_lock.
*/
- struct kvm_mmu_memory_cache split_shadow_page_cache[MAX_NUMNODES];
+ struct kvm_mmu_numa_memory_cache split_shadow_page_cache;
struct kvm_mmu_memory_cache split_page_header_cache;
/*
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5463ce6e52fa..fb7b3932f08d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -702,7 +702,7 @@ static void mmu_free_sp_memory_cache(struct kvm_mmu_memory_cache *cache)
static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
{
- int r, nid = KVM_MMU_DEFAULT_CACHE_INDEX;
+ int r;
/* 1 rmap, 1 parent PTE per level, and the prefetched rmaps. */
r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
@@ -710,16 +710,8 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
if (r)
return r;
- if (kvm_numa_aware_page_table_enabled(vcpu->kvm)) {
- for_each_online_node(nid) {
- r = mmu_topup_sp_memory_cache(&vcpu->arch.mmu_shadow_page_cache[nid],
- KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE);
- }
- } else {
- r = mmu_topup_sp_memory_cache(&vcpu->arch.mmu_shadow_page_cache[nid],
- KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE);
- }
-
+ r = kvm_mmu_topup_numa_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
+ KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE);
if (r)
return r;
@@ -735,12 +727,9 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
- int nid;
-
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
mutex_lock(&vcpu->arch.mmu_shadow_page_cache_lock);
- for_each_node(nid)
- mmu_free_sp_memory_cache(&vcpu->arch.mmu_shadow_page_cache[nid]);
+ kvm_mmu_free_numa_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
mmu_free_sp_memory_cache(&vcpu->arch.mmu_shadowed_info_cache);
mutex_unlock(&vcpu->arch.mmu_shadow_page_cache_lock);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
@@ -2262,7 +2251,7 @@ static struct kvm_mmu_page *kvm_mmu_get_shadow_page(struct kvm_vcpu *vcpu,
{
struct shadow_page_caches caches = {
.page_header_cache = &vcpu->arch.mmu_page_header_cache,
- .shadow_page_cache = &vcpu->arch.mmu_shadow_page_cache[nid],
+ .shadow_page_cache = &vcpu->arch.mmu_shadow_page_cache.nodes[nid],
.shadowed_info_cache = &vcpu->arch.mmu_shadowed_info_cache,
};
@@ -5977,7 +5966,7 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
- int ret, nid;
+ int ret;
INIT_KVM_MMU_MEMORY_CACHE(&vcpu->arch.mmu_pte_list_desc_cache);
vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache;
@@ -5985,11 +5974,9 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
INIT_KVM_MMU_MEMORY_CACHE(&vcpu->arch.mmu_page_header_cache);
vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
- for_each_node(nid) {
- INIT_KVM_MMU_MEMORY_CACHE(&vcpu->arch.mmu_shadow_page_cache[nid]);
- if (kvm_numa_aware_page_table_enabled(vcpu->kvm))
- vcpu->arch.mmu_shadow_page_cache[nid].node = nid;
- }
+ kvm_mmu_init_numa_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
+ if (kvm_numa_aware_page_table_enabled(vcpu->kvm))
+ kvm_mmu_enable_numa_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
mutex_init(&vcpu->arch.mmu_shadow_page_cache_lock);
@@ -6140,7 +6127,7 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
int kvm_mmu_init_vm(struct kvm *kvm)
{
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
- int r, nid;
+ int r;
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
@@ -6159,9 +6146,7 @@ int kvm_mmu_init_vm(struct kvm *kvm)
INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_page_header_cache);
kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
- for_each_node(nid)
- INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache[nid]);
-
+ kvm_mmu_init_numa_memory_cache(&kvm->arch.split_shadow_page_cache);
INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_desc_cache);
kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache;
@@ -6171,13 +6156,10 @@ int kvm_mmu_init_vm(struct kvm *kvm)
static void mmu_free_vm_memory_caches(struct kvm *kvm)
{
- int nid;
-
kvm_mmu_free_memory_cache(&kvm->arch.split_desc_cache);
kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache);
mutex_lock(&kvm->slots_lock);
- for_each_node(nid)
- mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid]);
+ kvm_mmu_free_numa_memory_cache(&kvm->arch.split_shadow_page_cache);
mutex_unlock(&kvm->slots_lock);
}
@@ -6299,7 +6281,7 @@ static bool need_topup_split_caches_or_resched(struct kvm *kvm, int nid)
*/
return need_topup(&kvm->arch.split_desc_cache, SPLIT_DESC_CACHE_MIN_NR_OBJECTS) ||
need_topup(&kvm->arch.split_page_header_cache, 1) ||
- need_topup(&kvm->arch.split_shadow_page_cache[nid], 1);
+ need_topup(&kvm->arch.split_shadow_page_cache.nodes[nid], 1);
}
static int topup_split_caches(struct kvm *kvm, int nid)
@@ -6332,7 +6314,7 @@ static int topup_split_caches(struct kvm *kvm, int nid)
if (r)
return r;
- return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid], 1);
+ return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache.nodes[nid], 1);
}
static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep,
@@ -6357,7 +6339,7 @@ static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *hu
/* Direct SPs do not require a shadowed_info_cache. */
caches.page_header_cache = &kvm->arch.split_page_header_cache;
- caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache[nid];
+ caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache.nodes[nid];
/* Safe to pass NULL for vCPU since requesting a direct SP. */
return __kvm_mmu_get_shadow_page(kvm, NULL, &caches, gfn, role);
@@ -6760,7 +6742,7 @@ static unsigned long mmu_shrink_scan(struct shrinker *shrink,
list_move_tail(&kvm->vm_list, &vm_list);
kvm_for_each_vcpu(i, vcpu, kvm) {
- freed += mmu_memory_cache_try_empty(vcpu->arch.mmu_shadow_page_cache,
+ freed += mmu_memory_cache_try_empty(vcpu->arch.mmu_shadow_page_cache.nodes,
MAX_NUMNODES,
&vcpu->arch.mmu_shadow_page_cache_lock);
freed += mmu_memory_cache_try_empty(&vcpu->arch.mmu_shadowed_info_cache,
@@ -6769,7 +6751,7 @@ static unsigned long mmu_shrink_scan(struct shrinker *shrink,
if (freed >= sc->nr_to_scan)
goto out;
}
- freed += mmu_memory_cache_try_empty(kvm->arch.split_shadow_page_cache,
+ freed += mmu_memory_cache_try_empty(kvm->arch.split_shadow_page_cache.nodes,
MAX_NUMNODES, &kvm->slots_lock);
if (freed >= sc->nr_to_scan)
goto out;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 63113a66f560..721d5a415807 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -265,7 +265,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu, int nid)
struct kvm_mmu_page *sp;
sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
- sp->spt = mmu_sp_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache[nid]);
+ sp->spt = mmu_sp_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache.nodes[nid]);
return sp;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d8ea39b248cd..940099629626 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6176,7 +6176,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
struct kvm_enable_cap *cap)
{
- int r, nid;
+ int r;
if (cap->flags)
return -EINVAL;
@@ -6399,9 +6399,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
kvm->arch.numa_aware_page_table = true;
mutex_lock(&kvm->slots_lock);
- for_each_node(nid) {
- kvm->arch.split_shadow_page_cache[nid].node = nid;
- }
+ kvm_mmu_enable_numa_memory_cache(&kvm->arch.split_shadow_page_cache);
mutex_unlock(&kvm->slots_lock);
r = 0;
}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 31586a65e346..d5d966e4a8bf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1365,6 +1365,11 @@ int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc);
void kvm_mmu_empty_memory_cache(struct kvm_mmu_memory_cache *mc);
void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
+
+void kvm_mmu_init_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache);
+void kvm_mmu_enable_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache);
+int kvm_mmu_topup_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache, int min);
+void kvm_mmu_free_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache);
#endif
void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 13032da2ddfc..7a58ea37bc15 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -101,6 +101,10 @@ struct kvm_mmu_memory_cache {
int node;
};
+struct kvm_mmu_numa_memory_cache {
+ struct kvm_mmu_memory_cache nodes[MAX_NUMNODES];
+};
+
#define KVM_MMU_MEMORY_CACHE_INIT() { \
.gfp_zero = __GFP_ZERO, \
.node = NUMA_NO_NODE, \
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 25a549705c8e..2607b546c3c9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -476,6 +476,43 @@ void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
BUG_ON(!p);
return p;
}
+
+void kvm_mmu_init_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache)
+{
+ int node;
+
+ for_each_node(node)
+ INIT_KVM_MMU_MEMORY_CACHE(&cache->nodes[node]);
+}
+
+void kvm_mmu_enable_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache)
+{
+ int node;
+
+ for_each_node(node)
+ cache->nodes[node].node = node;
+}
+
+int kvm_mmu_topup_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache, int min)
+{
+ int r, node;
+
+ for_each_online_node(node) {
+ r = kvm_mmu_topup_memory_cache(&cache->nodes[node], min);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+void kvm_mmu_free_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache)
+{
+ int node;
+
+ for_each_node(node)
+ kvm_mmu_free_memory_cache(&cache->nodes[node]);
+}
#endif
static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
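
FWIW, the intended lifecycle of the wrapper at a call site would look roughly
like the below. This is purely illustrative and not part of the diff; the
function name is made up, but the helpers, the .nodes[] array, and
KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE are the ones used above.

static int numa_cache_example(struct kvm_mmu_numa_memory_cache *cache,
                              bool numa_aware, int nid)
{
        void *spt;
        int r;

        /* One-time init: each per-node cache starts with node == NUMA_NO_NODE. */
        kvm_mmu_init_numa_memory_cache(cache);

        /* Opt in to per-node affinity, e.g. when the capability is enabled. */
        if (numa_aware)
                kvm_mmu_enable_numa_memory_cache(cache);

        /* Top up the caches of all online nodes before taking the MMU lock. */
        r = kvm_mmu_topup_numa_memory_cache(cache, KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE);
        if (r)
                return r;

        /* Allocation sites just index the per-node array by the target node. */
        spt = kvm_mmu_memory_cache_alloc(&cache->nodes[nid]);

        /*
         * The allocated page is now owned by the caller (it would become
         * sp->spt, as in tdp_mmu_alloc_sp() above). Teardown only frees
         * whatever objects are still sitting in each node's cache.
         */
        kvm_mmu_free_numa_memory_cache(cache);

        return spt ? 0 : -ENOMEM;
}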