[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1457360155-9610-1-git-send-email-pbonzini@redhat.com>
Date: Mon, 7 Mar 2016 15:15:45 +0100
From: Paolo Bonzini <pbonzini@...hat.com>
To: linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Cc: Takuya Yoshikawa <yoshikawa_takuya_b1@....ntt.co.jp>,
Xiao Guangrong <guangrong.xiao@...ux.intel.com>
Subject: [PATCH v2 0/9] cleanup around kvm_sync_page, and a few micro-optimizations
Having committed the ubsan fixes, these are the cleanups that are left.
Compared to v1, I have fixed the patch to coalesce page zapping after
mmu_sync_children (as requested by Takuya and Guangrong), and I have
rewritten is_last_gpte again in an even simpler way.
Paolo
Paolo Bonzini (9):
KVM: MMU: introduce kvm_mmu_flush_or_zap
KVM: MMU: move TLB flush out of __kvm_sync_page
KVM: MMU: use kvm_sync_page in kvm_sync_pages
KVM: MMU: cleanup __kvm_sync_page and its callers
KVM: MMU: invert return value of mmu.sync_page and *kvm_sync_page*
KVM: MMU: move zap/flush to kvm_mmu_get_page
KVM: MMU: coalesce more page zapping in mmu_sync_children
KVM: MMU: simplify is_last_gpte
KVM: MMU: micro-optimize gpte_access
arch/x86/include/asm/kvm_host.h | 8 +-
arch/x86/kvm/mmu.c | 167 ++++++++++++++++++++--------------------
arch/x86/kvm/paging_tmpl.h | 11 ++-
3 files changed, 92 insertions(+), 94 deletions(-)
--
1.8.3.1
>From 2e741e7bb4204c43adbabfa1b32a854cb3935140 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@...hat.com>
Date: Wed, 24 Feb 2016 11:21:55 +0100
Subject: [PATCH 1/9] KVM: MMU: introduce kvm_mmu_flush_or_zap
This is a generalization of mmu_pte_write_flush_tlb that also
takes care of calling kvm_mmu_commit_zap_page. The next
patches will introduce more uses.
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
arch/x86/kvm/mmu.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0a4dc9b54181..6dae2356b9f5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4188,11 +4188,14 @@ static bool need_remote_flush(u64 old, u64 new)
return (old & ~new & PT64_PERM_MASK) != 0;
}
-static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
- bool remote_flush, bool local_flush)
+static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
+ struct list_head *invalid_list,
+ bool remote_flush, bool local_flush)
{
- if (zap_page)
+ if (!list_empty(invalid_list)) {
+ kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list);
return;
+ }
if (remote_flush)
kvm_flush_remote_tlbs(vcpu->kvm);
@@ -4320,7 +4323,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
LIST_HEAD(invalid_list);
u64 entry, gentry, *spte;
int npte;
- bool remote_flush, local_flush, zap_page;
+ bool remote_flush, local_flush;
union kvm_mmu_page_role mask = { };
mask.cr0_wp = 1;
@@ -4337,7 +4340,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
return;
- zap_page = remote_flush = local_flush = false;
+ remote_flush = local_flush = false;
pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
@@ -4357,8 +4360,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
if (detect_write_misaligned(sp, gpa, bytes) ||
detect_write_flooding(sp)) {
- zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
- &invalid_list);
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
++vcpu->kvm->stat.mmu_flooded;
continue;
}
@@ -4380,8 +4382,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
++spte;
}
}
- mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush);
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+ kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush);
kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
spin_unlock(&vcpu->kvm->mmu_lock);
}
--
1.8.3.1
>From c3c7e29240d21fd80d28ff26ec544e31beadff54 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@...hat.com>
Date: Wed, 24 Feb 2016 10:03:27 +0100
Subject: [PATCH 2/9] KVM: MMU: move TLB flush out of __kvm_sync_page
By doing this, kvm_sync_pages can use __kvm_sync_page instead of
reinventing it. Because of kvm_mmu_flush_or_zap, the code does not
end up being more complex than before, and more cleanups to kvm_sync_pages
will come in the next patches.
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
arch/x86/kvm/mmu.c | 53 ++++++++++++++++++++++++-----------------------------
1 file changed, 24 insertions(+), 29 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6dae2356b9f5..45a8a0605a09 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1932,10 +1932,24 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
return 1;
}
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
return 0;
}
+static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
+ struct list_head *invalid_list,
+ bool remote_flush, bool local_flush)
+{
+ if (!list_empty(invalid_list)) {
+ kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list);
+ return;
+ }
+
+ if (remote_flush)
+ kvm_flush_remote_tlbs(vcpu->kvm);
+ else if (local_flush)
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+}
+
static int kvm_sync_page_transient(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp)
{
@@ -1943,8 +1957,7 @@ static int kvm_sync_page_transient(struct kvm_vcpu *vcpu,
int ret;
ret = __kvm_sync_page(vcpu, sp, &invalid_list, false);
- if (ret)
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+ kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, !ret);
return ret;
}
@@ -1975,17 +1988,11 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
kvm_unlink_unsync_page(vcpu->kvm, s);
- if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
- (vcpu->arch.mmu.sync_page(vcpu, s))) {
- kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list);
- continue;
- }
- flush = true;
+ if (!__kvm_sync_page(vcpu, s, &invalid_list, false))
+ flush = true;
}
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
- if (flush)
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
}
struct mmu_page_path {
@@ -2071,6 +2078,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
while (mmu_unsync_walk(parent, &pages)) {
bool protected = false;
+ bool flush = false;
for_each_sp(pages, sp, parents, i)
protected |= rmap_write_protect(vcpu, sp->gfn);
@@ -2079,10 +2087,12 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
kvm_flush_remote_tlbs(vcpu->kvm);
for_each_sp(pages, sp, parents, i) {
- kvm_sync_page(vcpu, sp, &invalid_list);
+ if (!kvm_sync_page(vcpu, sp, &invalid_list))
+ flush = true;
+
mmu_pages_clear_parents(&parents);
}
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+ kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
cond_resched_lock(&vcpu->kvm->mmu_lock);
}
}
@@ -4188,21 +4198,6 @@ static bool need_remote_flush(u64 old, u64 new)
return (old & ~new & PT64_PERM_MASK) != 0;
}
-static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
- struct list_head *invalid_list,
- bool remote_flush, bool local_flush)
-{
- if (!list_empty(invalid_list)) {
- kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list);
- return;
- }
-
- if (remote_flush)
- kvm_flush_remote_tlbs(vcpu->kvm);
- else if (local_flush)
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
-}
-
static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
const u8 *new, int *bytes)
{
--
1.8.3.1
>From b8029b35c6a75c7e388c07be035624394c2199bd Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@...hat.com>
Date: Wed, 24 Feb 2016 10:19:30 +0100
Subject: [PATCH 3/9] KVM: MMU: use kvm_sync_page in kvm_sync_pages
If the last argument is true, kvm_unlink_unsync_page is called anyway in
__kvm_sync_page (either by kvm_mmu_prepare_zap_page or by __kvm_sync_page
itself). Therefore, kvm_sync_pages can just call kvm_sync_page, instead
of going through kvm_unlink_unsync_page+__kvm_sync_page.
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
arch/x86/kvm/mmu.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 45a8a0605a09..56be33714036 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1987,8 +1987,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
continue;
WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
- kvm_unlink_unsync_page(vcpu->kvm, s);
- if (!__kvm_sync_page(vcpu, s, &invalid_list, false))
+ if (!kvm_sync_page(vcpu, s, &invalid_list))
flush = true;
}
--
1.8.3.1
>From dad20250ae48906ca78b318a93f2a8223743dcc8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@...hat.com>
Date: Wed, 24 Feb 2016 10:28:01 +0100
Subject: [PATCH 4/9] KVM: MMU: cleanup __kvm_sync_page and its callers
Calling kvm_unlink_unsync_page in the middle of __kvm_sync_page makes
things unnecessarily tricky. If kvm_mmu_prepare_zap_page is called,
it will call kvm_unlink_unsync_page too. So kvm_unlink_unsync_page can
be called just as well at the beginning or the end of __kvm_sync_page...
which means that we might do it in kvm_sync_page too and remove the
parameter.
kvm_sync_page ends up being the same code that kvm_sync_pages used
to have before the previous patch.
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
arch/x86/kvm/mmu.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 56be33714036..88a1a79c869e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1917,16 +1917,13 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
/* @sp->gfn should be write-protected at the call site */
static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- struct list_head *invalid_list, bool clear_unsync)
+ struct list_head *invalid_list)
{
if (sp->role.cr4_pae != !!is_pae(vcpu)) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
return 1;
}
- if (clear_unsync)
- kvm_unlink_unsync_page(vcpu->kvm, sp);
-
if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
return 1;
@@ -1956,7 +1953,7 @@ static int kvm_sync_page_transient(struct kvm_vcpu *vcpu,
LIST_HEAD(invalid_list);
int ret;
- ret = __kvm_sync_page(vcpu, sp, &invalid_list, false);
+ ret = __kvm_sync_page(vcpu, sp, &invalid_list);
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, !ret);
return ret;
@@ -1972,7 +1969,8 @@ static void mmu_audit_disable(void) { }
static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
- return __kvm_sync_page(vcpu, sp, invalid_list, true);
+ kvm_unlink_unsync_page(vcpu->kvm, sp);
+ return __kvm_sync_page(vcpu, sp, invalid_list);
}
/* @gfn should be write-protected at the call site */
--
1.8.3.1
>From 3283c4c561d7f5eca8144fa5b6a666df2ae00172 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@...hat.com>
Date: Wed, 24 Feb 2016 11:07:14 +0100
Subject: [PATCH 5/9] KVM: MMU: invert return value of mmu.sync_page and
*kvm_sync_page*
Return true if the page was synced (and the TLB must be flushed)
and false if the page was zapped.
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
arch/x86/kvm/mmu.c | 31 ++++++++++++++-----------------
arch/x86/kvm/paging_tmpl.h | 4 ++--
2 files changed, 16 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 88a1a79c869e..1c87102efb3d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1784,7 +1784,7 @@ static void mark_unsync(u64 *spte)
static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp)
{
- return 1;
+ return 0;
}
static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
@@ -1916,20 +1916,20 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
if ((_sp)->role.direct || (_sp)->role.invalid) {} else
/* @sp->gfn should be write-protected at the call site */
-static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- struct list_head *invalid_list)
+static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ struct list_head *invalid_list)
{
if (sp->role.cr4_pae != !!is_pae(vcpu)) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
- return 1;
+ return false;
}
- if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
+ if (vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
- return 1;
+ return false;
}
- return 0;
+ return true;
}
static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
@@ -1947,14 +1947,14 @@ static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
-static int kvm_sync_page_transient(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp)
+static bool kvm_sync_page_transient(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *sp)
{
LIST_HEAD(invalid_list);
int ret;
ret = __kvm_sync_page(vcpu, sp, &invalid_list);
- kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, !ret);
+ kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, ret);
return ret;
}
@@ -1966,7 +1966,7 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
static void mmu_audit_disable(void) { }
#endif
-static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
kvm_unlink_unsync_page(vcpu->kvm, sp);
@@ -1985,8 +1985,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
continue;
WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
- if (!kvm_sync_page(vcpu, s, &invalid_list))
- flush = true;
+ flush |= kvm_sync_page(vcpu, s, &invalid_list);
}
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
@@ -2084,9 +2083,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
kvm_flush_remote_tlbs(vcpu->kvm);
for_each_sp(pages, sp, parents, i) {
- if (!kvm_sync_page(vcpu, sp, &invalid_list))
- flush = true;
-
+ flush |= kvm_sync_page(vcpu, sp, &invalid_list);
mmu_pages_clear_parents(&parents);
}
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
@@ -2145,7 +2142,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
if (sp->role.word != role.word)
continue;
- if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
+ if (sp->unsync && !kvm_sync_page_transient(vcpu, sp))
break;
if (sp->unsync_children)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4174cf290fa3..a1f5459edcec 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -943,7 +943,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
sizeof(pt_element_t)))
- return -EINVAL;
+ return 0;
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
vcpu->kvm->tlbs_dirty++;
@@ -975,7 +975,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
host_writable);
}
- return !nr_present;
+ return nr_present;
}
#undef pt_element_t
--
1.8.3.1
>From 298603ce19f76347c3416fe2e730c4bc577a9675 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@...hat.com>
Date: Wed, 24 Feb 2016 11:26:10 +0100
Subject: [PATCH 6/9] KVM: MMU: move zap/flush to kvm_mmu_get_page
kvm_mmu_get_page is the only caller of kvm_sync_page_transient
and kvm_sync_pages. Moving the handling of the invalid_list there
removes the need for the underdocumented kvm_sync_page_transient
function.
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
arch/x86/kvm/mmu.c | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1c87102efb3d..fecc9c51d924 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1947,18 +1947,6 @@ static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
-static bool kvm_sync_page_transient(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp)
-{
- LIST_HEAD(invalid_list);
- int ret;
-
- ret = __kvm_sync_page(vcpu, sp, &invalid_list);
- kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, ret);
-
- return ret;
-}
-
#ifdef CONFIG_KVM_MMU_AUDIT
#include "mmu_audit.c"
#else
@@ -1974,21 +1962,21 @@ static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
}
/* @gfn should be write-protected at the call site */
-static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
+static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
+ struct list_head *invalid_list)
{
struct kvm_mmu_page *s;
- LIST_HEAD(invalid_list);
- bool flush = false;
+ bool ret = false;
for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
if (!s->unsync)
continue;
WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
- flush |= kvm_sync_page(vcpu, s, &invalid_list);
+ ret |= kvm_sync_page(vcpu, s, invalid_list);
}
- kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+ return ret;
}
struct mmu_page_path {
@@ -2119,6 +2107,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
unsigned quadrant;
struct kvm_mmu_page *sp;
bool need_sync = false;
+ bool flush = false;
+ LIST_HEAD(invalid_list);
role = vcpu->arch.mmu.base_role;
role.level = level;
@@ -2142,8 +2132,16 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
if (sp->role.word != role.word)
continue;
- if (sp->unsync && !kvm_sync_page_transient(vcpu, sp))
- break;
+ if (sp->unsync) {
+ /* The page is good, but __kvm_sync_page might still end
+ * up zapping it. If so, break in order to rebuild it.
+ */
+ if (!__kvm_sync_page(vcpu, sp, &invalid_list))
+ break;
+
+ WARN_ON(!list_empty(&invalid_list));
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
if (sp->unsync_children)
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
@@ -2173,11 +2171,13 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
kvm_flush_remote_tlbs(vcpu->kvm);
if (level > PT_PAGE_TABLE_LEVEL && need_sync)
- kvm_sync_pages(vcpu, gfn);
+ flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
}
sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
clear_page(sp->spt);
trace_kvm_mmu_get_page(sp, true);
+
+ kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
return sp;
}
--
1.8.3.1
>From a824edfa6b6d919b8a47c077fe9a3f26e6987041 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@...hat.com>
Date: Thu, 25 Feb 2016 10:47:38 +0100
Subject: [PATCH 7/9] KVM: MMU: coalesce more page zapping in mmu_sync_children
mmu_sync_children can only process up to 16 pages at a time. Check
if we need to reschedule, and do not bother zapping the pages until
that happens.
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
arch/x86/kvm/mmu.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1dbef19867e4..2463de0b935c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2059,24 +2059,31 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
struct mmu_page_path parents;
struct kvm_mmu_pages pages;
LIST_HEAD(invalid_list);
+ bool flush = false;
while (mmu_unsync_walk(parent, &pages)) {
bool protected = false;
- bool flush = false;
for_each_sp(pages, sp, parents, i)
protected |= rmap_write_protect(vcpu, sp->gfn);
- if (protected)
+ if (protected) {
kvm_flush_remote_tlbs(vcpu->kvm);
+ flush = false;
+ }
for_each_sp(pages, sp, parents, i) {
flush |= kvm_sync_page(vcpu, sp, &invalid_list);
mmu_pages_clear_parents(&parents);
}
- kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
- cond_resched_lock(&vcpu->kvm->mmu_lock);
+ if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) {
+ kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+ cond_resched_lock(&vcpu->kvm->mmu_lock);
+ flush = false;
+ }
}
+
+ kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
}
static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
--
1.8.3.1
>From 3cf42eeae76b93529bd0d768c64a10e8db9817b3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@...hat.com>
Date: Tue, 23 Feb 2016 12:51:19 +0100
Subject: [PATCH 8/9] KVM: MMU: simplify is_last_gpte
Branch-free code is fun and everybody knows how much Avi loves it,
but is_last_gpte takes it a bit to the extreme. Since the code
is simply doing a range check, like
(level == 1 ||
((gpte & PT_PAGE_SIZE_MASK) && level < N))
we can make it branch-free without storing the entire truth table;
it is enough to cache N.
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
arch/x86/include/asm/kvm_host.h | 8 ++-----
arch/x86/kvm/mmu.c | 50 +++++++++++++++++++++--------------------
2 files changed, 28 insertions(+), 30 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1c3e390993a2..d110dc44d6c2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -347,12 +347,8 @@ struct kvm_mmu {
struct rsvd_bits_validate guest_rsvd_check;
- /*
- * Bitmap: bit set = last pte in walk
- * index[0:1]: level (zero-based)
- * index[2]: pte.ps
- */
- u8 last_pte_bitmap;
+ /* Can have large pages at levels 2..last_nonleaf_level-1. */
+ u8 last_nonleaf_level;
bool nx;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fecc9c51d924..1dbef19867e4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3625,13 +3625,24 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
return false;
}
-static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte)
+static inline bool is_last_gpte(struct kvm_mmu *mmu,
+ unsigned level, unsigned gpte)
{
- unsigned index;
+ /*
+ * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set
+ * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means
+ * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then.
+ */
+ gpte |= level - PT_PAGE_TABLE_LEVEL - 1;
- index = level - 1;
- index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2);
- return mmu->last_pte_bitmap & (1 << index);
+ /*
+ * The RHS has bit 7 set iff level < mmu->last_nonleaf_level.
+ * If it is clear, there are no large pages at this level, so clear
+ * PT_PAGE_SIZE_MASK in gpte if that is the case.
+ */
+ gpte &= level - mmu->last_nonleaf_level;
+
+ return gpte & PT_PAGE_SIZE_MASK;
}
#define PTTYPE_EPT 18 /* arbitrary */
@@ -3903,22 +3914,13 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
}
}
-static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
{
- u8 map;
- unsigned level, root_level = mmu->root_level;
- const unsigned ps_set_index = 1 << 2; /* bit 2 of index: ps */
-
- if (root_level == PT32E_ROOT_LEVEL)
- --root_level;
- /* PT_PAGE_TABLE_LEVEL always terminates */
- map = 1 | (1 << ps_set_index);
- for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) {
- if (level <= PT_PDPE_LEVEL
- && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu)))
- map |= 1 << (ps_set_index | (level - 1));
- }
- mmu->last_pte_bitmap = map;
+ unsigned root_level = mmu->root_level;
+
+ mmu->last_nonleaf_level = root_level;
+ if (root_level == PT32_ROOT_LEVEL && is_pse(vcpu))
+ mmu->last_nonleaf_level++;
}
static void paging64_init_context_common(struct kvm_vcpu *vcpu,
@@ -3930,7 +3932,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
reset_rsvds_bits_mask(vcpu, context);
update_permission_bitmask(vcpu, context, false);
- update_last_pte_bitmap(vcpu, context);
+ update_last_nonleaf_level(vcpu, context);
MMU_WARN_ON(!is_pae(vcpu));
context->page_fault = paging64_page_fault;
@@ -3957,7 +3959,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
reset_rsvds_bits_mask(vcpu, context);
update_permission_bitmask(vcpu, context, false);
- update_last_pte_bitmap(vcpu, context);
+ update_last_nonleaf_level(vcpu, context);
context->page_fault = paging32_page_fault;
context->gva_to_gpa = paging32_gva_to_gpa;
@@ -4015,7 +4017,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
}
update_permission_bitmask(vcpu, context, false);
- update_last_pte_bitmap(vcpu, context);
+ update_last_nonleaf_level(vcpu, context);
reset_tdp_shadow_zero_bits_mask(vcpu, context);
}
@@ -4121,7 +4123,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
}
update_permission_bitmask(vcpu, g_context, false);
- update_last_pte_bitmap(vcpu, g_context);
+ update_last_nonleaf_level(vcpu, g_context);
}
static void init_kvm_mmu(struct kvm_vcpu *vcpu)
--
1.8.3.1
>From 30fe56cdbd78a25a263d16e88083e6c8f797ce51 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@...hat.com>
Date: Tue, 23 Feb 2016 14:19:20 +0100
Subject: [PATCH 9/9] KVM: MMU: micro-optimize gpte_access
Avoid AND-NOT; most x86 processors lack an instruction for it.
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
arch/x86/kvm/paging_tmpl.h | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a1f5459edcec..6013f3685ef4 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -189,8 +189,11 @@ static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
ACC_USER_MASK;
#else
- access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
- access &= ~(gpte >> PT64_NX_SHIFT);
+ BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
+ BUILD_BUG_ON(ACC_EXEC_MASK != 1);
+ access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
+ /* Combine NX with P (which is set here) to get ACC_EXEC_MASK. */
+ access ^= (gpte >> PT64_NX_SHIFT);
#endif
return access;
--
1.8.3.1
Powered by blists - more mailing lists